btrfs-progs: check: pass a template to add_extent_rec_nolookup
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases shown by the progress indicator.  The enumerator value is used
 * directly as an index into the message table in print_status_check(), so
 * every value except TASK_NOTHING needs a matching string there.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/* State handed to the progress indicator callback print_status_check(). */
struct task_ctx {
        int progress_enabled;
        /* phase whose message print_status_check() displays */
        enum task_position tp;

        /* handle passed to task_period_start() / task_period_wait() */
        struct task_info *info;
};
58
/*
 * File-scope checker state.
 * NOTE(review): most of these are only read or written outside the part of
 * the file reviewed here; confirm their exact meaning against the users.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
/* list heads declared with LIST_HEAD(); filled elsewhere in the file */
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int repair = 0;
/* when non-zero, maybe_free_inode_rec() skips the file extent gap check */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* progress indicator state, see struct task_ctx */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Common header of a backref record.  It is embedded as the `node` member
 * of both struct data_backref and struct tree_backref.
 */
struct extent_backref {
        struct list_head list;
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
86
/*
 * Backref record built on top of struct extent_backref.
 * `parent` and `root` share the same storage through the anonymous union,
 * so only one of them is meaningful for a given record.
 * NOTE(review): presumably node.full_backref selects which one — confirm.
 */
struct data_backref {
        struct extent_backref node;
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
101
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
116
/*
 * Backref record built on top of struct extent_backref that carries only a
 * parent or a root; the two share storage through the anonymous union.
 */
struct tree_backref {
        struct extent_backref node;
        union {
                u64 parent;
                u64 root;
        };
};
124
/*
 * Per-extent bookkeeping record.
 * NOTE(review): the users of this structure are outside the reviewed part
 * of the file; field meanings below are not documented here on purpose.
 * Presumably `backrefs` links struct extent_backref entries and `cache`
 * hooks the record into a cache_tree — confirm against the users.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        int flag_block_full_backref;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
151
/*
 * One name referring to an inode, linked on inode_record.backrefs.
 * The structure is variable sized: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes, i.e. the name is stored
 * in the trailing `name` array.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        /* compared against imode_to_type() in maybe_free_inode_rec() */
        unsigned int filetype:8;
        /* REF_ERR_* bits */
        int errors;
        unsigned int ref_type;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
165
/*
 * List entry describing a root item.
 * NOTE(review): not used in the reviewed part of the file; presumably it
 * caches the fields of a root item needed while walking trees — confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
176
/*
 * Error bits kept in the `errors` member of a backref record
 * (maybe_free_inode_rec() sets them on struct inode_backref).
 * print_ref_error() turns each bit into a human readable message.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
190
/*
 * One hole covering [start, start + len) of a file.  Holes live in an
 * rb-tree (inode_record.holes) that add_file_extent_hole() keeps ordered
 * with compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
196
/*
 * Everything collected about one inode while scanning a tree.
 * Records are reference counted through `refs`: get_inode_rec() clones a
 * record that has more than one reference before it is modified, and
 * free_inode_rec() releases the memory when the last reference is dropped.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        u32 refs;
};
224
/*
 * Error bits kept in inode_record.errors.  print_inode_error() turns each
 * bit into a human readable message.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
240
/*
 * One reference to a root, linked on root_record.backrefs.
 * NOTE(review): presumably variable sized like struct inode_backref, with
 * the name stored in the trailing `name` array — the allocation site is
 * outside the reviewed part of the file, confirm there.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* presumably REF_ERR_* bits — confirm against the users */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
255
/*
 * Per-root record.
 * NOTE(review): users are outside the reviewed part of the file; presumably
 * `backrefs` links struct root_backref entries and `cache` hooks the record
 * into a cache_tree — confirm.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
263
/*
 * Cache-tree entry carrying an opaque payload.  get_inode_rec() uses it to
 * map an inode number (cache.start, with cache.size == 1) to the
 * struct inode_record stored in `data`.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
268
/*
 * Per-tree-block container of root and inode caches.
 * `current` is the inode record being filled in; process_inode_item() reads
 * it and requires that its ino matches the key being processed.
 * NOTE(review): the meaning of `refs` and of the two caches is established
 * outside the reviewed part of the file.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
276
/*
 * A (start, size) pair describing a block.
 * NOTE(review): not used in the reviewed part of the file; units are
 * presumably bytes — confirm against the users.
 */
struct block_info {
        u64 start;
        u32 size;
};
281
/*
 * State of a tree walk: one struct shared_node slot per tree level
 * (BTRFS_MAX_LEVEL entries) plus a cache tree of shared nodes.
 * NOTE(review): presumably active_node indexes `nodes` — confirm against
 * the walking code, which is outside the reviewed part of the file.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
288
/*
 * List entry identifying an item by key and by the id of the root it
 * belongs to.
 * NOTE(review): presumably queued on the delete_items list — confirm.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
294
/*
 * List entry describing a candidate extent (bytenr, bytes) with counters.
 * NOTE(review): not used in the reviewed part of the file; the meaning of
 * `count` and `broken` must be confirmed against the users.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
302
/*
 * Information gathered about one tree root, stored in a cache tree through
 * `cache_extent`.
 * NOTE(review): presumably kept in roots_info_cache — confirm.
 */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
312
313 static void *print_status_check(void *p)
314 {
315         struct task_ctx *priv = p;
316         const char work_indicator[] = { '.', 'o', 'O', 'o' };
317         uint32_t count = 0;
318         static char *task_position_string[] = {
319                 "checking extents",
320                 "checking free space cache",
321                 "checking fs roots",
322         };
323
324         task_period_start(priv->info, 1000 /* 1s */);
325
326         if (priv->tp == TASK_NOTHING)
327                 return NULL;
328
329         while (1) {
330                 printf("%s [%c]\r", task_position_string[priv->tp],
331                                 work_indicator[count % 4]);
332                 count++;
333                 fflush(stdout);
334                 task_period_wait(priv->info);
335         }
336         return NULL;
337 }
338
/*
 * Write a newline to stdout and flush it, which moves the output past the
 * '\r'-terminated line drawn by print_status_check().
 *
 * @p:  unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
346
347 /* Compatible function to allow reuse of old codes */
348 static u64 first_extent_gap(struct rb_root *holes)
349 {
350         struct file_extent_hole *hole;
351
352         if (RB_EMPTY_ROOT(holes))
353                 return (u64)-1;
354
355         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
356         return hole->start;
357 }
358
359 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
360 {
361         struct file_extent_hole *hole1;
362         struct file_extent_hole *hole2;
363
364         hole1 = rb_entry(node1, struct file_extent_hole, node);
365         hole2 = rb_entry(node2, struct file_extent_hole, node);
366
367         if (hole1->start > hole2->start)
368                 return -1;
369         if (hole1->start < hole2->start)
370                 return 1;
371         /* Now hole1->start == hole2->start */
372         if (hole1->len >= hole2->len)
373                 /*
374                  * Hole 1 will be merge center
375                  * Same hole will be merged later
376                  */
377                 return -1;
378         /* Hole 2 will be merge center */
379         return 1;
380 }
381
382 /*
383  * Add a hole to the record
384  *
385  * This will do hole merge for copy_file_extent_holes(),
386  * which will ensure there won't be continuous holes.
387  */
388 static int add_file_extent_hole(struct rb_root *holes,
389                                 u64 start, u64 len)
390 {
391         struct file_extent_hole *hole;
392         struct file_extent_hole *prev = NULL;
393         struct file_extent_hole *next = NULL;
394
395         hole = malloc(sizeof(*hole));
396         if (!hole)
397                 return -ENOMEM;
398         hole->start = start;
399         hole->len = len;
400         /* Since compare will not return 0, no -EEXIST will happen */
401         rb_insert(holes, &hole->node, compare_hole);
402
403         /* simple merge with previous hole */
404         if (rb_prev(&hole->node))
405                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
406                                 node);
407         if (prev && prev->start + prev->len >= hole->start) {
408                 hole->len = hole->start + hole->len - prev->start;
409                 hole->start = prev->start;
410                 rb_erase(&prev->node, holes);
411                 free(prev);
412                 prev = NULL;
413         }
414
415         /* iterate merge with next holes */
416         while (1) {
417                 if (!rb_next(&hole->node))
418                         break;
419                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
420                                         node);
421                 if (hole->start + hole->len >= next->start) {
422                         if (hole->start + hole->len <= next->start + next->len)
423                                 hole->len = next->start + next->len -
424                                             hole->start;
425                         rb_erase(&next->node, holes);
426                         free(next);
427                         next = NULL;
428                 } else
429                         break;
430         }
431         return 0;
432 }
433
434 static int compare_hole_range(struct rb_node *node, void *data)
435 {
436         struct file_extent_hole *hole;
437         u64 start;
438
439         hole = (struct file_extent_hole *)data;
440         start = hole->start;
441
442         hole = rb_entry(node, struct file_extent_hole, node);
443         if (start < hole->start)
444                 return -1;
445         if (start >= hole->start && start < hole->start + hole->len)
446                 return 0;
447         return 1;
448 }
449
450 /*
451  * Delete a hole in the record
452  *
453  * This will do the hole split and is much restrict than add.
454  */
455 static int del_file_extent_hole(struct rb_root *holes,
456                                 u64 start, u64 len)
457 {
458         struct file_extent_hole *hole;
459         struct file_extent_hole tmp;
460         u64 prev_start = 0;
461         u64 prev_len = 0;
462         u64 next_start = 0;
463         u64 next_len = 0;
464         struct rb_node *node;
465         int have_prev = 0;
466         int have_next = 0;
467         int ret = 0;
468
469         tmp.start = start;
470         tmp.len = len;
471         node = rb_search(holes, &tmp, compare_hole_range, NULL);
472         if (!node)
473                 return -EEXIST;
474         hole = rb_entry(node, struct file_extent_hole, node);
475         if (start + len > hole->start + hole->len)
476                 return -EEXIST;
477
478         /*
479          * Now there will be no overflap, delete the hole and re-add the
480          * split(s) if they exists.
481          */
482         if (start > hole->start) {
483                 prev_start = hole->start;
484                 prev_len = start - hole->start;
485                 have_prev = 1;
486         }
487         if (hole->start + hole->len > start + len) {
488                 next_start = start + len;
489                 next_len = hole->start + hole->len - start - len;
490                 have_next = 1;
491         }
492         rb_erase(node, holes);
493         free(hole);
494         if (have_prev) {
495                 ret = add_file_extent_hole(holes, prev_start, prev_len);
496                 if (ret < 0)
497                         return ret;
498         }
499         if (have_next) {
500                 ret = add_file_extent_hole(holes, next_start, next_len);
501                 if (ret < 0)
502                         return ret;
503         }
504         return 0;
505 }
506
507 static int copy_file_extent_holes(struct rb_root *dst,
508                                   struct rb_root *src)
509 {
510         struct file_extent_hole *hole;
511         struct rb_node *node;
512         int ret = 0;
513
514         node = rb_first(src);
515         while (node) {
516                 hole = rb_entry(node, struct file_extent_hole, node);
517                 ret = add_file_extent_hole(dst, hole->start, hole->len);
518                 if (ret)
519                         break;
520                 node = rb_next(node);
521         }
522         return ret;
523 }
524
525 static void free_file_extent_holes(struct rb_root *holes)
526 {
527         struct rb_node *node;
528         struct file_extent_hole *hole;
529
530         node = rb_first(holes);
531         while (node) {
532                 hole = rb_entry(node, struct file_extent_hole, node);
533                 rb_erase(node, holes);
534                 free(hole);
535                 node = rb_first(holes);
536         }
537 }
538
539 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
540
541 static void record_root_in_trans(struct btrfs_trans_handle *trans,
542                                  struct btrfs_root *root)
543 {
544         if (root->last_trans != trans->transid) {
545                 root->track_dirty = 1;
546                 root->last_trans = trans->transid;
547                 root->commit_root = root->node;
548                 extent_buffer_get(root->node);
549         }
550 }
551
552 static u8 imode_to_type(u32 imode)
553 {
554 #define S_SHIFT 12
555         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
556                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
557                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
558                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
559                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
560                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
561                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
562                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
563         };
564
565         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
566 #undef S_SHIFT
567 }
568
569 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
570 {
571         struct device_record *rec1;
572         struct device_record *rec2;
573
574         rec1 = rb_entry(node1, struct device_record, node);
575         rec2 = rb_entry(node2, struct device_record, node);
576         if (rec1->devid > rec2->devid)
577                 return -1;
578         else if (rec1->devid < rec2->devid)
579                 return 1;
580         else
581                 return 0;
582 }
583
584 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
585 {
586         struct inode_record *rec;
587         struct inode_backref *backref;
588         struct inode_backref *orig;
589         struct inode_backref *tmp;
590         struct orphan_data_extent *src_orphan;
591         struct orphan_data_extent *dst_orphan;
592         size_t size;
593         int ret;
594
595         rec = malloc(sizeof(*rec));
596         if (!rec)
597                 return ERR_PTR(-ENOMEM);
598         memcpy(rec, orig_rec, sizeof(*rec));
599         rec->refs = 1;
600         INIT_LIST_HEAD(&rec->backrefs);
601         INIT_LIST_HEAD(&rec->orphan_extents);
602         rec->holes = RB_ROOT;
603
604         list_for_each_entry(orig, &orig_rec->backrefs, list) {
605                 size = sizeof(*orig) + orig->namelen + 1;
606                 backref = malloc(size);
607                 if (!backref) {
608                         ret = -ENOMEM;
609                         goto cleanup;
610                 }
611                 memcpy(backref, orig, size);
612                 list_add_tail(&backref->list, &rec->backrefs);
613         }
614         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
615                 dst_orphan = malloc(sizeof(*dst_orphan));
616                 if (!dst_orphan) {
617                         ret = -ENOMEM;
618                         goto cleanup;
619                 }
620                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
621                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
622         }
623         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
624         BUG_ON(ret < 0);
625
626         return rec;
627
628 cleanup:
629         if (!list_empty(&rec->backrefs))
630                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
631                         list_del(&orig->list);
632                         free(orig);
633                 }
634
635         if (!list_empty(&rec->orphan_extents))
636                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
637                         list_del(&orig->list);
638                         free(orig);
639                 }
640
641         free(rec);
642
643         return ERR_PTR(ret);
644 }
645
646 static void print_orphan_data_extents(struct list_head *orphan_extents,
647                                       u64 objectid)
648 {
649         struct orphan_data_extent *orphan;
650
651         if (list_empty(orphan_extents))
652                 return;
653         printf("The following data extent is lost in tree %llu:\n",
654                objectid);
655         list_for_each_entry(orphan, orphan_extents, list) {
656                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
657                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
658                        orphan->disk_len);
659         }
660 }
661
662 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
663 {
664         u64 root_objectid = root->root_key.objectid;
665         int errors = rec->errors;
666
667         if (!errors)
668                 return;
669         /* reloc root errors, we print its corresponding fs root objectid*/
670         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
671                 root_objectid = root->root_key.offset;
672                 fprintf(stderr, "reloc");
673         }
674         fprintf(stderr, "root %llu inode %llu errors %x",
675                 (unsigned long long) root_objectid,
676                 (unsigned long long) rec->ino, rec->errors);
677
678         if (errors & I_ERR_NO_INODE_ITEM)
679                 fprintf(stderr, ", no inode item");
680         if (errors & I_ERR_NO_ORPHAN_ITEM)
681                 fprintf(stderr, ", no orphan item");
682         if (errors & I_ERR_DUP_INODE_ITEM)
683                 fprintf(stderr, ", dup inode item");
684         if (errors & I_ERR_DUP_DIR_INDEX)
685                 fprintf(stderr, ", dup dir index");
686         if (errors & I_ERR_ODD_DIR_ITEM)
687                 fprintf(stderr, ", odd dir item");
688         if (errors & I_ERR_ODD_FILE_EXTENT)
689                 fprintf(stderr, ", odd file extent");
690         if (errors & I_ERR_BAD_FILE_EXTENT)
691                 fprintf(stderr, ", bad file extent");
692         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
693                 fprintf(stderr, ", file extent overlap");
694         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
695                 fprintf(stderr, ", file extent discount");
696         if (errors & I_ERR_DIR_ISIZE_WRONG)
697                 fprintf(stderr, ", dir isize wrong");
698         if (errors & I_ERR_FILE_NBYTES_WRONG)
699                 fprintf(stderr, ", nbytes wrong");
700         if (errors & I_ERR_ODD_CSUM_ITEM)
701                 fprintf(stderr, ", odd csum item");
702         if (errors & I_ERR_SOME_CSUM_MISSING)
703                 fprintf(stderr, ", some csum missing");
704         if (errors & I_ERR_LINK_COUNT_WRONG)
705                 fprintf(stderr, ", link count wrong");
706         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
707                 fprintf(stderr, ", orphan file extent");
708         fprintf(stderr, "\n");
709         /* Print the orphan extents if needed */
710         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
711                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
712
713         /* Print the holes if needed */
714         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
715                 struct file_extent_hole *hole;
716                 struct rb_node *node;
717                 int found = 0;
718
719                 node = rb_first(&rec->holes);
720                 fprintf(stderr, "Found file extent holes:\n");
721                 while (node) {
722                         found = 1;
723                         hole = rb_entry(node, struct file_extent_hole, node);
724                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
725                                 hole->start, hole->len);
726                         node = rb_next(node);
727                 }
728                 if (!found)
729                         fprintf(stderr, "\tstart: 0, len: %llu\n",
730                                 round_up(rec->isize, root->sectorsize));
731         }
732 }
733
734 static void print_ref_error(int errors)
735 {
736         if (errors & REF_ERR_NO_DIR_ITEM)
737                 fprintf(stderr, ", no dir item");
738         if (errors & REF_ERR_NO_DIR_INDEX)
739                 fprintf(stderr, ", no dir index");
740         if (errors & REF_ERR_NO_INODE_REF)
741                 fprintf(stderr, ", no inode ref");
742         if (errors & REF_ERR_DUP_DIR_ITEM)
743                 fprintf(stderr, ", dup dir item");
744         if (errors & REF_ERR_DUP_DIR_INDEX)
745                 fprintf(stderr, ", dup dir index");
746         if (errors & REF_ERR_DUP_INODE_REF)
747                 fprintf(stderr, ", dup inode ref");
748         if (errors & REF_ERR_INDEX_UNMATCH)
749                 fprintf(stderr, ", index unmatch");
750         if (errors & REF_ERR_FILETYPE_UNMATCH)
751                 fprintf(stderr, ", filetype unmatch");
752         if (errors & REF_ERR_NAME_TOO_LONG)
753                 fprintf(stderr, ", name too long");
754         if (errors & REF_ERR_NO_ROOT_REF)
755                 fprintf(stderr, ", no root ref");
756         if (errors & REF_ERR_NO_ROOT_BACKREF)
757                 fprintf(stderr, ", no root backref");
758         if (errors & REF_ERR_DUP_ROOT_REF)
759                 fprintf(stderr, ", dup root ref");
760         if (errors & REF_ERR_DUP_ROOT_BACKREF)
761                 fprintf(stderr, ", dup root backref");
762         fprintf(stderr, "\n");
763 }
764
765 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
766                                           u64 ino, int mod)
767 {
768         struct ptr_node *node;
769         struct cache_extent *cache;
770         struct inode_record *rec = NULL;
771         int ret;
772
773         cache = lookup_cache_extent(inode_cache, ino, 1);
774         if (cache) {
775                 node = container_of(cache, struct ptr_node, cache);
776                 rec = node->data;
777                 if (mod && rec->refs > 1) {
778                         node->data = clone_inode_rec(rec);
779                         if (IS_ERR(node->data))
780                                 return node->data;
781                         rec->refs--;
782                         rec = node->data;
783                 }
784         } else if (mod) {
785                 rec = calloc(1, sizeof(*rec));
786                 if (!rec)
787                         return ERR_PTR(-ENOMEM);
788                 rec->ino = ino;
789                 rec->extent_start = (u64)-1;
790                 rec->refs = 1;
791                 INIT_LIST_HEAD(&rec->backrefs);
792                 INIT_LIST_HEAD(&rec->orphan_extents);
793                 rec->holes = RB_ROOT;
794
795                 node = malloc(sizeof(*node));
796                 if (!node) {
797                         free(rec);
798                         return ERR_PTR(-ENOMEM);
799                 }
800                 node->cache.start = ino;
801                 node->cache.size = 1;
802                 node->data = rec;
803
804                 if (ino == BTRFS_FREE_INO_OBJECTID)
805                         rec->found_link = 1;
806
807                 ret = insert_cache_extent(inode_cache, &node->cache);
808                 if (ret)
809                         return ERR_PTR(-EEXIST);
810         }
811         return rec;
812 }
813
814 static void free_orphan_data_extents(struct list_head *orphan_extents)
815 {
816         struct orphan_data_extent *orphan;
817
818         while (!list_empty(orphan_extents)) {
819                 orphan = list_entry(orphan_extents->next,
820                                     struct orphan_data_extent, list);
821                 list_del(&orphan->list);
822                 free(orphan);
823         }
824 }
825
826 static void free_inode_rec(struct inode_record *rec)
827 {
828         struct inode_backref *backref;
829
830         if (--rec->refs > 0)
831                 return;
832
833         while (!list_empty(&rec->backrefs)) {
834                 backref = list_entry(rec->backrefs.next,
835                                      struct inode_backref, list);
836                 list_del(&backref->list);
837                 free(backref);
838         }
839         free_orphan_data_extents(&rec->orphan_extents);
840         free_file_extent_holes(&rec->holes);
841         free(rec);
842 }
843
844 static int can_free_inode_rec(struct inode_record *rec)
845 {
846         if (!rec->errors && rec->checked && rec->found_inode_item &&
847             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
848                 return 1;
849         return 0;
850 }
851
852 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
853                                  struct inode_record *rec)
854 {
855         struct cache_extent *cache;
856         struct inode_backref *tmp, *backref;
857         struct ptr_node *node;
858         unsigned char filetype;
859
860         if (!rec->found_inode_item)
861                 return;
862
863         filetype = imode_to_type(rec->imode);
864         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
865                 if (backref->found_dir_item && backref->found_dir_index) {
866                         if (backref->filetype != filetype)
867                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
868                         if (!backref->errors && backref->found_inode_ref &&
869                             rec->nlink == rec->found_link) {
870                                 list_del(&backref->list);
871                                 free(backref);
872                         }
873                 }
874         }
875
876         if (!rec->checked || rec->merging)
877                 return;
878
879         if (S_ISDIR(rec->imode)) {
880                 if (rec->found_size != rec->isize)
881                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
882                 if (rec->found_file_extent)
883                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
884         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
885                 if (rec->found_dir_item)
886                         rec->errors |= I_ERR_ODD_DIR_ITEM;
887                 if (rec->found_size != rec->nbytes)
888                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
889                 if (rec->nlink > 0 && !no_holes &&
890                     (rec->extent_end < rec->isize ||
891                      first_extent_gap(&rec->holes) < rec->isize))
892                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
893         }
894
895         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
896                 if (rec->found_csum_item && rec->nodatasum)
897                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
898                 if (rec->some_csum_missing && !rec->nodatasum)
899                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
900         }
901
902         BUG_ON(rec->refs != 1);
903         if (can_free_inode_rec(rec)) {
904                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
905                 node = container_of(cache, struct ptr_node, cache);
906                 BUG_ON(node->data != rec);
907                 remove_cache_extent(inode_cache, &node->cache);
908                 free(node);
909                 free_inode_rec(rec);
910         }
911 }
912
913 static int check_orphan_item(struct btrfs_root *root, u64 ino)
914 {
915         struct btrfs_path path;
916         struct btrfs_key key;
917         int ret;
918
919         key.objectid = BTRFS_ORPHAN_OBJECTID;
920         key.type = BTRFS_ORPHAN_ITEM_KEY;
921         key.offset = ino;
922
923         btrfs_init_path(&path);
924         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
925         btrfs_release_path(&path);
926         if (ret > 0)
927                 ret = -ENOENT;
928         return ret;
929 }
930
931 static int process_inode_item(struct extent_buffer *eb,
932                               int slot, struct btrfs_key *key,
933                               struct shared_node *active_node)
934 {
935         struct inode_record *rec;
936         struct btrfs_inode_item *item;
937
938         rec = active_node->current;
939         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
940         if (rec->found_inode_item) {
941                 rec->errors |= I_ERR_DUP_INODE_ITEM;
942                 return 1;
943         }
944         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
945         rec->nlink = btrfs_inode_nlink(eb, item);
946         rec->isize = btrfs_inode_size(eb, item);
947         rec->nbytes = btrfs_inode_nbytes(eb, item);
948         rec->imode = btrfs_inode_mode(eb, item);
949         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
950                 rec->nodatasum = 1;
951         rec->found_inode_item = 1;
952         if (rec->nlink == 0)
953                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
954         maybe_free_inode_rec(&active_node->inode_cache, rec);
955         return 0;
956 }
957
958 static struct inode_backref *get_inode_backref(struct inode_record *rec,
959                                                 const char *name,
960                                                 int namelen, u64 dir)
961 {
962         struct inode_backref *backref;
963
964         list_for_each_entry(backref, &rec->backrefs, list) {
965                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
966                         break;
967                 if (backref->dir != dir || backref->namelen != namelen)
968                         continue;
969                 if (memcmp(name, backref->name, namelen))
970                         continue;
971                 return backref;
972         }
973
974         backref = malloc(sizeof(*backref) + namelen + 1);
975         if (!backref)
976                 return NULL;
977         memset(backref, 0, sizeof(*backref));
978         backref->dir = dir;
979         backref->namelen = namelen;
980         memcpy(backref->name, name, namelen);
981         backref->name[namelen] = '\0';
982         list_add_tail(&backref->list, &rec->backrefs);
983         return backref;
984 }
985
986 static int add_inode_backref(struct cache_tree *inode_cache,
987                              u64 ino, u64 dir, u64 index,
988                              const char *name, int namelen,
989                              int filetype, int itemtype, int errors)
990 {
991         struct inode_record *rec;
992         struct inode_backref *backref;
993
994         rec = get_inode_rec(inode_cache, ino, 1);
995         BUG_ON(IS_ERR(rec));
996         backref = get_inode_backref(rec, name, namelen, dir);
997         BUG_ON(!backref);
998         if (errors)
999                 backref->errors |= errors;
1000         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1001                 if (backref->found_dir_index)
1002                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1003                 if (backref->found_inode_ref && backref->index != index)
1004                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1005                 if (backref->found_dir_item && backref->filetype != filetype)
1006                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1007
1008                 backref->index = index;
1009                 backref->filetype = filetype;
1010                 backref->found_dir_index = 1;
1011         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1012                 rec->found_link++;
1013                 if (backref->found_dir_item)
1014                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1015                 if (backref->found_dir_index && backref->filetype != filetype)
1016                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1017
1018                 backref->filetype = filetype;
1019                 backref->found_dir_item = 1;
1020         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1021                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1022                 if (backref->found_inode_ref)
1023                         backref->errors |= REF_ERR_DUP_INODE_REF;
1024                 if (backref->found_dir_index && backref->index != index)
1025                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1026                 else
1027                         backref->index = index;
1028
1029                 backref->ref_type = itemtype;
1030                 backref->found_inode_ref = 1;
1031         } else {
1032                 BUG_ON(1);
1033         }
1034
1035         maybe_free_inode_rec(inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1040                             struct cache_tree *dst_cache)
1041 {
1042         struct inode_backref *backref;
1043         u32 dir_count = 0;
1044         int ret = 0;
1045
1046         dst->merging = 1;
1047         list_for_each_entry(backref, &src->backrefs, list) {
1048                 if (backref->found_dir_index) {
1049                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1050                                         backref->index, backref->name,
1051                                         backref->namelen, backref->filetype,
1052                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1053                 }
1054                 if (backref->found_dir_item) {
1055                         dir_count++;
1056                         add_inode_backref(dst_cache, dst->ino,
1057                                         backref->dir, 0, backref->name,
1058                                         backref->namelen, backref->filetype,
1059                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1060                 }
1061                 if (backref->found_inode_ref) {
1062                         add_inode_backref(dst_cache, dst->ino,
1063                                         backref->dir, backref->index,
1064                                         backref->name, backref->namelen, 0,
1065                                         backref->ref_type, backref->errors);
1066                 }
1067         }
1068
1069         if (src->found_dir_item)
1070                 dst->found_dir_item = 1;
1071         if (src->found_file_extent)
1072                 dst->found_file_extent = 1;
1073         if (src->found_csum_item)
1074                 dst->found_csum_item = 1;
1075         if (src->some_csum_missing)
1076                 dst->some_csum_missing = 1;
1077         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1078                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1079                 if (ret < 0)
1080                         return ret;
1081         }
1082
1083         BUG_ON(src->found_link < dir_count);
1084         dst->found_link += src->found_link - dir_count;
1085         dst->found_size += src->found_size;
1086         if (src->extent_start != (u64)-1) {
1087                 if (dst->extent_start == (u64)-1) {
1088                         dst->extent_start = src->extent_start;
1089                         dst->extent_end = src->extent_end;
1090                 } else {
1091                         if (dst->extent_end > src->extent_start)
1092                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1093                         else if (dst->extent_end < src->extent_start) {
1094                                 ret = add_file_extent_hole(&dst->holes,
1095                                         dst->extent_end,
1096                                         src->extent_start - dst->extent_end);
1097                         }
1098                         if (dst->extent_end < src->extent_end)
1099                                 dst->extent_end = src->extent_end;
1100                 }
1101         }
1102
1103         dst->errors |= src->errors;
1104         if (src->found_inode_item) {
1105                 if (!dst->found_inode_item) {
1106                         dst->nlink = src->nlink;
1107                         dst->isize = src->isize;
1108                         dst->nbytes = src->nbytes;
1109                         dst->imode = src->imode;
1110                         dst->nodatasum = src->nodatasum;
1111                         dst->found_inode_item = 1;
1112                 } else {
1113                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1114                 }
1115         }
1116         dst->merging = 0;
1117
1118         return 0;
1119 }
1120
1121 static int splice_shared_node(struct shared_node *src_node,
1122                               struct shared_node *dst_node)
1123 {
1124         struct cache_extent *cache;
1125         struct ptr_node *node, *ins;
1126         struct cache_tree *src, *dst;
1127         struct inode_record *rec, *conflict;
1128         u64 current_ino = 0;
1129         int splice = 0;
1130         int ret;
1131
1132         if (--src_node->refs == 0)
1133                 splice = 1;
1134         if (src_node->current)
1135                 current_ino = src_node->current->ino;
1136
1137         src = &src_node->root_cache;
1138         dst = &dst_node->root_cache;
1139 again:
1140         cache = search_cache_extent(src, 0);
1141         while (cache) {
1142                 node = container_of(cache, struct ptr_node, cache);
1143                 rec = node->data;
1144                 cache = next_cache_extent(cache);
1145
1146                 if (splice) {
1147                         remove_cache_extent(src, &node->cache);
1148                         ins = node;
1149                 } else {
1150                         ins = malloc(sizeof(*ins));
1151                         BUG_ON(!ins);
1152                         ins->cache.start = node->cache.start;
1153                         ins->cache.size = node->cache.size;
1154                         ins->data = rec;
1155                         rec->refs++;
1156                 }
1157                 ret = insert_cache_extent(dst, &ins->cache);
1158                 if (ret == -EEXIST) {
1159                         conflict = get_inode_rec(dst, rec->ino, 1);
1160                         BUG_ON(IS_ERR(conflict));
1161                         merge_inode_recs(rec, conflict, dst);
1162                         if (rec->checked) {
1163                                 conflict->checked = 1;
1164                                 if (dst_node->current == conflict)
1165                                         dst_node->current = NULL;
1166                         }
1167                         maybe_free_inode_rec(dst, conflict);
1168                         free_inode_rec(rec);
1169                         free(ins);
1170                 } else {
1171                         BUG_ON(ret);
1172                 }
1173         }
1174
1175         if (src == &src_node->root_cache) {
1176                 src = &src_node->inode_cache;
1177                 dst = &dst_node->inode_cache;
1178                 goto again;
1179         }
1180
1181         if (current_ino > 0 && (!dst_node->current ||
1182             current_ino > dst_node->current->ino)) {
1183                 if (dst_node->current) {
1184                         dst_node->current->checked = 1;
1185                         maybe_free_inode_rec(dst, dst_node->current);
1186                 }
1187                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1188                 BUG_ON(IS_ERR(dst_node->current));
1189         }
1190         return 0;
1191 }
1192
1193 static void free_inode_ptr(struct cache_extent *cache)
1194 {
1195         struct ptr_node *node;
1196         struct inode_record *rec;
1197
1198         node = container_of(cache, struct ptr_node, cache);
1199         rec = node->data;
1200         free_inode_rec(rec);
1201         free(node);
1202 }
1203
1204 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1205
1206 static struct shared_node *find_shared_node(struct cache_tree *shared,
1207                                             u64 bytenr)
1208 {
1209         struct cache_extent *cache;
1210         struct shared_node *node;
1211
1212         cache = lookup_cache_extent(shared, bytenr, 1);
1213         if (cache) {
1214                 node = container_of(cache, struct shared_node, cache);
1215                 return node;
1216         }
1217         return NULL;
1218 }
1219
1220 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1221 {
1222         int ret;
1223         struct shared_node *node;
1224
1225         node = calloc(1, sizeof(*node));
1226         if (!node)
1227                 return -ENOMEM;
1228         node->cache.start = bytenr;
1229         node->cache.size = 1;
1230         cache_tree_init(&node->root_cache);
1231         cache_tree_init(&node->inode_cache);
1232         node->refs = refs;
1233
1234         ret = insert_cache_extent(shared, &node->cache);
1235
1236         return ret;
1237 }
1238
1239 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1240                              struct walk_control *wc, int level)
1241 {
1242         struct shared_node *node;
1243         struct shared_node *dest;
1244         int ret;
1245
1246         if (level == wc->active_node)
1247                 return 0;
1248
1249         BUG_ON(wc->active_node <= level);
1250         node = find_shared_node(&wc->shared, bytenr);
1251         if (!node) {
1252                 ret = add_shared_node(&wc->shared, bytenr, refs);
1253                 BUG_ON(ret);
1254                 node = find_shared_node(&wc->shared, bytenr);
1255                 wc->nodes[level] = node;
1256                 wc->active_node = level;
1257                 return 0;
1258         }
1259
1260         if (wc->root_level == wc->active_node &&
1261             btrfs_root_refs(&root->root_item) == 0) {
1262                 if (--node->refs == 0) {
1263                         free_inode_recs_tree(&node->root_cache);
1264                         free_inode_recs_tree(&node->inode_cache);
1265                         remove_cache_extent(&wc->shared, &node->cache);
1266                         free(node);
1267                 }
1268                 return 1;
1269         }
1270
1271         dest = wc->nodes[wc->active_node];
1272         splice_shared_node(node, dest);
1273         if (node->refs == 0) {
1274                 remove_cache_extent(&wc->shared, &node->cache);
1275                 free(node);
1276         }
1277         return 1;
1278 }
1279
1280 static int leave_shared_node(struct btrfs_root *root,
1281                              struct walk_control *wc, int level)
1282 {
1283         struct shared_node *node;
1284         struct shared_node *dest;
1285         int i;
1286
1287         if (level == wc->root_level)
1288                 return 0;
1289
1290         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1291                 if (wc->nodes[i])
1292                         break;
1293         }
1294         BUG_ON(i >= BTRFS_MAX_LEVEL);
1295
1296         node = wc->nodes[wc->active_node];
1297         wc->nodes[wc->active_node] = NULL;
1298         wc->active_node = i;
1299
1300         dest = wc->nodes[wc->active_node];
1301         if (wc->active_node < wc->root_level ||
1302             btrfs_root_refs(&root->root_item) > 0) {
1303                 BUG_ON(node->refs <= 1);
1304                 splice_shared_node(node, dest);
1305         } else {
1306                 BUG_ON(node->refs < 2);
1307                 node->refs--;
1308         }
1309         return 0;
1310 }
1311
1312 /*
1313  * Returns:
1314  * < 0 - on error
1315  * 1   - if the root with id child_root_id is a child of root parent_root_id
1316  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1317  *       has other root(s) as parent(s)
1318  * 2   - if the root child_root_id doesn't have any parent roots
1319  */
1320 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1321                          u64 child_root_id)
1322 {
1323         struct btrfs_path path;
1324         struct btrfs_key key;
1325         struct extent_buffer *leaf;
1326         int has_parent = 0;
1327         int ret;
1328
1329         btrfs_init_path(&path);
1330
1331         key.objectid = parent_root_id;
1332         key.type = BTRFS_ROOT_REF_KEY;
1333         key.offset = child_root_id;
1334         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1335                                 0, 0);
1336         if (ret < 0)
1337                 return ret;
1338         btrfs_release_path(&path);
1339         if (!ret)
1340                 return 1;
1341
1342         key.objectid = child_root_id;
1343         key.type = BTRFS_ROOT_BACKREF_KEY;
1344         key.offset = 0;
1345         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1346                                 0, 0);
1347         if (ret < 0)
1348                 goto out;
1349
1350         while (1) {
1351                 leaf = path.nodes[0];
1352                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1353                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1354                         if (ret)
1355                                 break;
1356                         leaf = path.nodes[0];
1357                 }
1358
1359                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1360                 if (key.objectid != child_root_id ||
1361                     key.type != BTRFS_ROOT_BACKREF_KEY)
1362                         break;
1363
1364                 has_parent = 1;
1365
1366                 if (key.offset == parent_root_id) {
1367                         btrfs_release_path(&path);
1368                         return 1;
1369                 }
1370
1371                 path.slots[0]++;
1372         }
1373 out:
1374         btrfs_release_path(&path);
1375         if (ret < 0)
1376                 return ret;
1377         return has_parent ? 0 : 2;
1378 }
1379
1380 static int process_dir_item(struct btrfs_root *root,
1381                             struct extent_buffer *eb,
1382                             int slot, struct btrfs_key *key,
1383                             struct shared_node *active_node)
1384 {
1385         u32 total;
1386         u32 cur = 0;
1387         u32 len;
1388         u32 name_len;
1389         u32 data_len;
1390         int error;
1391         int nritems = 0;
1392         int filetype;
1393         struct btrfs_dir_item *di;
1394         struct inode_record *rec;
1395         struct cache_tree *root_cache;
1396         struct cache_tree *inode_cache;
1397         struct btrfs_key location;
1398         char namebuf[BTRFS_NAME_LEN];
1399
1400         root_cache = &active_node->root_cache;
1401         inode_cache = &active_node->inode_cache;
1402         rec = active_node->current;
1403         rec->found_dir_item = 1;
1404
1405         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1406         total = btrfs_item_size_nr(eb, slot);
1407         while (cur < total) {
1408                 nritems++;
1409                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1410                 name_len = btrfs_dir_name_len(eb, di);
1411                 data_len = btrfs_dir_data_len(eb, di);
1412                 filetype = btrfs_dir_type(eb, di);
1413
1414                 rec->found_size += name_len;
1415                 if (name_len <= BTRFS_NAME_LEN) {
1416                         len = name_len;
1417                         error = 0;
1418                 } else {
1419                         len = BTRFS_NAME_LEN;
1420                         error = REF_ERR_NAME_TOO_LONG;
1421                 }
1422                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1423
1424                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1425                         add_inode_backref(inode_cache, location.objectid,
1426                                           key->objectid, key->offset, namebuf,
1427                                           len, filetype, key->type, error);
1428                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1429                         add_inode_backref(root_cache, location.objectid,
1430                                           key->objectid, key->offset,
1431                                           namebuf, len, filetype,
1432                                           key->type, error);
1433                 } else {
1434                         fprintf(stderr, "invalid location in dir item %u\n",
1435                                 location.type);
1436                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1437                                           key->objectid, key->offset, namebuf,
1438                                           len, filetype, key->type, error);
1439                 }
1440
1441                 len = sizeof(*di) + name_len + data_len;
1442                 di = (struct btrfs_dir_item *)((char *)di + len);
1443                 cur += len;
1444         }
1445         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1446                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1447
1448         return 0;
1449 }
1450
1451 static int process_inode_ref(struct extent_buffer *eb,
1452                              int slot, struct btrfs_key *key,
1453                              struct shared_node *active_node)
1454 {
1455         u32 total;
1456         u32 cur = 0;
1457         u32 len;
1458         u32 name_len;
1459         u64 index;
1460         int error;
1461         struct cache_tree *inode_cache;
1462         struct btrfs_inode_ref *ref;
1463         char namebuf[BTRFS_NAME_LEN];
1464
1465         inode_cache = &active_node->inode_cache;
1466
1467         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1468         total = btrfs_item_size_nr(eb, slot);
1469         while (cur < total) {
1470                 name_len = btrfs_inode_ref_name_len(eb, ref);
1471                 index = btrfs_inode_ref_index(eb, ref);
1472                 if (name_len <= BTRFS_NAME_LEN) {
1473                         len = name_len;
1474                         error = 0;
1475                 } else {
1476                         len = BTRFS_NAME_LEN;
1477                         error = REF_ERR_NAME_TOO_LONG;
1478                 }
1479                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1480                 add_inode_backref(inode_cache, key->objectid, key->offset,
1481                                   index, namebuf, len, 0, key->type, error);
1482
1483                 len = sizeof(*ref) + name_len;
1484                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1485                 cur += len;
1486         }
1487         return 0;
1488 }
1489
1490 static int process_inode_extref(struct extent_buffer *eb,
1491                                 int slot, struct btrfs_key *key,
1492                                 struct shared_node *active_node)
1493 {
1494         u32 total;
1495         u32 cur = 0;
1496         u32 len;
1497         u32 name_len;
1498         u64 index;
1499         u64 parent;
1500         int error;
1501         struct cache_tree *inode_cache;
1502         struct btrfs_inode_extref *extref;
1503         char namebuf[BTRFS_NAME_LEN];
1504
1505         inode_cache = &active_node->inode_cache;
1506
1507         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1508         total = btrfs_item_size_nr(eb, slot);
1509         while (cur < total) {
1510                 name_len = btrfs_inode_extref_name_len(eb, extref);
1511                 index = btrfs_inode_extref_index(eb, extref);
1512                 parent = btrfs_inode_extref_parent(eb, extref);
1513                 if (name_len <= BTRFS_NAME_LEN) {
1514                         len = name_len;
1515                         error = 0;
1516                 } else {
1517                         len = BTRFS_NAME_LEN;
1518                         error = REF_ERR_NAME_TOO_LONG;
1519                 }
1520                 read_extent_buffer(eb, namebuf,
1521                                    (unsigned long)(extref + 1), len);
1522                 add_inode_backref(inode_cache, key->objectid, parent,
1523                                   index, namebuf, len, 0, key->type, error);
1524
1525                 len = sizeof(*extref) + name_len;
1526                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1527                 cur += len;
1528         }
1529         return 0;
1530
1531 }
1532
1533 static int count_csum_range(struct btrfs_root *root, u64 start,
1534                             u64 len, u64 *found)
1535 {
1536         struct btrfs_key key;
1537         struct btrfs_path path;
1538         struct extent_buffer *leaf;
1539         int ret;
1540         size_t size;
1541         *found = 0;
1542         u64 csum_end;
1543         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1544
1545         btrfs_init_path(&path);
1546
1547         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1548         key.offset = start;
1549         key.type = BTRFS_EXTENT_CSUM_KEY;
1550
1551         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1552                                 &key, &path, 0, 0);
1553         if (ret < 0)
1554                 goto out;
1555         if (ret > 0 && path.slots[0] > 0) {
1556                 leaf = path.nodes[0];
1557                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1558                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1559                     key.type == BTRFS_EXTENT_CSUM_KEY)
1560                         path.slots[0]--;
1561         }
1562
1563         while (len > 0) {
1564                 leaf = path.nodes[0];
1565                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1566                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1567                         if (ret > 0)
1568                                 break;
1569                         else if (ret < 0)
1570                                 goto out;
1571                         leaf = path.nodes[0];
1572                 }
1573
1574                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1575                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1576                     key.type != BTRFS_EXTENT_CSUM_KEY)
1577                         break;
1578
1579                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1580                 if (key.offset >= start + len)
1581                         break;
1582
1583                 if (key.offset > start)
1584                         start = key.offset;
1585
1586                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1587                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1588                 if (csum_end > start) {
1589                         size = min(csum_end - start, len);
1590                         len -= size;
1591                         start += size;
1592                         *found += size;
1593                 }
1594
1595                 path.slots[0]++;
1596         }
1597 out:
1598         btrfs_release_path(&path);
1599         if (ret < 0)
1600                 return ret;
1601         return 0;
1602 }
1603
1604 static int process_file_extent(struct btrfs_root *root,
1605                                 struct extent_buffer *eb,
1606                                 int slot, struct btrfs_key *key,
1607                                 struct shared_node *active_node)
1608 {
1609         struct inode_record *rec;
1610         struct btrfs_file_extent_item *fi;
1611         u64 num_bytes = 0;
1612         u64 disk_bytenr = 0;
1613         u64 extent_offset = 0;
1614         u64 mask = root->sectorsize - 1;
1615         int extent_type;
1616         int ret;
1617
1618         rec = active_node->current;
1619         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1620         rec->found_file_extent = 1;
1621
1622         if (rec->extent_start == (u64)-1) {
1623                 rec->extent_start = key->offset;
1624                 rec->extent_end = key->offset;
1625         }
1626
1627         if (rec->extent_end > key->offset)
1628                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1629         else if (rec->extent_end < key->offset) {
1630                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1631                                            key->offset - rec->extent_end);
1632                 if (ret < 0)
1633                         return ret;
1634         }
1635
1636         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1637         extent_type = btrfs_file_extent_type(eb, fi);
1638
1639         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1640                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1641                 if (num_bytes == 0)
1642                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1643                 rec->found_size += num_bytes;
1644                 num_bytes = (num_bytes + mask) & ~mask;
1645         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1646                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1647                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1648                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1649                 extent_offset = btrfs_file_extent_offset(eb, fi);
1650                 if (num_bytes == 0 || (num_bytes & mask))
1651                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1652                 if (num_bytes + extent_offset >
1653                     btrfs_file_extent_ram_bytes(eb, fi))
1654                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1655                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1656                     (btrfs_file_extent_compression(eb, fi) ||
1657                      btrfs_file_extent_encryption(eb, fi) ||
1658                      btrfs_file_extent_other_encoding(eb, fi)))
1659                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1660                 if (disk_bytenr > 0)
1661                         rec->found_size += num_bytes;
1662         } else {
1663                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1664         }
1665         rec->extent_end = key->offset + num_bytes;
1666
1667         /*
1668          * The data reloc tree will copy full extents into its inode and then
1669          * copy the corresponding csums.  Because the extent it copied could be
1670          * a preallocated extent that hasn't been written to yet there may be no
1671          * csums to copy, ergo we won't have csums for our file extent.  This is
1672          * ok so just don't bother checking csums if the inode belongs to the
1673          * data reloc tree.
1674          */
1675         if (disk_bytenr > 0 &&
1676             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1677                 u64 found;
1678                 if (btrfs_file_extent_compression(eb, fi))
1679                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1680                 else
1681                         disk_bytenr += extent_offset;
1682
1683                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1684                 if (ret < 0)
1685                         return ret;
1686                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1687                         if (found > 0)
1688                                 rec->found_csum_item = 1;
1689                         if (found < num_bytes)
1690                                 rec->some_csum_missing = 1;
1691                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1692                         if (found > 0)
1693                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1694                 }
1695         }
1696         return 0;
1697 }
1698
1699 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1700                             struct walk_control *wc)
1701 {
1702         struct btrfs_key key;
1703         u32 nritems;
1704         int i;
1705         int ret = 0;
1706         struct cache_tree *inode_cache;
1707         struct shared_node *active_node;
1708
1709         if (wc->root_level == wc->active_node &&
1710             btrfs_root_refs(&root->root_item) == 0)
1711                 return 0;
1712
1713         active_node = wc->nodes[wc->active_node];
1714         inode_cache = &active_node->inode_cache;
1715         nritems = btrfs_header_nritems(eb);
1716         for (i = 0; i < nritems; i++) {
1717                 btrfs_item_key_to_cpu(eb, &key, i);
1718
1719                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1720                         continue;
1721                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1722                         continue;
1723
1724                 if (active_node->current == NULL ||
1725                     active_node->current->ino < key.objectid) {
1726                         if (active_node->current) {
1727                                 active_node->current->checked = 1;
1728                                 maybe_free_inode_rec(inode_cache,
1729                                                      active_node->current);
1730                         }
1731                         active_node->current = get_inode_rec(inode_cache,
1732                                                              key.objectid, 1);
1733                         BUG_ON(IS_ERR(active_node->current));
1734                 }
1735                 switch (key.type) {
1736                 case BTRFS_DIR_ITEM_KEY:
1737                 case BTRFS_DIR_INDEX_KEY:
1738                         ret = process_dir_item(root, eb, i, &key, active_node);
1739                         break;
1740                 case BTRFS_INODE_REF_KEY:
1741                         ret = process_inode_ref(eb, i, &key, active_node);
1742                         break;
1743                 case BTRFS_INODE_EXTREF_KEY:
1744                         ret = process_inode_extref(eb, i, &key, active_node);
1745                         break;
1746                 case BTRFS_INODE_ITEM_KEY:
1747                         ret = process_inode_item(eb, i, &key, active_node);
1748                         break;
1749                 case BTRFS_EXTENT_DATA_KEY:
1750                         ret = process_file_extent(root, eb, i, &key,
1751                                                   active_node);
1752                         break;
1753                 default:
1754                         break;
1755                 };
1756         }
1757         return ret;
1758 }
1759
1760 static void reada_walk_down(struct btrfs_root *root,
1761                             struct extent_buffer *node, int slot)
1762 {
1763         u64 bytenr;
1764         u64 ptr_gen;
1765         u32 nritems;
1766         u32 blocksize;
1767         int i;
1768         int level;
1769
1770         level = btrfs_header_level(node);
1771         if (level != 1)
1772                 return;
1773
1774         nritems = btrfs_header_nritems(node);
1775         blocksize = root->nodesize;
1776         for (i = slot; i < nritems; i++) {
1777                 bytenr = btrfs_node_blockptr(node, i);
1778                 ptr_gen = btrfs_node_ptr_generation(node, i);
1779                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1780         }
1781 }
1782
1783 /*
1784  * Check the child node/leaf by the following condition:
1785  * 1. the first item key of the node/leaf should be the same with the one
1786  *    in parent.
1787  * 2. block in parent node should match the child node/leaf.
1788  * 3. generation of parent node and child's header should be consistent.
1789  *
1790  * Or the child node/leaf pointed by the key in parent is not valid.
1791  *
1792  * We hope to check leaf owner too, but since subvol may share leaves,
1793  * which makes leaf owner check not so strong, key check should be
1794  * sufficient enough for that case.
1795  */
1796 static int check_child_node(struct btrfs_root *root,
1797                             struct extent_buffer *parent, int slot,
1798                             struct extent_buffer *child)
1799 {
1800         struct btrfs_key parent_key;
1801         struct btrfs_key child_key;
1802         int ret = 0;
1803
1804         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1805         if (btrfs_header_level(child) == 0)
1806                 btrfs_item_key_to_cpu(child, &child_key, 0);
1807         else
1808                 btrfs_node_key_to_cpu(child, &child_key, 0);
1809
1810         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1811                 ret = -EINVAL;
1812                 fprintf(stderr,
1813                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1814                         parent_key.objectid, parent_key.type, parent_key.offset,
1815                         child_key.objectid, child_key.type, child_key.offset);
1816         }
1817         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1818                 ret = -EINVAL;
1819                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1820                         btrfs_node_blockptr(parent, slot),
1821                         btrfs_header_bytenr(child));
1822         }
1823         if (btrfs_node_ptr_generation(parent, slot) !=
1824             btrfs_header_generation(child)) {
1825                 ret = -EINVAL;
1826                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1827                         btrfs_header_generation(child),
1828                         btrfs_node_ptr_generation(parent, slot));
1829         }
1830         return ret;
1831 }
1832
1833 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1834                           struct walk_control *wc, int *level)
1835 {
1836         enum btrfs_tree_block_status status;
1837         u64 bytenr;
1838         u64 ptr_gen;
1839         struct extent_buffer *next;
1840         struct extent_buffer *cur;
1841         u32 blocksize;
1842         int ret, err = 0;
1843         u64 refs;
1844
1845         WARN_ON(*level < 0);
1846         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1847         ret = btrfs_lookup_extent_info(NULL, root,
1848                                        path->nodes[*level]->start,
1849                                        *level, 1, &refs, NULL);
1850         if (ret < 0) {
1851                 err = ret;
1852                 goto out;
1853         }
1854
1855         if (refs > 1) {
1856                 ret = enter_shared_node(root, path->nodes[*level]->start,
1857                                         refs, wc, *level);
1858                 if (ret > 0) {
1859                         err = ret;
1860                         goto out;
1861                 }
1862         }
1863
1864         while (*level >= 0) {
1865                 WARN_ON(*level < 0);
1866                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1867                 cur = path->nodes[*level];
1868
1869                 if (btrfs_header_level(cur) != *level)
1870                         WARN_ON(1);
1871
1872                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1873                         break;
1874                 if (*level == 0) {
1875                         ret = process_one_leaf(root, cur, wc);
1876                         if (ret < 0)
1877                                 err = ret;
1878                         break;
1879                 }
1880                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1881                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1882                 blocksize = root->nodesize;
1883                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1884                                                1, &refs, NULL);
1885                 if (ret < 0)
1886                         refs = 0;
1887
1888                 if (refs > 1) {
1889                         ret = enter_shared_node(root, bytenr, refs,
1890                                                 wc, *level - 1);
1891                         if (ret > 0) {
1892                                 path->slots[*level]++;
1893                                 continue;
1894                         }
1895                 }
1896
1897                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1898                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1899                         free_extent_buffer(next);
1900                         reada_walk_down(root, cur, path->slots[*level]);
1901                         next = read_tree_block(root, bytenr, blocksize,
1902                                                ptr_gen);
1903                         if (!extent_buffer_uptodate(next)) {
1904                                 struct btrfs_key node_key;
1905
1906                                 btrfs_node_key_to_cpu(path->nodes[*level],
1907                                                       &node_key,
1908                                                       path->slots[*level]);
1909                                 btrfs_add_corrupt_extent_record(root->fs_info,
1910                                                 &node_key,
1911                                                 path->nodes[*level]->start,
1912                                                 root->nodesize, *level);
1913                                 err = -EIO;
1914                                 goto out;
1915                         }
1916                 }
1917
1918                 ret = check_child_node(root, cur, path->slots[*level], next);
1919                 if (ret) {
1920                         err = ret;
1921                         goto out;
1922                 }
1923
1924                 if (btrfs_is_leaf(next))
1925                         status = btrfs_check_leaf(root, NULL, next);
1926                 else
1927                         status = btrfs_check_node(root, NULL, next);
1928                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1929                         free_extent_buffer(next);
1930                         err = -EIO;
1931                         goto out;
1932                 }
1933
1934                 *level = *level - 1;
1935                 free_extent_buffer(path->nodes[*level]);
1936                 path->nodes[*level] = next;
1937                 path->slots[*level] = 0;
1938         }
1939 out:
1940         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1941         return err;
1942 }
1943
1944 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1945                         struct walk_control *wc, int *level)
1946 {
1947         int i;
1948         struct extent_buffer *leaf;
1949
1950         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1951                 leaf = path->nodes[i];
1952                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1953                         path->slots[i]++;
1954                         *level = i;
1955                         return 0;
1956                 } else {
1957                         free_extent_buffer(path->nodes[*level]);
1958                         path->nodes[*level] = NULL;
1959                         BUG_ON(*level > wc->active_node);
1960                         if (*level == wc->active_node)
1961                                 leave_shared_node(root, wc, *level);
1962                         *level = i + 1;
1963                 }
1964         }
1965         return 1;
1966 }
1967
1968 static int check_root_dir(struct inode_record *rec)
1969 {
1970         struct inode_backref *backref;
1971         int ret = -1;
1972
1973         if (!rec->found_inode_item || rec->errors)
1974                 goto out;
1975         if (rec->nlink != 1 || rec->found_link != 0)
1976                 goto out;
1977         if (list_empty(&rec->backrefs))
1978                 goto out;
1979         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1980         if (!backref->found_inode_ref)
1981                 goto out;
1982         if (backref->index != 0 || backref->namelen != 2 ||
1983             memcmp(backref->name, "..", 2))
1984                 goto out;
1985         if (backref->found_dir_index || backref->found_dir_item)
1986                 goto out;
1987         ret = 0;
1988 out:
1989         return ret;
1990 }
1991
1992 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1993                               struct btrfs_root *root, struct btrfs_path *path,
1994                               struct inode_record *rec)
1995 {
1996         struct btrfs_inode_item *ei;
1997         struct btrfs_key key;
1998         int ret;
1999
2000         key.objectid = rec->ino;
2001         key.type = BTRFS_INODE_ITEM_KEY;
2002         key.offset = (u64)-1;
2003
2004         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2005         if (ret < 0)
2006                 goto out;
2007         if (ret) {
2008                 if (!path->slots[0]) {
2009                         ret = -ENOENT;
2010                         goto out;
2011                 }
2012                 path->slots[0]--;
2013                 ret = 0;
2014         }
2015         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2016         if (key.objectid != rec->ino) {
2017                 ret = -ENOENT;
2018                 goto out;
2019         }
2020
2021         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2022                             struct btrfs_inode_item);
2023         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2024         btrfs_mark_buffer_dirty(path->nodes[0]);
2025         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2026         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2027                root->root_key.objectid);
2028 out:
2029         btrfs_release_path(path);
2030         return ret;
2031 }
2032
2033 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2034                                     struct btrfs_root *root,
2035                                     struct btrfs_path *path,
2036                                     struct inode_record *rec)
2037 {
2038         int ret;
2039
2040         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2041         btrfs_release_path(path);
2042         if (!ret)
2043                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2044         return ret;
2045 }
2046
2047 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2048                                struct btrfs_root *root,
2049                                struct btrfs_path *path,
2050                                struct inode_record *rec)
2051 {
2052         struct btrfs_inode_item *ei;
2053         struct btrfs_key key;
2054         int ret = 0;
2055
2056         key.objectid = rec->ino;
2057         key.type = BTRFS_INODE_ITEM_KEY;
2058         key.offset = 0;
2059
2060         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2061         if (ret) {
2062                 if (ret > 0)
2063                         ret = -ENOENT;
2064                 goto out;
2065         }
2066
2067         /* Since ret == 0, no need to check anything */
2068         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2069                             struct btrfs_inode_item);
2070         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2071         btrfs_mark_buffer_dirty(path->nodes[0]);
2072         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2073         printf("reset nbytes for ino %llu root %llu\n",
2074                rec->ino, root->root_key.objectid);
2075 out:
2076         btrfs_release_path(path);
2077         return ret;
2078 }
2079
2080 static int add_missing_dir_index(struct btrfs_root *root,
2081                                  struct cache_tree *inode_cache,
2082                                  struct inode_record *rec,
2083                                  struct inode_backref *backref)
2084 {
2085         struct btrfs_path *path;
2086         struct btrfs_trans_handle *trans;
2087         struct btrfs_dir_item *dir_item;
2088         struct extent_buffer *leaf;
2089         struct btrfs_key key;
2090         struct btrfs_disk_key disk_key;
2091         struct inode_record *dir_rec;
2092         unsigned long name_ptr;
2093         u32 data_size = sizeof(*dir_item) + backref->namelen;
2094         int ret;
2095
2096         path = btrfs_alloc_path();
2097         if (!path)
2098                 return -ENOMEM;
2099
2100         trans = btrfs_start_transaction(root, 1);
2101         if (IS_ERR(trans)) {
2102                 btrfs_free_path(path);
2103                 return PTR_ERR(trans);
2104         }
2105
2106         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2107                 (unsigned long long)rec->ino);
2108         key.objectid = backref->dir;
2109         key.type = BTRFS_DIR_INDEX_KEY;
2110         key.offset = backref->index;
2111
2112         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2113         BUG_ON(ret);
2114
2115         leaf = path->nodes[0];
2116         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2117
2118         disk_key.objectid = cpu_to_le64(rec->ino);
2119         disk_key.type = BTRFS_INODE_ITEM_KEY;
2120         disk_key.offset = 0;
2121
2122         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2123         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2124         btrfs_set_dir_data_len(leaf, dir_item, 0);
2125         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2126         name_ptr = (unsigned long)(dir_item + 1);
2127         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2128         btrfs_mark_buffer_dirty(leaf);
2129         btrfs_free_path(path);
2130         btrfs_commit_transaction(trans, root);
2131
2132         backref->found_dir_index = 1;
2133         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2134         BUG_ON(IS_ERR(dir_rec));
2135         if (!dir_rec)
2136                 return 0;
2137         dir_rec->found_size += backref->namelen;
2138         if (dir_rec->found_size == dir_rec->isize &&
2139             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2140                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2141         if (dir_rec->found_size != dir_rec->isize)
2142                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2143
2144         return 0;
2145 }
2146
2147 static int delete_dir_index(struct btrfs_root *root,
2148                             struct cache_tree *inode_cache,
2149                             struct inode_record *rec,
2150                             struct inode_backref *backref)
2151 {
2152         struct btrfs_trans_handle *trans;
2153         struct btrfs_dir_item *di;
2154         struct btrfs_path *path;
2155         int ret = 0;
2156
2157         path = btrfs_alloc_path();
2158         if (!path)
2159                 return -ENOMEM;
2160
2161         trans = btrfs_start_transaction(root, 1);
2162         if (IS_ERR(trans)) {
2163                 btrfs_free_path(path);
2164                 return PTR_ERR(trans);
2165         }
2166
2167
2168         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2169                 (unsigned long long)backref->dir,
2170                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2171                 (unsigned long long)root->objectid);
2172
2173         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2174                                     backref->name, backref->namelen,
2175                                     backref->index, -1);
2176         if (IS_ERR(di)) {
2177                 ret = PTR_ERR(di);
2178                 btrfs_free_path(path);
2179                 btrfs_commit_transaction(trans, root);
2180                 if (ret == -ENOENT)
2181                         return 0;
2182                 return ret;
2183         }
2184
2185         if (!di)
2186                 ret = btrfs_del_item(trans, root, path);
2187         else
2188                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2189         BUG_ON(ret);
2190         btrfs_free_path(path);
2191         btrfs_commit_transaction(trans, root);
2192         return ret;
2193 }
2194
2195 static int create_inode_item(struct btrfs_root *root,
2196                              struct inode_record *rec,
2197                              struct inode_backref *backref, int root_dir)
2198 {
2199         struct btrfs_trans_handle *trans;
2200         struct btrfs_inode_item inode_item;
2201         time_t now = time(NULL);
2202         int ret;
2203
2204         trans = btrfs_start_transaction(root, 1);
2205         if (IS_ERR(trans)) {
2206                 ret = PTR_ERR(trans);
2207                 return ret;
2208         }
2209
2210         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2211                 "be incomplete, please check permissions and content after "
2212                 "the fsck completes.\n", (unsigned long long)root->objectid,
2213                 (unsigned long long)rec->ino);
2214
2215         memset(&inode_item, 0, sizeof(inode_item));
2216         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2217         if (root_dir)
2218                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2219         else
2220                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2221         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2222         if (rec->found_dir_item) {
2223                 if (rec->found_file_extent)
2224                         fprintf(stderr, "root %llu inode %llu has both a dir "
2225                                 "item and extents, unsure if it is a dir or a "
2226                                 "regular file so setting it as a directory\n",
2227                                 (unsigned long long)root->objectid,
2228                                 (unsigned long long)rec->ino);
2229                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2230                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2231         } else if (!rec->found_dir_item) {
2232                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2233                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2234         }
2235         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2236         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2237         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2238         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2239         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2240         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2241         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2242         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2243
2244         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2245         BUG_ON(ret);
2246         btrfs_commit_transaction(trans, root);
2247         return 0;
2248 }
2249
2250 static int repair_inode_backrefs(struct btrfs_root *root,
2251                                  struct inode_record *rec,
2252                                  struct cache_tree *inode_cache,
2253                                  int delete)
2254 {
2255         struct inode_backref *tmp, *backref;
2256         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2257         int ret = 0;
2258         int repaired = 0;
2259
2260         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2261                 if (!delete && rec->ino == root_dirid) {
2262                         if (!rec->found_inode_item) {
2263                                 ret = create_inode_item(root, rec, backref, 1);
2264                                 if (ret)
2265                                         break;
2266                                 repaired++;
2267                         }
2268                 }
2269
2270                 /* Index 0 for root dir's are special, don't mess with it */
2271                 if (rec->ino == root_dirid && backref->index == 0)
2272                         continue;
2273
2274                 if (delete &&
2275                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2276                      (backref->found_dir_index && backref->found_inode_ref &&
2277                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2278                         ret = delete_dir_index(root, inode_cache, rec, backref);
2279                         if (ret)
2280                                 break;
2281                         repaired++;
2282                         list_del(&backref->list);
2283                         free(backref);
2284                 }
2285
2286                 if (!delete && !backref->found_dir_index &&
2287                     backref->found_dir_item && backref->found_inode_ref) {
2288                         ret = add_missing_dir_index(root, inode_cache, rec,
2289                                                     backref);
2290                         if (ret)
2291                                 break;
2292                         repaired++;
2293                         if (backref->found_dir_item &&
2294                             backref->found_dir_index &&
2295                             backref->found_dir_index) {
2296                                 if (!backref->errors &&
2297                                     backref->found_inode_ref) {
2298                                         list_del(&backref->list);
2299                                         free(backref);
2300                                 }
2301                         }
2302                 }
2303
2304                 if (!delete && (!backref->found_dir_index &&
2305                                 !backref->found_dir_item &&
2306                                 backref->found_inode_ref)) {
2307                         struct btrfs_trans_handle *trans;
2308                         struct btrfs_key location;
2309
2310                         ret = check_dir_conflict(root, backref->name,
2311                                                  backref->namelen,
2312                                                  backref->dir,
2313                                                  backref->index);
2314                         if (ret) {
2315                                 /*
2316                                  * let nlink fixing routine to handle it,
2317                                  * which can do it better.
2318                                  */
2319                                 ret = 0;
2320                                 break;
2321                         }
2322                         location.objectid = rec->ino;
2323                         location.type = BTRFS_INODE_ITEM_KEY;
2324                         location.offset = 0;
2325
2326                         trans = btrfs_start_transaction(root, 1);
2327                         if (IS_ERR(trans)) {
2328                                 ret = PTR_ERR(trans);
2329                                 break;
2330                         }
2331                         fprintf(stderr, "adding missing dir index/item pair "
2332                                 "for inode %llu\n",
2333                                 (unsigned long long)rec->ino);
2334                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2335                                                     backref->namelen,
2336                                                     backref->dir, &location,
2337                                                     imode_to_type(rec->imode),
2338                                                     backref->index);
2339                         BUG_ON(ret);
2340                         btrfs_commit_transaction(trans, root);
2341                         repaired++;
2342                 }
2343
2344                 if (!delete && (backref->found_inode_ref &&
2345                                 backref->found_dir_index &&
2346                                 backref->found_dir_item &&
2347                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2348                                 !rec->found_inode_item)) {
2349                         ret = create_inode_item(root, rec, backref, 0);
2350                         if (ret)
2351                                 break;
2352                         repaired++;
2353                 }
2354
2355         }
2356         return ret ? ret : repaired;
2357 }
2358
2359 /*
2360  * To determine the file type for nlink/inode_item repair
2361  *
2362  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2363  * Return -ENOENT if file type is not found.
2364  */
2365 static int find_file_type(struct inode_record *rec, u8 *type)
2366 {
2367         struct inode_backref *backref;
2368
2369         /* For inode item recovered case */
2370         if (rec->found_inode_item) {
2371                 *type = imode_to_type(rec->imode);
2372                 return 0;
2373         }
2374
2375         list_for_each_entry(backref, &rec->backrefs, list) {
2376                 if (backref->found_dir_index || backref->found_dir_item) {
2377                         *type = backref->filetype;
2378                         return 0;
2379                 }
2380         }
2381         return -ENOENT;
2382 }
2383
2384 /*
2385  * To determine the file name for nlink repair
2386  *
2387  * Return 0 if file name is found, set name and namelen.
2388  * Return -ENOENT if file name is not found.
2389  */
2390 static int find_file_name(struct inode_record *rec,
2391                           char *name, int *namelen)
2392 {
2393         struct inode_backref *backref;
2394
2395         list_for_each_entry(backref, &rec->backrefs, list) {
2396                 if (backref->found_dir_index || backref->found_dir_item ||
2397                     backref->found_inode_ref) {
2398                         memcpy(name, backref->name, backref->namelen);
2399                         *namelen = backref->namelen;
2400                         return 0;
2401                 }
2402         }
2403         return -ENOENT;
2404 }
2405
2406 /* Reset the nlink of the inode to the correct one */
2407 static int reset_nlink(struct btrfs_trans_handle *trans,
2408                        struct btrfs_root *root,
2409                        struct btrfs_path *path,
2410                        struct inode_record *rec)
2411 {
2412         struct inode_backref *backref;
2413         struct inode_backref *tmp;
2414         struct btrfs_key key;
2415         struct btrfs_inode_item *inode_item;
2416         int ret = 0;
2417
2418         /* We don't believe this either, reset it and iterate backref */
2419         rec->found_link = 0;
2420
2421         /* Remove all backref including the valid ones */
2422         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2423                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2424                                    backref->index, backref->name,
2425                                    backref->namelen, 0);
2426                 if (ret < 0)
2427                         goto out;
2428
2429                 /* remove invalid backref, so it won't be added back */
2430                 if (!(backref->found_dir_index &&
2431                       backref->found_dir_item &&
2432                       backref->found_inode_ref)) {
2433                         list_del(&backref->list);
2434                         free(backref);
2435                 } else {
2436                         rec->found_link++;
2437                 }
2438         }
2439
2440         /* Set nlink to 0 */
2441         key.objectid = rec->ino;
2442         key.type = BTRFS_INODE_ITEM_KEY;
2443         key.offset = 0;
2444         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2445         if (ret < 0)
2446                 goto out;
2447         if (ret > 0) {
2448                 ret = -ENOENT;
2449                 goto out;
2450         }
2451         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2452                                     struct btrfs_inode_item);
2453         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2454         btrfs_mark_buffer_dirty(path->nodes[0]);
2455         btrfs_release_path(path);
2456
2457         /*
2458          * Add back valid inode_ref/dir_item/dir_index,
2459          * add_link() will handle the nlink inc, so new nlink must be correct
2460          */
2461         list_for_each_entry(backref, &rec->backrefs, list) {
2462                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2463                                      backref->name, backref->namelen,
2464                                      backref->filetype, &backref->index, 1);
2465                 if (ret < 0)
2466                         goto out;
2467         }
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2474                                struct btrfs_root *root,
2475                                struct btrfs_path *path,
2476                                struct inode_record *rec)
2477 {
2478         char *dir_name = "lost+found";
2479         char namebuf[BTRFS_NAME_LEN] = {0};
2480         u64 lost_found_ino;
2481         u32 mode = 0700;
2482         u8 type = 0;
2483         int namelen = 0;
2484         int name_recovered = 0;
2485         int type_recovered = 0;
2486         int ret = 0;
2487
2488         /*
2489          * Get file name and type first before these invalid inode ref
2490          * are deleted by remove_all_invalid_backref()
2491          */
2492         name_recovered = !find_file_name(rec, namebuf, &namelen);
2493         type_recovered = !find_file_type(rec, &type);
2494
2495         if (!name_recovered) {
2496                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2497                        rec->ino, rec->ino);
2498                 namelen = count_digits(rec->ino);
2499                 sprintf(namebuf, "%llu", rec->ino);
2500                 name_recovered = 1;
2501         }
2502         if (!type_recovered) {
2503                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2504                        rec->ino);
2505                 type = BTRFS_FT_REG_FILE;
2506                 type_recovered = 1;
2507         }
2508
2509         ret = reset_nlink(trans, root, path, rec);
2510         if (ret < 0) {
2511                 fprintf(stderr,
2512                         "Failed to reset nlink for inode %llu: %s\n",
2513                         rec->ino, strerror(-ret));
2514                 goto out;
2515         }
2516
2517         if (rec->found_link == 0) {
2518                 lost_found_ino = root->highest_inode;
2519                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2520                         ret = -EOVERFLOW;
2521                         goto out;
2522                 }
2523                 lost_found_ino++;
2524                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2525                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2526                                   mode);
2527                 if (ret < 0) {
2528                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2529                                 dir_name, strerror(-ret));
2530                         goto out;
2531                 }
2532                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2533                                      namebuf, namelen, type, NULL, 1);
2534                 /*
2535                  * Add ".INO" suffix several times to handle case where
2536                  * "FILENAME.INO" is already taken by another file.
2537                  */
2538                 while (ret == -EEXIST) {
2539                         /*
2540                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2541                          */
2542                         if (namelen + count_digits(rec->ino) + 1 >
2543                             BTRFS_NAME_LEN) {
2544                                 ret = -EFBIG;
2545                                 goto out;
2546                         }
2547                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2548                                  ".%llu", rec->ino);
2549                         namelen += count_digits(rec->ino) + 1;
2550                         ret = btrfs_add_link(trans, root, rec->ino,
2551                                              lost_found_ino, namebuf,
2552                                              namelen, type, NULL, 1);
2553                 }
2554                 if (ret < 0) {
2555                         fprintf(stderr,
2556                                 "Failed to link the inode %llu to %s dir: %s\n",
2557                                 rec->ino, dir_name, strerror(-ret));
2558                         goto out;
2559                 }
2560                 /*
2561                  * Just increase the found_link, don't actually add the
2562                  * backref. This will make things easier and this inode
2563                  * record will be freed after the repair is done.
2564                  * So fsck will not report problem about this inode.
2565                  */
2566                 rec->found_link++;
2567                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2568                        namelen, namebuf, dir_name);
2569         }
2570         printf("Fixed the nlink of inode %llu\n", rec->ino);
2571 out:
2572         /*
2573          * Clear the flag anyway, or we will loop forever for the same inode
2574          * as it will not be removed from the bad inode list and the dead loop
2575          * happens.
2576          */
2577         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2578         btrfs_release_path(path);
2579         return ret;
2580 }
2581
2582 /*
2583  * Check if there is any normal(reg or prealloc) file extent for given
2584  * ino.
2585  * This is used to determine the file type when neither its dir_index/item or
2586  * inode_item exists.
2587  *
2588  * This will *NOT* report error, if any error happens, just consider it does
2589  * not have any normal file extent.
2590  */
2591 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2592 {
2593         struct btrfs_path *path;
2594         struct btrfs_key key;
2595         struct btrfs_key found_key;
2596         struct btrfs_file_extent_item *fi;
2597         u8 type;
2598         int ret = 0;
2599
2600         path = btrfs_alloc_path();
2601         if (!path)
2602                 goto out;
2603         key.objectid = ino;
2604         key.type = BTRFS_EXTENT_DATA_KEY;
2605         key.offset = 0;
2606
2607         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2608         if (ret < 0) {
2609                 ret = 0;
2610                 goto out;
2611         }
2612         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2613                 ret = btrfs_next_leaf(root, path);
2614                 if (ret) {
2615                         ret = 0;
2616                         goto out;
2617                 }
2618         }
2619         while (1) {
2620                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2621                                       path->slots[0]);
2622                 if (found_key.objectid != ino ||
2623                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2624                         break;
2625                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2626                                     struct btrfs_file_extent_item);
2627                 type = btrfs_file_extent_type(path->nodes[0], fi);
2628                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2629                         ret = 1;
2630                         goto out;
2631                 }
2632         }
2633 out:
2634         btrfs_free_path(path);
2635         return ret;
2636 }
2637
2638 static u32 btrfs_type_to_imode(u8 type)
2639 {
2640         static u32 imode_by_btrfs_type[] = {
2641                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2642                 [BTRFS_FT_DIR]          = S_IFDIR,
2643                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2644                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2645                 [BTRFS_FT_FIFO]         = S_IFIFO,
2646                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2647                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2648         };
2649
2650         return imode_by_btrfs_type[(type)];
2651 }
2652
2653 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2654                                 struct btrfs_root *root,
2655                                 struct btrfs_path *path,
2656                                 struct inode_record *rec)
2657 {
2658         u8 filetype;
2659         u32 mode = 0700;
2660         int type_recovered = 0;
2661         int ret = 0;
2662
2663         printf("Trying to rebuild inode:%llu\n", rec->ino);
2664
2665         type_recovered = !find_file_type(rec, &filetype);
2666
2667         /*
2668          * Try to determine inode type if type not found.
2669          *
2670          * For found regular file extent, it must be FILE.
2671          * For found dir_item/index, it must be DIR.
2672          *
2673          * For undetermined one, use FILE as fallback.
2674          *
2675          * TODO:
2676          * 1. If found backref(inode_index/item is already handled) to it,
2677          *    it must be DIR.
2678          *    Need new inode-inode ref structure to allow search for that.
2679          */
2680         if (!type_recovered) {
2681                 if (rec->found_file_extent &&
2682                     find_normal_file_extent(root, rec->ino)) {
2683                         type_recovered = 1;
2684                         filetype = BTRFS_FT_REG_FILE;
2685                 } else if (rec->found_dir_item) {
2686                         type_recovered = 1;
2687                         filetype = BTRFS_FT_DIR;
2688                 } else if (!list_empty(&rec->orphan_extents)) {
2689                         type_recovered = 1;
2690                         filetype = BTRFS_FT_REG_FILE;
2691                 } else{
2692                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2693                                rec->ino);
2694                         type_recovered = 1;
2695                         filetype = BTRFS_FT_REG_FILE;
2696                 }
2697         }
2698
2699         ret = btrfs_new_inode(trans, root, rec->ino,
2700                               mode | btrfs_type_to_imode(filetype));
2701         if (ret < 0)
2702                 goto out;
2703
2704         /*
2705          * Here inode rebuild is done, we only rebuild the inode item,
2706          * don't repair the nlink(like move to lost+found).
2707          * That is the job of nlink repair.
2708          *
2709          * We just fill the record and return
2710          */
2711         rec->found_dir_item = 1;
2712         rec->imode = mode | btrfs_type_to_imode(filetype);
2713         rec->nlink = 0;
2714         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2715         /* Ensure the inode_nlinks repair function will be called */
2716         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2717 out:
2718         return ret;
2719 }
2720
2721 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2722                                       struct btrfs_root *root,
2723                                       struct btrfs_path *path,
2724                                       struct inode_record *rec)
2725 {
2726         struct orphan_data_extent *orphan;
2727         struct orphan_data_extent *tmp;
2728         int ret = 0;
2729
2730         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2731                 /*
2732                  * Check for conflicting file extents
2733                  *
2734                  * Here we don't know whether the extents is compressed or not,
2735                  * so we can only assume it not compressed nor data offset,
2736                  * and use its disk_len as extent length.
2737                  */
2738                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2739                                        orphan->offset, orphan->disk_len, 0);
2740                 btrfs_release_path(path);
2741                 if (ret < 0)
2742                         goto out;
2743                 if (!ret) {
2744                         fprintf(stderr,
2745                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2746                                 orphan->disk_bytenr, orphan->disk_len);
2747                         ret = btrfs_free_extent(trans,
2748                                         root->fs_info->extent_root,
2749                                         orphan->disk_bytenr, orphan->disk_len,
2750                                         0, root->objectid, orphan->objectid,
2751                                         orphan->offset);
2752                         if (ret < 0)
2753                                 goto out;
2754                 }
2755                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2756                                 orphan->offset, orphan->disk_bytenr,
2757                                 orphan->disk_len, orphan->disk_len);
2758                 if (ret < 0)
2759                         goto out;
2760
2761                 /* Update file size info */
2762                 rec->found_size += orphan->disk_len;
2763                 if (rec->found_size == rec->nbytes)
2764                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2765
2766                 /* Update the file extent hole info too */
2767                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2768                                            orphan->disk_len);
2769                 if (ret < 0)
2770                         goto out;
2771                 if (RB_EMPTY_ROOT(&rec->holes))
2772                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2773
2774                 list_del(&orphan->list);
2775                 free(orphan);
2776         }
2777         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2778 out:
2779         return ret;
2780 }
2781
2782 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2783                                         struct btrfs_root *root,
2784                                         struct btrfs_path *path,
2785                                         struct inode_record *rec)
2786 {
2787         struct rb_node *node;
2788         struct file_extent_hole *hole;
2789         int found = 0;
2790         int ret = 0;
2791
2792         node = rb_first(&rec->holes);
2793
2794         while (node) {
2795                 found = 1;
2796                 hole = rb_entry(node, struct file_extent_hole, node);
2797                 ret = btrfs_punch_hole(trans, root, rec->ino,
2798                                        hole->start, hole->len);
2799                 if (ret < 0)
2800                         goto out;
2801                 ret = del_file_extent_hole(&rec->holes, hole->start,
2802                                            hole->len);
2803                 if (ret < 0)
2804                         goto out;
2805                 if (RB_EMPTY_ROOT(&rec->holes))
2806                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2807                 node = rb_first(&rec->holes);
2808         }
2809         /* special case for a file losing all its file extent */
2810         if (!found) {
2811                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2812                                        round_up(rec->isize, root->sectorsize));
2813                 if (ret < 0)
2814                         goto out;
2815         }
2816         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2817                rec->ino, root->objectid);
2818 out:
2819         return ret;
2820 }
2821
2822 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2823 {
2824         struct btrfs_trans_handle *trans;
2825         struct btrfs_path *path;
2826         int ret = 0;
2827
2828         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2829                              I_ERR_NO_ORPHAN_ITEM |
2830                              I_ERR_LINK_COUNT_WRONG |
2831                              I_ERR_NO_INODE_ITEM |
2832                              I_ERR_FILE_EXTENT_ORPHAN |
2833                              I_ERR_FILE_EXTENT_DISCOUNT|
2834                              I_ERR_FILE_NBYTES_WRONG)))
2835                 return rec->errors;
2836
2837         path = btrfs_alloc_path();
2838         if (!path)
2839                 return -ENOMEM;
2840
2841         /*
2842          * For nlink repair, it may create a dir and add link, so
2843          * 2 for parent(256)'s dir_index and dir_item
2844          * 2 for lost+found dir's inode_item and inode_ref
2845          * 1 for the new inode_ref of the file
2846          * 2 for lost+found dir's dir_index and dir_item for the file
2847          */
2848         trans = btrfs_start_transaction(root, 7);
2849         if (IS_ERR(trans)) {
2850                 btrfs_free_path(path);
2851                 return PTR_ERR(trans);
2852         }
2853
2854         if (rec->errors & I_ERR_NO_INODE_ITEM)
2855                 ret = repair_inode_no_item(trans, root, path, rec);
2856         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2857                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2858         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2859                 ret = repair_inode_discount_extent(trans, root, path, rec);
2860         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2861                 ret = repair_inode_isize(trans, root, path, rec);
2862         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2863                 ret = repair_inode_orphan_item(trans, root, path, rec);
2864         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2865                 ret = repair_inode_nlinks(trans, root, path, rec);
2866         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2867                 ret = repair_inode_nbytes(trans, root, path, rec);
2868         btrfs_commit_transaction(trans, root);
2869         btrfs_free_path(path);
2870         return ret;
2871 }
2872
2873 static int check_inode_recs(struct btrfs_root *root,
2874                             struct cache_tree *inode_cache)
2875 {
2876         struct cache_extent *cache;
2877         struct ptr_node *node;
2878         struct inode_record *rec;
2879         struct inode_backref *backref;
2880         int stage = 0;
2881         int ret = 0;
2882         int err = 0;
2883         u64 error = 0;
2884         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2885
2886         if (btrfs_root_refs(&root->root_item) == 0) {
2887                 if (!cache_tree_empty(inode_cache))
2888                         fprintf(stderr, "warning line %d\n", __LINE__);
2889                 return 0;
2890         }
2891
2892         /*
2893          * We need to record the highest inode number for later 'lost+found'
2894          * dir creation.
2895          * We must select a ino not used/refered by any existing inode, or
2896          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2897          * this may cause 'lost+found' dir has wrong nlinks.
2898          */
2899         cache = last_cache_extent(inode_cache);
2900         if (cache) {
2901                 node = container_of(cache, struct ptr_node, cache);
2902                 rec = node->data;
2903                 if (rec->ino > root->highest_inode)
2904                         root->highest_inode = rec->ino;
2905         }
2906
2907         /*
2908          * We need to repair backrefs first because we could change some of the
2909          * errors in the inode recs.
2910          *
2911          * We also need to go through and delete invalid backrefs first and then
2912          * add the correct ones second.  We do this because we may get EEXIST
2913          * when adding back the correct index because we hadn't yet deleted the
2914          * invalid index.
2915          *
2916          * For example, if we were missing a dir index then the directories
2917          * isize would be wrong, so if we fixed the isize to what we thought it
2918          * would be and then fixed the backref we'd still have a invalid fs, so
2919          * we need to add back the dir index and then check to see if the isize
2920          * is still wrong.
2921          */
2922         while (stage < 3) {
2923                 stage++;
2924                 if (stage == 3 && !err)
2925                         break;
2926
2927                 cache = search_cache_extent(inode_cache, 0);
2928                 while (repair && cache) {
2929                         node = container_of(cache, struct ptr_node, cache);
2930                         rec = node->data;
2931                         cache = next_cache_extent(cache);
2932
2933                         /* Need to free everything up and rescan */
2934                         if (stage == 3) {
2935                                 remove_cache_extent(inode_cache, &node->cache);
2936                                 free(node);
2937                                 free_inode_rec(rec);
2938                                 continue;
2939                         }
2940
2941                         if (list_empty(&rec->backrefs))
2942                                 continue;
2943
2944                         ret = repair_inode_backrefs(root, rec, inode_cache,
2945                                                     stage == 1);
2946                         if (ret < 0) {
2947                                 err = ret;
2948                                 stage = 2;
2949                                 break;
2950                         } if (ret > 0) {
2951                                 err = -EAGAIN;
2952                         }
2953                 }
2954         }
2955         if (err)
2956                 return err;
2957
2958         rec = get_inode_rec(inode_cache, root_dirid, 0);
2959         BUG_ON(IS_ERR(rec));
2960         if (rec) {
2961                 ret = check_root_dir(rec);
2962                 if (ret) {
2963                         fprintf(stderr, "root %llu root dir %llu error\n",
2964                                 (unsigned long long)root->root_key.objectid,
2965                                 (unsigned long long)root_dirid);
2966                         print_inode_error(root, rec);
2967                         error++;
2968                 }
2969         } else {
2970                 if (repair) {
2971                         struct btrfs_trans_handle *trans;
2972
2973                         trans = btrfs_start_transaction(root, 1);
2974                         if (IS_ERR(trans)) {
2975                                 err = PTR_ERR(trans);
2976                                 return err;
2977                         }
2978
2979                         fprintf(stderr,
2980                                 "root %llu missing its root dir, recreating\n",
2981                                 (unsigned long long)root->objectid);
2982
2983                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2984                         BUG_ON(ret);
2985
2986                         btrfs_commit_transaction(trans, root);
2987                         return -EAGAIN;
2988                 }
2989
2990                 fprintf(stderr, "root %llu root dir %llu not found\n",
2991                         (unsigned long long)root->root_key.objectid,
2992                         (unsigned long long)root_dirid);
2993         }
2994
2995         while (1) {
2996                 cache = search_cache_extent(inode_cache, 0);
2997                 if (!cache)
2998                         break;
2999                 node = container_of(cache, struct ptr_node, cache);
3000                 rec = node->data;
3001                 remove_cache_extent(inode_cache, &node->cache);
3002                 free(node);
3003                 if (rec->ino == root_dirid ||
3004                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3005                         free_inode_rec(rec);
3006                         continue;
3007                 }
3008
3009                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3010                         ret = check_orphan_item(root, rec->ino);
3011                         if (ret == 0)
3012                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3013                         if (can_free_inode_rec(rec)) {
3014                                 free_inode_rec(rec);
3015                                 continue;
3016                         }
3017                 }
3018
3019                 if (!rec->found_inode_item)
3020                         rec->errors |= I_ERR_NO_INODE_ITEM;
3021                 if (rec->found_link != rec->nlink)
3022                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3023                 if (repair) {
3024                         ret = try_repair_inode(root, rec);
3025                         if (ret == 0 && can_free_inode_rec(rec)) {
3026                                 free_inode_rec(rec);
3027                                 continue;
3028                         }
3029                         ret = 0;
3030                 }
3031
3032                 if (!(repair && ret == 0))
3033                         error++;
3034                 print_inode_error(root, rec);
3035                 list_for_each_entry(backref, &rec->backrefs, list) {
3036                         if (!backref->found_dir_item)
3037                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3038                         if (!backref->found_dir_index)
3039                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3040                         if (!backref->found_inode_ref)
3041                                 backref->errors |= REF_ERR_NO_INODE_REF;
3042                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3043                                 " namelen %u name %s filetype %d errors %x",
3044                                 (unsigned long long)backref->dir,
3045                                 (unsigned long long)backref->index,
3046                                 backref->namelen, backref->name,
3047                                 backref->filetype, backref->errors);
3048                         print_ref_error(backref->errors);
3049                 }
3050                 free_inode_rec(rec);
3051         }
3052         return (error > 0) ? -1 : 0;
3053 }
3054
3055 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3056                                         u64 objectid)
3057 {
3058         struct cache_extent *cache;
3059         struct root_record *rec = NULL;
3060         int ret;
3061
3062         cache = lookup_cache_extent(root_cache, objectid, 1);
3063         if (cache) {
3064                 rec = container_of(cache, struct root_record, cache);
3065         } else {
3066                 rec = calloc(1, sizeof(*rec));
3067                 if (!rec)
3068                         return ERR_PTR(-ENOMEM);
3069                 rec->objectid = objectid;
3070                 INIT_LIST_HEAD(&rec->backrefs);
3071                 rec->cache.start = objectid;
3072                 rec->cache.size = 1;
3073
3074                 ret = insert_cache_extent(root_cache, &rec->cache);
3075                 if (ret)
3076                         return ERR_PTR(-EEXIST);
3077         }
3078         return rec;
3079 }
3080
3081 static struct root_backref *get_root_backref(struct root_record *rec,
3082                                              u64 ref_root, u64 dir, u64 index,
3083                                              const char *name, int namelen)
3084 {
3085         struct root_backref *backref;
3086
3087         list_for_each_entry(backref, &rec->backrefs, list) {
3088                 if (backref->ref_root != ref_root || backref->dir != dir ||
3089                     backref->namelen != namelen)
3090                         continue;
3091                 if (memcmp(name, backref->name, namelen))
3092                         continue;
3093                 return backref;
3094         }
3095
3096         backref = calloc(1, sizeof(*backref) + namelen + 1);
3097         if (!backref)
3098                 return NULL;
3099         backref->ref_root = ref_root;
3100         backref->dir = dir;
3101         backref->index = index;
3102         backref->namelen = namelen;
3103         memcpy(backref->name, name, namelen);
3104         backref->name[namelen] = '\0';
3105         list_add_tail(&backref->list, &rec->backrefs);
3106         return backref;
3107 }
3108
3109 static void free_root_record(struct cache_extent *cache)
3110 {
3111         struct root_record *rec;
3112         struct root_backref *backref;
3113
3114         rec = container_of(cache, struct root_record, cache);
3115         while (!list_empty(&rec->backrefs)) {
3116                 backref = list_entry(rec->backrefs.next,
3117                                      struct root_backref, list);
3118                 list_del(&backref->list);
3119                 free(backref);
3120         }
3121
3122         kfree(rec);
3123 }
3124
3125 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3126
3127 static int add_root_backref(struct cache_tree *root_cache,
3128                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3129                             const char *name, int namelen,
3130                             int item_type, int errors)
3131 {
3132         struct root_record *rec;
3133         struct root_backref *backref;
3134
3135         rec = get_root_rec(root_cache, root_id);
3136         BUG_ON(IS_ERR(rec));
3137         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3138         BUG_ON(!backref);
3139
3140         backref->errors |= errors;
3141
3142         if (item_type != BTRFS_DIR_ITEM_KEY) {
3143                 if (backref->found_dir_index || backref->found_back_ref ||
3144                     backref->found_forward_ref) {
3145                         if (backref->index != index)
3146                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3147                 } else {
3148                         backref->index = index;
3149                 }
3150         }
3151
3152         if (item_type == BTRFS_DIR_ITEM_KEY) {
3153                 if (backref->found_forward_ref)
3154                         rec->found_ref++;
3155                 backref->found_dir_item = 1;
3156         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3157                 backref->found_dir_index = 1;
3158         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3159                 if (backref->found_forward_ref)
3160                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3161                 else if (backref->found_dir_item)
3162                         rec->found_ref++;
3163                 backref->found_forward_ref = 1;
3164         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3165                 if (backref->found_back_ref)
3166                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3167                 backref->found_back_ref = 1;
3168         } else {
3169                 BUG_ON(1);
3170         }
3171
3172         if (backref->found_forward_ref && backref->found_dir_item)
3173                 backref->reachable = 1;
3174         return 0;
3175 }
3176
3177 static int merge_root_recs(struct btrfs_root *root,
3178                            struct cache_tree *src_cache,
3179                            struct cache_tree *dst_cache)
3180 {
3181         struct cache_extent *cache;
3182         struct ptr_node *node;
3183         struct inode_record *rec;
3184         struct inode_backref *backref;
3185         int ret = 0;
3186
3187         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3188                 free_inode_recs_tree(src_cache);
3189                 return 0;
3190         }
3191
3192         while (1) {
3193                 cache = search_cache_extent(src_cache, 0);
3194                 if (!cache)
3195                         break;
3196                 node = container_of(cache, struct ptr_node, cache);
3197                 rec = node->data;
3198                 remove_cache_extent(src_cache, &node->cache);
3199                 free(node);
3200
3201                 ret = is_child_root(root, root->objectid, rec->ino);
3202                 if (ret < 0)
3203                         break;
3204                 else if (ret == 0)
3205                         goto skip;
3206
3207                 list_for_each_entry(backref, &rec->backrefs, list) {
3208                         BUG_ON(backref->found_inode_ref);
3209                         if (backref->found_dir_item)
3210                                 add_root_backref(dst_cache, rec->ino,
3211                                         root->root_key.objectid, backref->dir,
3212                                         backref->index, backref->name,
3213                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3214                                         backref->errors);
3215                         if (backref->found_dir_index)
3216                                 add_root_backref(dst_cache, rec->ino,
3217                                         root->root_key.objectid, backref->dir,
3218                                         backref->index, backref->name,
3219                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3220                                         backref->errors);
3221                 }
3222 skip:
3223                 free_inode_rec(rec);
3224         }
3225         if (ret < 0)
3226                 return ret;
3227         return 0;
3228 }
3229
3230 static int check_root_refs(struct btrfs_root *root,
3231                            struct cache_tree *root_cache)
3232 {
3233         struct root_record *rec;
3234         struct root_record *ref_root;
3235         struct root_backref *backref;
3236         struct cache_extent *cache;
3237         int loop = 1;
3238         int ret;
3239         int error;
3240         int errors = 0;
3241
3242         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3243         BUG_ON(IS_ERR(rec));
3244         rec->found_ref = 1;
3245
3246         /* fixme: this can not detect circular references */
3247         while (loop) {
3248                 loop = 0;
3249                 cache = search_cache_extent(root_cache, 0);
3250                 while (1) {
3251                         if (!cache)
3252                                 break;
3253                         rec = container_of(cache, struct root_record, cache);
3254                         cache = next_cache_extent(cache);
3255
3256                         if (rec->found_ref == 0)
3257                                 continue;
3258
3259                         list_for_each_entry(backref, &rec->backrefs, list) {
3260                                 if (!backref->reachable)
3261                                         continue;
3262
3263                                 ref_root = get_root_rec(root_cache,
3264                                                         backref->ref_root);
3265                                 BUG_ON(IS_ERR(ref_root));
3266                                 if (ref_root->found_ref > 0)
3267                                         continue;
3268
3269                                 backref->reachable = 0;
3270                                 rec->found_ref--;
3271                                 if (rec->found_ref == 0)
3272                                         loop = 1;
3273                         }
3274                 }
3275         }
3276
3277         cache = search_cache_extent(root_cache, 0);
3278         while (1) {
3279                 if (!cache)
3280                         break;
3281                 rec = container_of(cache, struct root_record, cache);
3282                 cache = next_cache_extent(cache);
3283
3284                 if (rec->found_ref == 0 &&
3285                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3286                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3287                         ret = check_orphan_item(root->fs_info->tree_root,
3288                                                 rec->objectid);
3289                         if (ret == 0)
3290                                 continue;
3291
3292                         /*
3293                          * If we don't have a root item then we likely just have
3294                          * a dir item in a snapshot for this root but no actual
3295                          * ref key or anything so it's meaningless.
3296                          */
3297                         if (!rec->found_root_item)
3298                                 continue;
3299                         errors++;
3300                         fprintf(stderr, "fs tree %llu not referenced\n",
3301                                 (unsigned long long)rec->objectid);
3302                 }
3303
3304                 error = 0;
3305                 if (rec->found_ref > 0 && !rec->found_root_item)
3306                         error = 1;
3307                 list_for_each_entry(backref, &rec->backrefs, list) {
3308                         if (!backref->found_dir_item)
3309                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3310                         if (!backref->found_dir_index)
3311                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3312                         if (!backref->found_back_ref)
3313                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3314                         if (!backref->found_forward_ref)
3315                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3316                         if (backref->reachable && backref->errors)
3317                                 error = 1;
3318                 }
3319                 if (!error)
3320                         continue;
3321
3322                 errors++;
3323                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3324                         (unsigned long long)rec->objectid, rec->found_ref,
3325                          rec->found_root_item ? "" : "not found");
3326
3327                 list_for_each_entry(backref, &rec->backrefs, list) {
3328                         if (!backref->reachable)
3329                                 continue;
3330                         if (!backref->errors && rec->found_root_item)
3331                                 continue;
3332                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3333                                 " index %llu namelen %u name %s errors %x\n",
3334                                 (unsigned long long)backref->ref_root,
3335                                 (unsigned long long)backref->dir,
3336                                 (unsigned long long)backref->index,
3337                                 backref->namelen, backref->name,
3338                                 backref->errors);
3339                         print_ref_error(backref->errors);
3340                 }
3341         }
3342         return errors > 0 ? 1 : 0;
3343 }
3344
3345 static int process_root_ref(struct extent_buffer *eb, int slot,
3346                             struct btrfs_key *key,
3347                             struct cache_tree *root_cache)
3348 {
3349         u64 dirid;
3350         u64 index;
3351         u32 len;
3352         u32 name_len;
3353         struct btrfs_root_ref *ref;
3354         char namebuf[BTRFS_NAME_LEN];
3355         int error;
3356
3357         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3358
3359         dirid = btrfs_root_ref_dirid(eb, ref);
3360         index = btrfs_root_ref_sequence(eb, ref);
3361         name_len = btrfs_root_ref_name_len(eb, ref);
3362
3363         if (name_len <= BTRFS_NAME_LEN) {
3364                 len = name_len;
3365                 error = 0;
3366         } else {
3367                 len = BTRFS_NAME_LEN;
3368                 error = REF_ERR_NAME_TOO_LONG;
3369         }
3370         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3371
3372         if (key->type == BTRFS_ROOT_REF_KEY) {
3373                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3374                                  index, namebuf, len, key->type, error);
3375         } else {
3376                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3377                                  index, namebuf, len, key->type, error);
3378         }
3379         return 0;
3380 }
3381
3382 static void free_corrupt_block(struct cache_extent *cache)
3383 {
3384         struct btrfs_corrupt_block *corrupt;
3385
3386         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3387         free(corrupt);
3388 }
3389
3390 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3391
3392 /*
3393  * Repair the btree of the given root.
3394  *
3395  * The fix is to remove the node key in corrupt_blocks cache_tree.
3396  * and rebalance the tree.
3397  * After the fix, the btree should be writeable.
3398  */
3399 static int repair_btree(struct btrfs_root *root,
3400                         struct cache_tree *corrupt_blocks)
3401 {
3402         struct btrfs_trans_handle *trans;
3403         struct btrfs_path *path;
3404         struct btrfs_corrupt_block *corrupt;
3405         struct cache_extent *cache;
3406         struct btrfs_key key;
3407         u64 offset;
3408         int level;
3409         int ret = 0;
3410
3411         if (cache_tree_empty(corrupt_blocks))
3412                 return 0;
3413
3414         path = btrfs_alloc_path();
3415         if (!path)
3416                 return -ENOMEM;
3417
3418         trans = btrfs_start_transaction(root, 1);
3419         if (IS_ERR(trans)) {
3420                 ret = PTR_ERR(trans);
3421                 fprintf(stderr, "Error starting transaction: %s\n",
3422                         strerror(-ret));
3423                 goto out_free_path;
3424         }
3425         cache = first_cache_extent(corrupt_blocks);
3426         while (cache) {
3427                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3428                                        cache);
3429                 level = corrupt->level;
3430                 path->lowest_level = level;
3431                 key.objectid = corrupt->key.objectid;
3432                 key.type = corrupt->key.type;
3433                 key.offset = corrupt->key.offset;
3434
3435                 /*
3436                  * Here we don't want to do any tree balance, since it may
3437                  * cause a balance with corrupted brother leaf/node,
3438                  * so ins_len set to 0 here.
3439                  * Balance will be done after all corrupt node/leaf is deleted.
3440                  */
3441                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3442                 if (ret < 0)
3443                         goto out;
3444                 offset = btrfs_node_blockptr(path->nodes[level],
3445                                              path->slots[level]);
3446
3447                 /* Remove the ptr */
3448                 ret = btrfs_del_ptr(trans, root, path, level,
3449                                     path->slots[level]);
3450                 if (ret < 0)
3451                         goto out;
3452                 /*
3453                  * Remove the corresponding extent
3454                  * return value is not concerned.
3455                  */
3456                 btrfs_release_path(path);
3457                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3458                                         0, root->root_key.objectid,
3459                                         level - 1, 0);
3460                 cache = next_cache_extent(cache);
3461         }
3462
3463         /* Balance the btree using btrfs_search_slot() */
3464         cache = first_cache_extent(corrupt_blocks);
3465         while (cache) {
3466                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3467                                        cache);
3468                 memcpy(&key, &corrupt->key, sizeof(key));
3469                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3470                 if (ret < 0)
3471                         goto out;
3472                 /* return will always >0 since it won't find the item */
3473                 ret = 0;
3474                 btrfs_release_path(path);
3475                 cache = next_cache_extent(cache);
3476         }
3477 out:
3478         btrfs_commit_transaction(trans, root);
3479 out_free_path:
3480         btrfs_free_path(path);
3481         return ret;
3482 }
3483
3484 static int check_fs_root(struct btrfs_root *root,
3485                          struct cache_tree *root_cache,
3486                          struct walk_control *wc)
3487 {
3488         int ret = 0;
3489         int err = 0;
3490         int wret;
3491         int level;
3492         struct btrfs_path path;
3493         struct shared_node root_node;
3494         struct root_record *rec;
3495         struct btrfs_root_item *root_item = &root->root_item;
3496         struct cache_tree corrupt_blocks;
3497         struct orphan_data_extent *orphan;
3498         struct orphan_data_extent *tmp;
3499         enum btrfs_tree_block_status status;
3500
3501         /*
3502          * Reuse the corrupt_block cache tree to record corrupted tree block
3503          *
3504          * Unlike the usage in extent tree check, here we do it in a per
3505          * fs/subvol tree base.
3506          */
3507         cache_tree_init(&corrupt_blocks);
3508         root->fs_info->corrupt_blocks = &corrupt_blocks;
3509
3510         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3511                 rec = get_root_rec(root_cache, root->root_key.objectid);
3512                 BUG_ON(IS_ERR(rec));
3513                 if (btrfs_root_refs(root_item) > 0)
3514                         rec->found_root_item = 1;
3515         }
3516
3517         btrfs_init_path(&path);
3518         memset(&root_node, 0, sizeof(root_node));
3519         cache_tree_init(&root_node.root_cache);
3520         cache_tree_init(&root_node.inode_cache);
3521
3522         /* Move the orphan extent record to corresponding inode_record */
3523         list_for_each_entry_safe(orphan, tmp,
3524                                  &root->orphan_data_extents, list) {
3525                 struct inode_record *inode;
3526
3527                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3528                                       1);
3529                 BUG_ON(IS_ERR(inode));
3530                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3531                 list_move(&orphan->list, &inode->orphan_extents);
3532         }
3533
3534         level = btrfs_header_level(root->node);
3535         memset(wc->nodes, 0, sizeof(wc->nodes));
3536         wc->nodes[level] = &root_node;
3537         wc->active_node = level;
3538         wc->root_level = level;
3539
3540         /* We may not have checked the root block, lets do that now */
3541         if (btrfs_is_leaf(root->node))
3542                 status = btrfs_check_leaf(root, NULL, root->node);
3543         else
3544                 status = btrfs_check_node(root, NULL, root->node);
3545         if (status != BTRFS_TREE_BLOCK_CLEAN)
3546                 return -EIO;
3547
3548         if (btrfs_root_refs(root_item) > 0 ||
3549             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3550                 path.nodes[level] = root->node;
3551                 extent_buffer_get(root->node);
3552                 path.slots[level] = 0;
3553         } else {
3554                 struct btrfs_key key;
3555                 struct btrfs_disk_key found_key;
3556
3557                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3558                 level = root_item->drop_level;
3559                 path.lowest_level = level;
3560                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3561                 if (wret < 0)
3562                         goto skip_walking;
3563                 btrfs_node_key(path.nodes[level], &found_key,
3564                                 path.slots[level]);
3565                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3566                                         sizeof(found_key)));
3567         }
3568
3569         while (1) {
3570                 wret = walk_down_tree(root, &path, wc, &level);
3571                 if (wret < 0)
3572                         ret = wret;
3573                 if (wret != 0)
3574                         break;
3575
3576                 wret = walk_up_tree(root, &path, wc, &level);
3577                 if (wret < 0)
3578                         ret = wret;
3579                 if (wret != 0)
3580                         break;
3581         }
3582 skip_walking:
3583         btrfs_release_path(&path);
3584
3585         if (!cache_tree_empty(&corrupt_blocks)) {
3586                 struct cache_extent *cache;
3587                 struct btrfs_corrupt_block *corrupt;
3588
3589                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3590                        root->root_key.objectid);
3591                 cache = first_cache_extent(&corrupt_blocks);
3592                 while (cache) {
3593                         corrupt = container_of(cache,
3594                                                struct btrfs_corrupt_block,
3595                                                cache);
3596                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3597                                cache->start, corrupt->level,
3598                                corrupt->key.objectid, corrupt->key.type,
3599                                corrupt->key.offset);
3600                         cache = next_cache_extent(cache);
3601                 }
3602                 if (repair) {
3603                         printf("Try to repair the btree for root %llu\n",
3604                                root->root_key.objectid);
3605                         ret = repair_btree(root, &corrupt_blocks);
3606                         if (ret < 0)
3607                                 fprintf(stderr, "Failed to repair btree: %s\n",
3608                                         strerror(-ret));
3609                         if (!ret)
3610                                 printf("Btree for root %llu is fixed\n",
3611                                        root->root_key.objectid);
3612                 }
3613         }
3614
3615         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3616         if (err < 0)
3617                 ret = err;
3618
3619         if (root_node.current) {
3620                 root_node.current->checked = 1;
3621                 maybe_free_inode_rec(&root_node.inode_cache,
3622                                 root_node.current);
3623         }
3624
3625         err = check_inode_recs(root, &root_node.inode_cache);
3626         if (!ret)
3627                 ret = err;
3628
3629         free_corrupt_blocks_tree(&corrupt_blocks);
3630         root->fs_info->corrupt_blocks = NULL;
3631         free_orphan_data_extents(&root->orphan_data_extents);
3632         return ret;
3633 }
3634
3635 static int fs_root_objectid(u64 objectid)
3636 {
3637         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3638             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3639                 return 1;
3640         return is_fstree(objectid);
3641 }
3642
3643 static int check_fs_roots(struct btrfs_root *root,
3644                           struct cache_tree *root_cache)
3645 {
3646         struct btrfs_path path;
3647         struct btrfs_key key;
3648         struct walk_control wc;
3649         struct extent_buffer *leaf, *tree_node;
3650         struct btrfs_root *tmp_root;
3651         struct btrfs_root *tree_root = root->fs_info->tree_root;
3652         int ret;
3653         int err = 0;
3654
3655         if (ctx.progress_enabled) {
3656                 ctx.tp = TASK_FS_ROOTS;
3657                 task_start(ctx.info);
3658         }
3659
3660         /*
3661          * Just in case we made any changes to the extent tree that weren't
3662          * reflected into the free space cache yet.
3663          */
3664         if (repair)
3665                 reset_cached_block_groups(root->fs_info);
3666         memset(&wc, 0, sizeof(wc));
3667         cache_tree_init(&wc.shared);
3668         btrfs_init_path(&path);
3669
3670 again:
3671         key.offset = 0;
3672         key.objectid = 0;
3673         key.type = BTRFS_ROOT_ITEM_KEY;
3674         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3675         if (ret < 0) {
3676                 err = 1;
3677                 goto out;
3678         }
3679         tree_node = tree_root->node;
3680         while (1) {
3681                 if (tree_node != tree_root->node) {
3682                         free_root_recs_tree(root_cache);
3683                         btrfs_release_path(&path);
3684                         goto again;
3685                 }
3686                 leaf = path.nodes[0];
3687                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3688                         ret = btrfs_next_leaf(tree_root, &path);
3689                         if (ret) {
3690                                 if (ret < 0)
3691                                         err = 1;
3692                                 break;
3693                         }
3694                         leaf = path.nodes[0];
3695                 }
3696                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3697                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3698                     fs_root_objectid(key.objectid)) {
3699                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3700                                 tmp_root = btrfs_read_fs_root_no_cache(
3701                                                 root->fs_info, &key);
3702                         } else {
3703                                 key.offset = (u64)-1;
3704                                 tmp_root = btrfs_read_fs_root(
3705                                                 root->fs_info, &key);
3706                         }
3707                         if (IS_ERR(tmp_root)) {
3708                                 err = 1;
3709                                 goto next;
3710                         }
3711                         ret = check_fs_root(tmp_root, root_cache, &wc);
3712                         if (ret == -EAGAIN) {
3713                                 free_root_recs_tree(root_cache);
3714                                 btrfs_release_path(&path);
3715                                 goto again;
3716                         }
3717                         if (ret)
3718                                 err = 1;
3719                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3720                                 btrfs_free_fs_root(tmp_root);
3721                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3722                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3723                         process_root_ref(leaf, path.slots[0], &key,
3724                                          root_cache);
3725                 }
3726 next:
3727                 path.slots[0]++;
3728         }
3729 out:
3730         btrfs_release_path(&path);
3731         if (err)
3732                 free_extent_cache_tree(&wc.shared);
3733         if (!cache_tree_empty(&wc.shared))
3734                 fprintf(stderr, "warning line %d\n", __LINE__);
3735
3736         task_stop(ctx.info);
3737
3738         return err;
3739 }
3740
3741 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3742 {
3743         struct list_head *cur = rec->backrefs.next;
3744         struct extent_backref *back;
3745         struct tree_backref *tback;
3746         struct data_backref *dback;
3747         u64 found = 0;
3748         int err = 0;
3749
3750         while(cur != &rec->backrefs) {
3751                 back = list_entry(cur, struct extent_backref, list);
3752                 cur = cur->next;
3753                 if (!back->found_extent_tree) {
3754                         err = 1;
3755                         if (!print_errs)
3756                                 goto out;
3757                         if (back->is_data) {
3758                                 dback = (struct data_backref *)back;
3759                                 fprintf(stderr, "Backref %llu %s %llu"
3760                                         " owner %llu offset %llu num_refs %lu"
3761                                         " not found in extent tree\n",
3762                                         (unsigned long long)rec->start,
3763                                         back->full_backref ?
3764                                         "parent" : "root",
3765                                         back->full_backref ?
3766                                         (unsigned long long)dback->parent:
3767                                         (unsigned long long)dback->root,
3768                                         (unsigned long long)dback->owner,
3769                                         (unsigned long long)dback->offset,
3770                                         (unsigned long)dback->num_refs);
3771                         } else {
3772                                 tback = (struct tree_backref *)back;
3773                                 fprintf(stderr, "Backref %llu parent %llu"
3774                                         " root %llu not found in extent tree\n",
3775                                         (unsigned long long)rec->start,
3776                                         (unsigned long long)tback->parent,
3777                                         (unsigned long long)tback->root);
3778                         }
3779                 }
3780                 if (!back->is_data && !back->found_ref) {
3781                         err = 1;
3782                         if (!print_errs)
3783                                 goto out;
3784                         tback = (struct tree_backref *)back;
3785                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3786                                 (unsigned long long)rec->start,
3787                                 back->full_backref ? "parent" : "root",
3788                                 back->full_backref ?
3789                                 (unsigned long long)tback->parent :
3790                                 (unsigned long long)tback->root, back);
3791                 }
3792                 if (back->is_data) {
3793                         dback = (struct data_backref *)back;
3794                         if (dback->found_ref != dback->num_refs) {
3795                                 err = 1;
3796                                 if (!print_errs)
3797                                         goto out;
3798                                 fprintf(stderr, "Incorrect local backref count"
3799                                         " on %llu %s %llu owner %llu"
3800                                         " offset %llu found %u wanted %u back %p\n",
3801                                         (unsigned long long)rec->start,
3802                                         back->full_backref ?
3803                                         "parent" : "root",
3804                                         back->full_backref ?
3805                                         (unsigned long long)dback->parent:
3806                                         (unsigned long long)dback->root,
3807                                         (unsigned long long)dback->owner,
3808                                         (unsigned long long)dback->offset,
3809                                         dback->found_ref, dback->num_refs, back);
3810                         }
3811                         if (dback->disk_bytenr != rec->start) {
3812                                 err = 1;
3813                                 if (!print_errs)
3814                                         goto out;
3815                                 fprintf(stderr, "Backref disk bytenr does not"
3816                                         " match extent record, bytenr=%llu, "
3817                                         "ref bytenr=%llu\n",
3818                                         (unsigned long long)rec->start,
3819                                         (unsigned long long)dback->disk_bytenr);
3820                         }
3821
3822                         if (dback->bytes != rec->nr) {
3823                                 err = 1;
3824                                 if (!print_errs)
3825                                         goto out;
3826                                 fprintf(stderr, "Backref bytes do not match "
3827                                         "extent backref, bytenr=%llu, ref "
3828                                         "bytes=%llu, backref bytes=%llu\n",
3829                                         (unsigned long long)rec->start,
3830                                         (unsigned long long)rec->nr,
3831                                         (unsigned long long)dback->bytes);
3832                         }
3833                 }
3834                 if (!back->is_data) {
3835                         found += 1;
3836                 } else {
3837                         dback = (struct data_backref *)back;
3838                         found += dback->found_ref;
3839                 }
3840         }
3841         if (found != rec->refs) {
3842                 err = 1;
3843                 if (!print_errs)
3844                         goto out;
3845                 fprintf(stderr, "Incorrect global backref count "
3846                         "on %llu found %llu wanted %llu\n",
3847                         (unsigned long long)rec->start,
3848                         (unsigned long long)found,
3849                         (unsigned long long)rec->refs);
3850         }
3851 out:
3852         return err;
3853 }
3854
3855 static int free_all_extent_backrefs(struct extent_record *rec)
3856 {
3857         struct extent_backref *back;
3858         struct list_head *cur;
3859         while (!list_empty(&rec->backrefs)) {
3860                 cur = rec->backrefs.next;
3861                 back = list_entry(cur, struct extent_backref, list);
3862                 list_del(cur);
3863                 free(back);
3864         }
3865         return 0;
3866 }
3867
3868 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3869                                      struct cache_tree *extent_cache)
3870 {
3871         struct cache_extent *cache;
3872         struct extent_record *rec;
3873
3874         while (1) {
3875                 cache = first_cache_extent(extent_cache);
3876                 if (!cache)
3877                         break;
3878                 rec = container_of(cache, struct extent_record, cache);
3879                 remove_cache_extent(extent_cache, cache);
3880                 free_all_extent_backrefs(rec);
3881                 free(rec);
3882         }
3883 }
3884
3885 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3886                                  struct extent_record *rec)
3887 {
3888         if (rec->content_checked && rec->owner_ref_checked &&
3889             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3890             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3891             !rec->bad_full_backref && !rec->crossing_stripes &&
3892             !rec->wrong_chunk_type) {
3893                 remove_cache_extent(extent_cache, &rec->cache);
3894                 free_all_extent_backrefs(rec);
3895                 list_del_init(&rec->list);
3896                 free(rec);
3897         }
3898         return 0;
3899 }
3900
3901 static int check_owner_ref(struct btrfs_root *root,
3902                             struct extent_record *rec,
3903                             struct extent_buffer *buf)
3904 {
3905         struct extent_backref *node;
3906         struct tree_backref *back;
3907         struct btrfs_root *ref_root;
3908         struct btrfs_key key;
3909         struct btrfs_path path;
3910         struct extent_buffer *parent;
3911         int level;
3912         int found = 0;
3913         int ret;
3914
3915         list_for_each_entry(node, &rec->backrefs, list) {
3916                 if (node->is_data)
3917                         continue;
3918                 if (!node->found_ref)
3919                         continue;
3920                 if (node->full_backref)
3921                         continue;
3922                 back = (struct tree_backref *)node;
3923                 if (btrfs_header_owner(buf) == back->root)
3924                         return 0;
3925         }
3926         BUG_ON(rec->is_root);
3927
3928         /* try to find the block by search corresponding fs tree */
3929         key.objectid = btrfs_header_owner(buf);
3930         key.type = BTRFS_ROOT_ITEM_KEY;
3931         key.offset = (u64)-1;
3932
3933         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3934         if (IS_ERR(ref_root))
3935                 return 1;
3936
3937         level = btrfs_header_level(buf);
3938         if (level == 0)
3939                 btrfs_item_key_to_cpu(buf, &key, 0);
3940         else
3941                 btrfs_node_key_to_cpu(buf, &key, 0);
3942
3943         btrfs_init_path(&path);
3944         path.lowest_level = level + 1;
3945         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3946         if (ret < 0)
3947                 return 0;
3948
3949         parent = path.nodes[level + 1];
3950         if (parent && buf->start == btrfs_node_blockptr(parent,
3951                                                         path.slots[level + 1]))
3952                 found = 1;
3953
3954         btrfs_release_path(&path);
3955         return found ? 0 : 1;
3956 }
3957
3958 static int is_extent_tree_record(struct extent_record *rec)
3959 {
3960         struct list_head *cur = rec->backrefs.next;
3961         struct extent_backref *node;
3962         struct tree_backref *back;
3963         int is_extent = 0;
3964
3965         while(cur != &rec->backrefs) {
3966                 node = list_entry(cur, struct extent_backref, list);
3967                 cur = cur->next;
3968                 if (node->is_data)
3969                         return 0;
3970                 back = (struct tree_backref *)node;
3971                 if (node->full_backref)
3972                         return 0;
3973                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3974                         is_extent = 1;
3975         }
3976         return is_extent;
3977 }
3978
3979
3980 static int record_bad_block_io(struct btrfs_fs_info *info,
3981                                struct cache_tree *extent_cache,
3982                                u64 start, u64 len)
3983 {
3984         struct extent_record *rec;
3985         struct cache_extent *cache;
3986         struct btrfs_key key;
3987
3988         cache = lookup_cache_extent(extent_cache, start, len);
3989         if (!cache)
3990                 return 0;
3991
3992         rec = container_of(cache, struct extent_record, cache);
3993         if (!is_extent_tree_record(rec))
3994                 return 0;
3995
3996         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3997         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3998 }
3999
4000 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4001                        struct extent_buffer *buf, int slot)
4002 {
4003         if (btrfs_header_level(buf)) {
4004                 struct btrfs_key_ptr ptr1, ptr2;
4005
4006                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4007                                    sizeof(struct btrfs_key_ptr));
4008                 read_extent_buffer(buf, &ptr2,
4009                                    btrfs_node_key_ptr_offset(slot + 1),
4010                                    sizeof(struct btrfs_key_ptr));
4011                 write_extent_buffer(buf, &ptr1,
4012                                     btrfs_node_key_ptr_offset(slot + 1),
4013                                     sizeof(struct btrfs_key_ptr));
4014                 write_extent_buffer(buf, &ptr2,
4015                                     btrfs_node_key_ptr_offset(slot),
4016                                     sizeof(struct btrfs_key_ptr));
4017                 if (slot == 0) {
4018                         struct btrfs_disk_key key;
4019                         btrfs_node_key(buf, &key, 0);
4020                         btrfs_fixup_low_keys(root, path, &key,
4021                                              btrfs_header_level(buf) + 1);
4022                 }
4023         } else {
4024                 struct btrfs_item *item1, *item2;
4025                 struct btrfs_key k1, k2;
4026                 char *item1_data, *item2_data;
4027                 u32 item1_offset, item2_offset, item1_size, item2_size;
4028
4029                 item1 = btrfs_item_nr(slot);
4030                 item2 = btrfs_item_nr(slot + 1);
4031                 btrfs_item_key_to_cpu(buf, &k1, slot);
4032                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4033                 item1_offset = btrfs_item_offset(buf, item1);
4034                 item2_offset = btrfs_item_offset(buf, item2);
4035                 item1_size = btrfs_item_size(buf, item1);
4036                 item2_size = btrfs_item_size(buf, item2);
4037
4038                 item1_data = malloc(item1_size);
4039                 if (!item1_data)
4040                         return -ENOMEM;
4041                 item2_data = malloc(item2_size);
4042                 if (!item2_data) {
4043                         free(item1_data);
4044                         return -ENOMEM;
4045                 }
4046
4047                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4048                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4049
4050                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4051                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4052                 free(item1_data);
4053                 free(item2_data);
4054
4055                 btrfs_set_item_offset(buf, item1, item2_offset);
4056                 btrfs_set_item_offset(buf, item2, item1_offset);
4057                 btrfs_set_item_size(buf, item1, item2_size);
4058                 btrfs_set_item_size(buf, item2, item1_size);
4059
4060                 path->slots[0] = slot;
4061                 btrfs_set_item_key_unsafe(root, path, &k2);
4062                 path->slots[0] = slot + 1;
4063                 btrfs_set_item_key_unsafe(root, path, &k1);
4064         }
4065         return 0;
4066 }
4067
4068 static int fix_key_order(struct btrfs_trans_handle *trans,
4069                          struct btrfs_root *root,
4070                          struct btrfs_path *path)
4071 {
4072         struct extent_buffer *buf;
4073         struct btrfs_key k1, k2;
4074         int i;
4075         int level = path->lowest_level;
4076         int ret = -EIO;
4077
4078         buf = path->nodes[level];
4079         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4080                 if (level) {
4081                         btrfs_node_key_to_cpu(buf, &k1, i);
4082                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4083                 } else {
4084                         btrfs_item_key_to_cpu(buf, &k1, i);
4085                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4086                 }
4087                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4088                         continue;
4089                 ret = swap_values(root, path, buf, i);
4090                 if (ret)
4091                         break;
4092                 btrfs_mark_buffer_dirty(buf);
4093                 i = 0;
4094         }
4095         return ret;
4096 }
4097
4098 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4099                              struct btrfs_root *root,
4100                              struct btrfs_path *path,
4101                              struct extent_buffer *buf, int slot)
4102 {
4103         struct btrfs_key key;
4104         int nritems = btrfs_header_nritems(buf);
4105
4106         btrfs_item_key_to_cpu(buf, &key, slot);
4107
4108         /* These are all the keys we can deal with missing. */
4109         if (key.type != BTRFS_DIR_INDEX_KEY &&
4110             key.type != BTRFS_EXTENT_ITEM_KEY &&
4111             key.type != BTRFS_METADATA_ITEM_KEY &&
4112             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4113             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4114                 return -1;
4115
4116         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4117                (unsigned long long)key.objectid, key.type,
4118                (unsigned long long)key.offset, slot, buf->start);
4119         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4120                               btrfs_item_nr_offset(slot + 1),
4121                               sizeof(struct btrfs_item) *
4122                               (nritems - slot - 1));
4123         btrfs_set_header_nritems(buf, nritems - 1);
4124         if (slot == 0) {
4125                 struct btrfs_disk_key disk_key;
4126
4127                 btrfs_item_key(buf, &disk_key, 0);
4128                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4129         }
4130         btrfs_mark_buffer_dirty(buf);
4131         return 0;
4132 }
4133
4134 static int fix_item_offset(struct btrfs_trans_handle *trans,
4135                            struct btrfs_root *root,
4136                            struct btrfs_path *path)
4137 {
4138         struct extent_buffer *buf;
4139         int i;
4140         int ret = 0;
4141
4142         /* We should only get this for leaves */
4143         BUG_ON(path->lowest_level);
4144         buf = path->nodes[0];
4145 again:
4146         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4147                 unsigned int shift = 0, offset;
4148
4149                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4150                     BTRFS_LEAF_DATA_SIZE(root)) {
4151                         if (btrfs_item_end_nr(buf, i) >
4152                             BTRFS_LEAF_DATA_SIZE(root)) {
4153                                 ret = delete_bogus_item(trans, root, path,
4154                                                         buf, i);
4155                                 if (!ret)
4156                                         goto again;
4157                                 fprintf(stderr, "item is off the end of the "
4158                                         "leaf, can't fix\n");
4159                                 ret = -EIO;
4160                                 break;
4161                         }
4162                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4163                                 btrfs_item_end_nr(buf, i);
4164                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4165                            btrfs_item_offset_nr(buf, i - 1)) {
4166                         if (btrfs_item_end_nr(buf, i) >
4167                             btrfs_item_offset_nr(buf, i - 1)) {
4168                                 ret = delete_bogus_item(trans, root, path,
4169                                                         buf, i);
4170                                 if (!ret)
4171                                         goto again;
4172                                 fprintf(stderr, "items overlap, can't fix\n");
4173                                 ret = -EIO;
4174                                 break;
4175                         }
4176                         shift = btrfs_item_offset_nr(buf, i - 1) -
4177                                 btrfs_item_end_nr(buf, i);
4178                 }
4179                 if (!shift)
4180                         continue;
4181
4182                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4183                        i, shift, (unsigned long long)buf->start);
4184                 offset = btrfs_item_offset_nr(buf, i);
4185                 memmove_extent_buffer(buf,
4186                                       btrfs_leaf_data(buf) + offset + shift,
4187                                       btrfs_leaf_data(buf) + offset,
4188                                       btrfs_item_size_nr(buf, i));
4189                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4190                                       offset + shift);
4191                 btrfs_mark_buffer_dirty(buf);
4192         }
4193
4194         /*
4195          * We may have moved things, in which case we want to exit so we don't
4196          * write those changes out.  Once we have proper abort functionality in
4197          * progs this can be changed to something nicer.
4198          */
4199         BUG_ON(ret);
4200         return ret;
4201 }
4202
4203 /*
4204  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4205  * then just return -EIO.
4206  */
4207 static int try_to_fix_bad_block(struct btrfs_root *root,
4208                                 struct extent_buffer *buf,
4209                                 enum btrfs_tree_block_status status)
4210 {
4211         struct btrfs_trans_handle *trans;
4212         struct ulist *roots;
4213         struct ulist_node *node;
4214         struct btrfs_root *search_root;
4215         struct btrfs_path *path;
4216         struct ulist_iterator iter;
4217         struct btrfs_key root_key, key;
4218         int ret;
4219
4220         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4221             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4222                 return -EIO;
4223
4224         path = btrfs_alloc_path();
4225         if (!path)
4226                 return -EIO;
4227
4228         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4229                                    0, &roots);
4230         if (ret) {
4231                 btrfs_free_path(path);
4232                 return -EIO;
4233         }
4234
4235         ULIST_ITER_INIT(&iter);
4236         while ((node = ulist_next(roots, &iter))) {
4237                 root_key.objectid = node->val;
4238                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4239                 root_key.offset = (u64)-1;
4240
4241                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4242                 if (IS_ERR(root)) {
4243                         ret = -EIO;
4244                         break;
4245                 }
4246
4247
4248                 trans = btrfs_start_transaction(search_root, 0);
4249                 if (IS_ERR(trans)) {
4250                         ret = PTR_ERR(trans);
4251                         break;
4252                 }
4253
4254                 path->lowest_level = btrfs_header_level(buf);
4255                 path->skip_check_block = 1;
4256                 if (path->lowest_level)
4257                         btrfs_node_key_to_cpu(buf, &key, 0);
4258                 else
4259                         btrfs_item_key_to_cpu(buf, &key, 0);
4260                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4261                 if (ret) {
4262                         ret = -EIO;
4263                         btrfs_commit_transaction(trans, search_root);
4264                         break;
4265                 }
4266                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4267                         ret = fix_key_order(trans, search_root, path);
4268                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4269                         ret = fix_item_offset(trans, search_root, path);
4270                 if (ret) {
4271                         btrfs_commit_transaction(trans, search_root);
4272                         break;
4273                 }
4274                 btrfs_release_path(path);
4275                 btrfs_commit_transaction(trans, search_root);
4276         }
4277         ulist_free(roots);
4278         btrfs_free_path(path);
4279         return ret;
4280 }
4281
4282 static int check_block(struct btrfs_root *root,
4283                        struct cache_tree *extent_cache,
4284                        struct extent_buffer *buf, u64 flags)
4285 {
4286         struct extent_record *rec;
4287         struct cache_extent *cache;
4288         struct btrfs_key key;
4289         enum btrfs_tree_block_status status;
4290         int ret = 0;
4291         int level;
4292
4293         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4294         if (!cache)
4295                 return 1;
4296         rec = container_of(cache, struct extent_record, cache);
4297         rec->generation = btrfs_header_generation(buf);
4298
4299         level = btrfs_header_level(buf);
4300         if (btrfs_header_nritems(buf) > 0) {
4301
4302                 if (level == 0)
4303                         btrfs_item_key_to_cpu(buf, &key, 0);
4304                 else
4305                         btrfs_node_key_to_cpu(buf, &key, 0);
4306
4307                 rec->info_objectid = key.objectid;
4308         }
4309         rec->info_level = level;
4310
4311         if (btrfs_is_leaf(buf))
4312                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4313         else
4314                 status = btrfs_check_node(root, &rec->parent_key, buf);
4315
4316         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4317                 if (repair)
4318                         status = try_to_fix_bad_block(root, buf, status);
4319                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4320                         ret = -EIO;
4321                         fprintf(stderr, "bad block %llu\n",
4322                                 (unsigned long long)buf->start);
4323                 } else {
4324                         /*
4325                          * Signal to callers we need to start the scan over
4326                          * again since we'll have cow'ed blocks.
4327                          */
4328                         ret = -EAGAIN;
4329                 }
4330         } else {
4331                 rec->content_checked = 1;
4332                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4333                         rec->owner_ref_checked = 1;
4334                 else {
4335                         ret = check_owner_ref(root, rec, buf);
4336                         if (!ret)
4337                                 rec->owner_ref_checked = 1;
4338                 }
4339         }
4340         if (!ret)
4341                 maybe_free_extent_rec(extent_cache, rec);
4342         return ret;
4343 }
4344
4345 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4346                                                 u64 parent, u64 root)
4347 {
4348         struct list_head *cur = rec->backrefs.next;
4349         struct extent_backref *node;
4350         struct tree_backref *back;
4351
4352         while(cur != &rec->backrefs) {
4353                 node = list_entry(cur, struct extent_backref, list);
4354                 cur = cur->next;
4355                 if (node->is_data)
4356                         continue;
4357                 back = (struct tree_backref *)node;
4358                 if (parent > 0) {
4359                         if (!node->full_backref)
4360                                 continue;
4361                         if (parent == back->parent)
4362                                 return back;
4363                 } else {
4364                         if (node->full_backref)
4365                                 continue;
4366                         if (back->root == root)
4367                                 return back;
4368                 }
4369         }
4370         return NULL;
4371 }
4372
4373 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4374                                                 u64 parent, u64 root)
4375 {
4376         struct tree_backref *ref = malloc(sizeof(*ref));
4377
4378         if (!ref)
4379                 return NULL;
4380         memset(&ref->node, 0, sizeof(ref->node));
4381         if (parent > 0) {
4382                 ref->parent = parent;
4383                 ref->node.full_backref = 1;
4384         } else {
4385                 ref->root = root;
4386                 ref->node.full_backref = 0;
4387         }
4388         list_add_tail(&ref->node.list, &rec->backrefs);
4389
4390         return ref;
4391 }
4392
4393 static struct data_backref *find_data_backref(struct extent_record *rec,
4394                                                 u64 parent, u64 root,
4395                                                 u64 owner, u64 offset,
4396                                                 int found_ref,
4397                                                 u64 disk_bytenr, u64 bytes)
4398 {
4399         struct list_head *cur = rec->backrefs.next;
4400         struct extent_backref *node;
4401         struct data_backref *back;
4402
4403         while(cur != &rec->backrefs) {
4404                 node = list_entry(cur, struct extent_backref, list);
4405                 cur = cur->next;
4406                 if (!node->is_data)
4407                         continue;
4408                 back = (struct data_backref *)node;
4409                 if (parent > 0) {
4410                         if (!node->full_backref)
4411                                 continue;
4412                         if (parent == back->parent)
4413                                 return back;
4414                 } else {
4415                         if (node->full_backref)
4416                                 continue;
4417                         if (back->root == root && back->owner == owner &&
4418                             back->offset == offset) {
4419                                 if (found_ref && node->found_ref &&
4420                                     (back->bytes != bytes ||
4421                                     back->disk_bytenr != disk_bytenr))
4422                                         continue;
4423                                 return back;
4424                         }
4425                 }
4426         }
4427         return NULL;
4428 }
4429
4430 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4431                                                 u64 parent, u64 root,
4432                                                 u64 owner, u64 offset,
4433                                                 u64 max_size)
4434 {
4435         struct data_backref *ref = malloc(sizeof(*ref));
4436
4437         if (!ref)
4438                 return NULL;
4439         memset(&ref->node, 0, sizeof(ref->node));
4440         ref->node.is_data = 1;
4441
4442         if (parent > 0) {
4443                 ref->parent = parent;
4444                 ref->owner = 0;
4445                 ref->offset = 0;
4446                 ref->node.full_backref = 1;
4447         } else {
4448                 ref->root = root;
4449                 ref->owner = owner;
4450                 ref->offset = offset;
4451                 ref->node.full_backref = 0;
4452         }
4453         ref->bytes = max_size;
4454         ref->found_ref = 0;
4455         ref->num_refs = 0;
4456         list_add_tail(&ref->node.list, &rec->backrefs);
4457         if (max_size > rec->max_size)
4458                 rec->max_size = max_size;
4459         return ref;
4460 }
4461
4462 /* Check if the type of extent matches with its chunk */
4463 static void check_extent_type(struct extent_record *rec)
4464 {
4465         struct btrfs_block_group_cache *bg_cache;
4466
4467         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4468         if (!bg_cache)
4469                 return;
4470
4471         /* data extent, check chunk directly*/
4472         if (!rec->metadata) {
4473                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4474                         rec->wrong_chunk_type = 1;
4475                 return;
4476         }
4477
4478         /* metadata extent, check the obvious case first */
4479         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4480                                  BTRFS_BLOCK_GROUP_METADATA))) {
4481                 rec->wrong_chunk_type = 1;
4482                 return;
4483         }
4484
4485         /*
4486          * Check SYSTEM extent, as it's also marked as metadata, we can only
4487          * make sure it's a SYSTEM extent by its backref
4488          */
4489         if (!list_empty(&rec->backrefs)) {
4490                 struct extent_backref *node;
4491                 struct tree_backref *tback;
4492                 u64 bg_type;
4493
4494                 node = list_entry(rec->backrefs.next, struct extent_backref,
4495                                   list);
4496                 if (node->is_data) {
4497                         /* tree block shouldn't have data backref */
4498                         rec->wrong_chunk_type = 1;
4499                         return;
4500                 }
4501                 tback = container_of(node, struct tree_backref, node);
4502
4503                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4504                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4505                 else
4506                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4507                 if (!(bg_cache->flags & bg_type))
4508                         rec->wrong_chunk_type = 1;
4509         }
4510 }
4511
4512 /*
4513  * Allocate a new extent record, fill default values from @tmpl and insert int
4514  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4515  * the cache, otherwise it fails.
4516  */
4517 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4518                 struct extent_record *tmpl)
4519 {
4520         struct extent_record *rec;
4521         int ret = 0;
4522
4523         rec = malloc(sizeof(*rec));
4524         if (!rec)
4525                 return -ENOMEM;
4526         rec->start = tmpl->start;
4527         rec->max_size = tmpl->max_size;
4528         rec->nr = max(tmpl->nr, tmpl->max_size);
4529         rec->found_rec = tmpl->found_rec;
4530         rec->content_checked = tmpl->content_checked;
4531         rec->owner_ref_checked = tmpl->owner_ref_checked;
4532         rec->num_duplicates = 0;
4533         rec->metadata = tmpl->metadata;
4534         rec->flag_block_full_backref = -1;
4535         rec->bad_full_backref = 0;
4536         rec->crossing_stripes = 0;
4537         rec->wrong_chunk_type = 0;
4538         INIT_LIST_HEAD(&rec->backrefs);
4539         INIT_LIST_HEAD(&rec->dups);
4540         INIT_LIST_HEAD(&rec->list);
4541
4542         if (tmpl->is_root)
4543                 rec->is_root = 1;
4544         else
4545                 rec->is_root = 0;
4546
4547         rec->refs = tmpl->refs;
4548
4549         if (tmpl->extent_item_refs)
4550                 rec->extent_item_refs = tmpl->extent_item_refs;
4551         else
4552                 rec->extent_item_refs = 0;
4553
4554         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4555
4556         if (tmpl->parent_generation)
4557                 rec->parent_generation = tmpl->parent_generation;
4558         else
4559                 rec->parent_generation = 0;
4560
4561         rec->cache.start = tmpl->start;
4562         rec->cache.size = tmpl->nr;
4563         ret = insert_cache_extent(extent_cache, &rec->cache);
4564         BUG_ON(ret);
4565         bytes_used += tmpl->nr;
4566
4567         if (tmpl->metadata)
4568                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4569                                 rec->max_size);
4570         check_extent_type(rec);
4571         return ret;
4572 }
4573
4574 static int add_extent_rec(struct cache_tree *extent_cache,
4575                           struct btrfs_key *parent_key, u64 parent_gen,
4576                           u64 start, u64 nr, u64 extent_item_refs,
4577                           int is_root, int inc_ref, int set_checked,
4578                           int metadata, int extent_rec, u64 max_size)
4579 {
4580         struct extent_record *rec;
4581         struct cache_extent *cache;
4582         struct extent_record tmpl;
4583         int ret = 0;
4584         int dup = 0;
4585
4586         cache = lookup_cache_extent(extent_cache, start, nr);
4587         if (cache) {
4588                 rec = container_of(cache, struct extent_record, cache);
4589                 if (inc_ref)
4590                         rec->refs++;
4591                 if (rec->nr == 1)
4592                         rec->nr = max(nr, max_size);
4593
4594                 /*
4595                  * We need to make sure to reset nr to whatever the extent
4596                  * record says was the real size, this way we can compare it to
4597                  * the backrefs.
4598                  */
4599                 if (extent_rec) {
4600                         if (start != rec->start || rec->found_rec) {
4601                                 struct extent_record *tmp;
4602
4603                                 dup = 1;
4604                                 if (list_empty(&rec->list))
4605                                         list_add_tail(&rec->list,
4606                                                       &duplicate_extents);
4607
4608                                 /*
4609                                  * We have to do this song and dance in case we
4610                                  * find an extent record that falls inside of
4611                                  * our current extent record but does not have
4612                                  * the same objectid.
4613                                  */
4614                                 tmp = malloc(sizeof(*tmp));
4615                                 if (!tmp)
4616                                         return -ENOMEM;
4617                                 tmp->start = start;
4618                                 tmp->max_size = max_size;
4619                                 tmp->nr = nr;
4620                                 tmp->found_rec = 1;
4621                                 tmp->metadata = metadata;
4622                                 tmp->extent_item_refs = extent_item_refs;
4623                                 INIT_LIST_HEAD(&tmp->list);
4624                                 list_add_tail(&tmp->list, &rec->dups);
4625                                 rec->num_duplicates++;
4626                         } else {
4627                                 rec->nr = nr;
4628                                 rec->found_rec = 1;
4629                         }
4630                 }
4631
4632                 if (extent_item_refs && !dup) {
4633                         if (rec->extent_item_refs) {
4634                                 fprintf(stderr, "block %llu rec "
4635                                         "extent_item_refs %llu, passed %llu\n",
4636                                         (unsigned long long)start,
4637                                         (unsigned long long)
4638                                                         rec->extent_item_refs,
4639                                         (unsigned long long)extent_item_refs);
4640                         }
4641                         rec->extent_item_refs = extent_item_refs;
4642                 }
4643                 if (is_root)
4644                         rec->is_root = 1;
4645                 if (set_checked) {
4646                         rec->content_checked = 1;
4647                         rec->owner_ref_checked = 1;
4648                 }
4649
4650                 if (parent_key)
4651                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4652                 if (parent_gen)
4653                         rec->parent_generation = parent_gen;
4654
4655                 if (rec->max_size < max_size)
4656                         rec->max_size = max_size;
4657
4658                 /*
4659                  * A metadata extent can't cross stripe_len boundary, otherwise
4660                  * kernel scrub won't be able to handle it.
4661                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4662                  * it.
4663                  */
4664                 if (metadata)
4665                         rec->crossing_stripes = check_crossing_stripes(
4666                                         rec->start, rec->max_size);
4667                 check_extent_type(rec);
4668                 maybe_free_extent_rec(extent_cache, rec);
4669                 return ret;
4670         }
4671
4672         memset(&tmpl, 0, sizeof(tmpl));
4673
4674         if (parent_key)
4675                 btrfs_cpu_key_to_disk(&tmpl.parent_key, parent_key);
4676         tmpl.parent_generation = parent_gen;
4677         tmpl.start = start;
4678         tmpl.nr = nr;
4679         tmpl.extent_item_refs = extent_item_refs;
4680         tmpl.is_root = is_root;
4681         tmpl.metadata = metadata;
4682         tmpl.found_rec = extent_rec;
4683         tmpl.max_size = max_size;
4684         tmpl.content_checked = set_checked;
4685         tmpl.owner_ref_checked = set_checked;
4686         tmpl.refs = !!inc_ref;
4687
4688         ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4689
4690         return ret;
4691 }
4692
4693 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4694                             u64 parent, u64 root, int found_ref)
4695 {
4696         struct extent_record *rec;
4697         struct tree_backref *back;
4698         struct cache_extent *cache;
4699
4700         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4701         if (!cache) {
4702                 struct extent_record tmpl;
4703
4704                 memset(&tmpl, 0, sizeof(tmpl));
4705                 tmpl.start = bytenr;
4706                 tmpl.nr = 1;
4707                 tmpl.metadata = 1;
4708
4709                 add_extent_rec_nolookup(extent_cache, &tmpl);
4710
4711                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4712                 if (!cache)
4713                         abort();
4714         }
4715
4716         rec = container_of(cache, struct extent_record, cache);
4717         if (rec->start != bytenr) {
4718                 abort();
4719         }
4720
4721         back = find_tree_backref(rec, parent, root);
4722         if (!back) {
4723                 back = alloc_tree_backref(rec, parent, root);
4724                 BUG_ON(!back);
4725         }
4726
4727         if (found_ref) {
4728                 if (back->node.found_ref) {
4729                         fprintf(stderr, "Extent back ref already exists "
4730                                 "for %llu parent %llu root %llu \n",
4731                                 (unsigned long long)bytenr,
4732                                 (unsigned long long)parent,
4733                                 (unsigned long long)root);
4734                 }
4735                 back->node.found_ref = 1;
4736         } else {
4737                 if (back->node.found_extent_tree) {
4738                         fprintf(stderr, "Extent back ref already exists "
4739                                 "for %llu parent %llu root %llu \n",
4740                                 (unsigned long long)bytenr,
4741                                 (unsigned long long)parent,
4742                                 (unsigned long long)root);
4743                 }
4744                 back->node.found_extent_tree = 1;
4745         }
4746         check_extent_type(rec);
4747         maybe_free_extent_rec(extent_cache, rec);
4748         return 0;
4749 }
4750
4751 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4752                             u64 parent, u64 root, u64 owner, u64 offset,
4753                             u32 num_refs, int found_ref, u64 max_size)
4754 {
4755         struct extent_record *rec;
4756         struct data_backref *back;
4757         struct cache_extent *cache;
4758
4759         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4760         if (!cache) {
4761                 struct extent_record tmpl;
4762
4763                 memset(&tmpl, 0, sizeof(tmpl));
4764                 tmpl.start = bytenr;
4765                 tmpl.nr = 1;
4766                 tmpl.max_size = max_size;
4767
4768                 add_extent_rec_nolookup(extent_cache, &tmpl);
4769
4770                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4771                 if (!cache)
4772                         abort();
4773         }
4774
4775         rec = container_of(cache, struct extent_record, cache);
4776         if (rec->max_size < max_size)
4777                 rec->max_size = max_size;
4778
4779         /*
4780          * If found_ref is set then max_size is the real size and must match the
4781          * existing refs.  So if we have already found a ref then we need to
4782          * make sure that this ref matches the existing one, otherwise we need
4783          * to add a new backref so we can notice that the backrefs don't match
4784          * and we need to figure out who is telling the truth.  This is to
4785          * account for that awful fsync bug I introduced where we'd end up with
4786          * a btrfs_file_extent_item that would have its length include multiple
4787          * prealloc extents or point inside of a prealloc extent.
4788          */
4789         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4790                                  bytenr, max_size);
4791         if (!back) {
4792                 back = alloc_data_backref(rec, parent, root, owner, offset,
4793                                           max_size);
4794                 BUG_ON(!back);
4795         }
4796
4797         if (found_ref) {
4798                 BUG_ON(num_refs != 1);
4799                 if (back->node.found_ref)
4800                         BUG_ON(back->bytes != max_size);
4801                 back->node.found_ref = 1;
4802                 back->found_ref += 1;
4803                 back->bytes = max_size;
4804                 back->disk_bytenr = bytenr;
4805                 rec->refs += 1;
4806                 rec->content_checked = 1;
4807                 rec->owner_ref_checked = 1;
4808         } else {
4809                 if (back->node.found_extent_tree) {
4810                         fprintf(stderr, "Extent back ref already exists "
4811                                 "for %llu parent %llu root %llu "
4812                                 "owner %llu offset %llu num_refs %lu\n",
4813                                 (unsigned long long)bytenr,
4814                                 (unsigned long long)parent,
4815                                 (unsigned long long)root,
4816                                 (unsigned long long)owner,
4817                                 (unsigned long long)offset,
4818                                 (unsigned long)num_refs);
4819                 }
4820                 back->num_refs = num_refs;
4821                 back->node.found_extent_tree = 1;
4822         }
4823         maybe_free_extent_rec(extent_cache, rec);
4824         return 0;
4825 }
4826
4827 static int add_pending(struct cache_tree *pending,
4828                        struct cache_tree *seen, u64 bytenr, u32 size)
4829 {
4830         int ret;
4831         ret = add_cache_extent(seen, bytenr, size);
4832         if (ret)
4833                 return ret;
4834         add_cache_extent(pending, bytenr, size);
4835         return 0;
4836 }
4837
4838 static int pick_next_pending(struct cache_tree *pending,
4839                         struct cache_tree *reada,
4840                         struct cache_tree *nodes,
4841                         u64 last, struct block_info *bits, int bits_nr,
4842                         int *reada_bits)
4843 {
4844         unsigned long node_start = last;
4845         struct cache_extent *cache;
4846         int ret;
4847
4848         cache = search_cache_extent(reada, 0);
4849         if (cache) {
4850                 bits[0].start = cache->start;
4851                 bits[0].size = cache->size;
4852                 *reada_bits = 1;
4853                 return 1;
4854         }
4855         *reada_bits = 0;
4856         if (node_start > 32768)
4857                 node_start -= 32768;
4858
4859         cache = search_cache_extent(nodes, node_start);
4860         if (!cache)
4861                 cache = search_cache_extent(nodes, 0);
4862
4863         if (!cache) {
4864                  cache = search_cache_extent(pending, 0);
4865                  if (!cache)
4866                          return 0;
4867                  ret = 0;
4868                  do {
4869                          bits[ret].start = cache->start;
4870                          bits[ret].size = cache->size;
4871                          cache = next_cache_extent(cache);
4872                          ret++;
4873                  } while (cache && ret < bits_nr);
4874                  return ret;
4875         }
4876
4877         ret = 0;
4878         do {
4879                 bits[ret].start = cache->start;
4880                 bits[ret].size = cache->size;
4881                 cache = next_cache_extent(cache);
4882                 ret++;
4883         } while (cache && ret < bits_nr);
4884
4885         if (bits_nr - ret > 8) {
4886                 u64 lookup = bits[0].start + bits[0].size;
4887                 struct cache_extent *next;
4888                 next = search_cache_extent(pending, lookup);
4889                 while(next) {
4890                         if (next->start - lookup > 32768)
4891                                 break;
4892                         bits[ret].start = next->start;
4893                         bits[ret].size = next->size;
4894                         lookup = next->start + next->size;
4895                         ret++;
4896                         if (ret == bits_nr)
4897                                 break;
4898                         next = next_cache_extent(next);
4899                         if (!next)
4900                                 break;
4901                 }
4902         }
4903         return ret;
4904 }
4905
4906 static void free_chunk_record(struct cache_extent *cache)
4907 {
4908         struct chunk_record *rec;
4909
4910         rec = container_of(cache, struct chunk_record, cache);
4911         list_del_init(&rec->list);
4912         list_del_init(&rec->dextents);
4913         free(rec);
4914 }
4915
4916 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4917 {
4918         cache_tree_free_extents(chunk_cache, free_chunk_record);
4919 }
4920
4921 static void free_device_record(struct rb_node *node)
4922 {
4923         struct device_record *rec;
4924
4925         rec = container_of(node, struct device_record, node);
4926         free(rec);
4927 }
4928
4929 FREE_RB_BASED_TREE(device_cache, free_device_record);
4930
4931 int insert_block_group_record(struct block_group_tree *tree,
4932                               struct block_group_record *bg_rec)
4933 {
4934         int ret;
4935
4936         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4937         if (ret)
4938                 return ret;
4939
4940         list_add_tail(&bg_rec->list, &tree->block_groups);
4941         return 0;
4942 }
4943
4944 static void free_block_group_record(struct cache_extent *cache)
4945 {
4946         struct block_group_record *rec;
4947
4948         rec = container_of(cache, struct block_group_record, cache);
4949         list_del_init(&rec->list);
4950         free(rec);
4951 }
4952
4953 void free_block_group_tree(struct block_group_tree *tree)
4954 {
4955         cache_tree_free_extents(&tree->tree, free_block_group_record);
4956 }
4957
4958 int insert_device_extent_record(struct device_extent_tree *tree,
4959                                 struct device_extent_record *de_rec)
4960 {
4961         int ret;
4962
4963         /*
4964          * Device extent is a bit different from the other extents, because
4965          * the extents which belong to the different devices may have the
4966          * same start and size, so we need use the special extent cache
4967          * search/insert functions.
4968          */
4969         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4970         if (ret)
4971                 return ret;
4972
4973         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4974         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4975         return 0;
4976 }
4977
4978 static void free_device_extent_record(struct cache_extent *cache)
4979 {
4980         struct device_extent_record *rec;
4981
4982         rec = container_of(cache, struct device_extent_record, cache);
4983         if (!list_empty(&rec->chunk_list))
4984                 list_del_init(&rec->chunk_list);
4985         if (!list_empty(&rec->device_list))
4986                 list_del_init(&rec->device_list);
4987         free(rec);
4988 }
4989
4990 void free_device_extent_tree(struct device_extent_tree *tree)
4991 {
4992         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4993 }
4994
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref: a tree backref
 * when the ref's objectid is below BTRFS_FIRST_FREE_OBJECTID, a data backref
 * (carrying the v0 ref count) otherwise.  The item key supplies the byte
 * number (objectid) and the parent (offset).  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_key key;
        struct btrfs_extent_ref_v0 *ref0;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID) {
                add_data_backref(extent_cache, key.objectid, key.offset, 0,
                                 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
                return 0;
        }

        add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
        return 0;
}
#endif
5013
5014 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5015                                             struct btrfs_key *key,
5016                                             int slot)
5017 {
5018         struct btrfs_chunk *ptr;
5019         struct chunk_record *rec;
5020         int num_stripes, i;
5021
5022         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5023         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5024
5025         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5026         if (!rec) {
5027                 fprintf(stderr, "memory allocation failed\n");
5028                 exit(-1);
5029         }
5030
5031         INIT_LIST_HEAD(&rec->list);
5032         INIT_LIST_HEAD(&rec->dextents);
5033         rec->bg_rec = NULL;
5034
5035         rec->cache.start = key->offset;
5036         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5037
5038         rec->generation = btrfs_header_generation(leaf);
5039
5040         rec->objectid = key->objectid;
5041         rec->type = key->type;
5042         rec->offset = key->offset;
5043
5044         rec->length = rec->cache.size;
5045         rec->owner = btrfs_chunk_owner(leaf, ptr);
5046         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5047         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5048         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5049         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5050         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5051         rec->num_stripes = num_stripes;
5052         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5053
5054         for (i = 0; i < rec->num_stripes; ++i) {
5055                 rec->stripes[i].devid =
5056                         btrfs_stripe_devid_nr(leaf, ptr, i);
5057                 rec->stripes[i].offset =
5058                         btrfs_stripe_offset_nr(leaf, ptr, i);
5059                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5060                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5061                                 BTRFS_UUID_SIZE);
5062         }
5063
5064         return rec;
5065 }
5066
5067 static int process_chunk_item(struct cache_tree *chunk_cache,
5068                               struct btrfs_key *key, struct extent_buffer *eb,
5069                               int slot)
5070 {
5071         struct chunk_record *rec;
5072         int ret = 0;
5073
5074         rec = btrfs_new_chunk_record(eb, key, slot);
5075         ret = insert_cache_extent(chunk_cache, &rec->cache);
5076         if (ret) {
5077                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5078                         rec->offset, rec->length);
5079                 free(rec);
5080         }
5081
5082         return ret;
5083 }
5084
5085 static int process_device_item(struct rb_root *dev_cache,
5086                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5087 {
5088         struct btrfs_dev_item *ptr;
5089         struct device_record *rec;
5090         int ret = 0;
5091
5092         ptr = btrfs_item_ptr(eb,
5093                 slot, struct btrfs_dev_item);
5094
5095         rec = malloc(sizeof(*rec));
5096         if (!rec) {
5097                 fprintf(stderr, "memory allocation failed\n");
5098                 return -ENOMEM;
5099         }
5100
5101         rec->devid = key->offset;
5102         rec->generation = btrfs_header_generation(eb);
5103
5104         rec->objectid = key->objectid;
5105         rec->type = key->type;
5106         rec->offset = key->offset;
5107
5108         rec->devid = btrfs_device_id(eb, ptr);
5109         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5110         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5111
5112         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5113         if (ret) {
5114                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5115                 free(rec);
5116         }
5117
5118         return ret;
5119 }
5120
5121 struct block_group_record *
5122 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5123                              int slot)
5124 {
5125         struct btrfs_block_group_item *ptr;
5126         struct block_group_record *rec;
5127
5128         rec = calloc(1, sizeof(*rec));
5129         if (!rec) {
5130                 fprintf(stderr, "memory allocation failed\n");
5131                 exit(-1);
5132         }
5133
5134         rec->cache.start = key->objectid;
5135         rec->cache.size = key->offset;
5136
5137         rec->generation = btrfs_header_generation(leaf);
5138
5139         rec->objectid = key->objectid;
5140         rec->type = key->type;
5141         rec->offset = key->offset;
5142
5143         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5144         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5145
5146         INIT_LIST_HEAD(&rec->list);
5147
5148         return rec;
5149 }
5150
5151 static int process_block_group_item(struct block_group_tree *block_group_cache,
5152                                     struct btrfs_key *key,
5153                                     struct extent_buffer *eb, int slot)
5154 {
5155         struct block_group_record *rec;
5156         int ret = 0;
5157
5158         rec = btrfs_new_block_group_record(eb, key, slot);
5159         ret = insert_block_group_record(block_group_cache, rec);
5160         if (ret) {
5161                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5162                         rec->objectid, rec->offset);
5163                 free(rec);
5164         }
5165
5166         return ret;
5167 }
5168
5169 struct device_extent_record *
5170 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5171                                struct btrfs_key *key, int slot)
5172 {
5173         struct device_extent_record *rec;
5174         struct btrfs_dev_extent *ptr;
5175
5176         rec = calloc(1, sizeof(*rec));
5177         if (!rec) {
5178                 fprintf(stderr, "memory allocation failed\n");
5179                 exit(-1);
5180         }
5181
5182         rec->cache.objectid = key->objectid;
5183         rec->cache.start = key->offset;
5184
5185         rec->generation = btrfs_header_generation(leaf);
5186
5187         rec->objectid = key->objectid;
5188         rec->type = key->type;
5189         rec->offset = key->offset;
5190
5191         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5192         rec->chunk_objecteid =
5193                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5194         rec->chunk_offset =
5195                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5196         rec->length = btrfs_dev_extent_length(leaf, ptr);
5197         rec->cache.size = rec->length;
5198
5199         INIT_LIST_HEAD(&rec->chunk_list);
5200         INIT_LIST_HEAD(&rec->device_list);
5201
5202         return rec;
5203 }
5204
5205 static int
5206 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5207                            struct btrfs_key *key, struct extent_buffer *eb,
5208                            int slot)
5209 {
5210         struct device_extent_record *rec;
5211         int ret;
5212
5213         rec = btrfs_new_device_extent_record(eb, key, slot);
5214         ret = insert_device_extent_record(dev_extent_cache, rec);
5215         if (ret) {
5216                 fprintf(stderr,
5217                         "Device extent[%llu, %llu, %llu] existed.\n",
5218                         rec->objectid, rec->offset, rec->length);
5219                 free(rec);
5220         }
5221
5222         return ret;
5223 }
5224
5225 static int process_extent_item(struct btrfs_root *root,
5226                                struct cache_tree *extent_cache,
5227                                struct extent_buffer *eb, int slot)
5228 {
5229         struct btrfs_extent_item *ei;
5230         struct btrfs_extent_inline_ref *iref;
5231         struct btrfs_extent_data_ref *dref;
5232         struct btrfs_shared_data_ref *sref;
5233         struct btrfs_key key;
5234         unsigned long end;
5235         unsigned long ptr;
5236         int type;
5237         u32 item_size = btrfs_item_size_nr(eb, slot);
5238         u64 refs = 0;
5239         u64 offset;
5240         u64 num_bytes;
5241         int metadata = 0;
5242
5243         btrfs_item_key_to_cpu(eb, &key, slot);
5244
5245         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5246                 metadata = 1;
5247                 num_bytes = root->nodesize;
5248         } else {
5249                 num_bytes = key.offset;
5250         }
5251
5252         if (item_size < sizeof(*ei)) {
5253 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5254                 struct btrfs_extent_item_v0 *ei0;
5255                 BUG_ON(item_size != sizeof(*ei0));
5256                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5257                 refs = btrfs_extent_refs_v0(eb, ei0);
5258 #else
5259                 BUG();
5260 #endif
5261                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5262                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5263                                       num_bytes);
5264         }
5265
5266         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5267         refs = btrfs_extent_refs(eb, ei);
5268         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5269                 metadata = 1;
5270         else
5271                 metadata = 0;
5272
5273         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5274                        refs, 0, 0, 0, metadata, 1, num_bytes);
5275
5276         ptr = (unsigned long)(ei + 1);
5277         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5278             key.type == BTRFS_EXTENT_ITEM_KEY)
5279                 ptr += sizeof(struct btrfs_tree_block_info);
5280
5281         end = (unsigned long)ei + item_size;
5282         while (ptr < end) {
5283                 iref = (struct btrfs_extent_inline_ref *)ptr;
5284                 type = btrfs_extent_inline_ref_type(eb, iref);
5285                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5286                 switch (type) {
5287                 case BTRFS_TREE_BLOCK_REF_KEY:
5288                         add_tree_backref(extent_cache, key.objectid,
5289                                          0, offset, 0);
5290                         break;
5291                 case BTRFS_SHARED_BLOCK_REF_KEY:
5292                         add_tree_backref(extent_cache, key.objectid,
5293                                          offset, 0, 0);
5294                         break;
5295                 case BTRFS_EXTENT_DATA_REF_KEY:
5296                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5297                         add_data_backref(extent_cache, key.objectid, 0,
5298                                         btrfs_extent_data_ref_root(eb, dref),
5299                                         btrfs_extent_data_ref_objectid(eb,
5300                                                                        dref),
5301                                         btrfs_extent_data_ref_offset(eb, dref),
5302                                         btrfs_extent_data_ref_count(eb, dref),
5303                                         0, num_bytes);
5304                         break;
5305                 case BTRFS_SHARED_DATA_REF_KEY:
5306                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5307                         add_data_backref(extent_cache, key.objectid, offset,
5308                                         0, 0, 0,
5309                                         btrfs_shared_data_ref_count(eb, sref),
5310                                         0, num_bytes);
5311                         break;
5312                 default:
5313                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5314                                 key.objectid, key.type, num_bytes);
5315                         goto out;
5316                 }
5317                 ptr += btrfs_extent_inline_ref_size(type);
5318         }
5319         WARN_ON(ptr > end);
5320 out:
5321         return 0;
5322 }
5323
5324 static int check_cache_range(struct btrfs_root *root,
5325                              struct btrfs_block_group_cache *cache,
5326                              u64 offset, u64 bytes)
5327 {
5328         struct btrfs_free_space *entry;
5329         u64 *logical;
5330         u64 bytenr;
5331         int stripe_len;
5332         int i, nr, ret;
5333
5334         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5335                 bytenr = btrfs_sb_offset(i);
5336                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5337                                        cache->key.objectid, bytenr, 0,
5338                                        &logical, &nr, &stripe_len);
5339                 if (ret)
5340                         return ret;
5341
5342                 while (nr--) {
5343                         if (logical[nr] + stripe_len <= offset)
5344                                 continue;
5345                         if (offset + bytes <= logical[nr])
5346                                 continue;
5347                         if (logical[nr] == offset) {
5348                                 if (stripe_len >= bytes) {
5349                                         kfree(logical);
5350                                         return 0;
5351                                 }
5352                                 bytes -= stripe_len;
5353                                 offset += stripe_len;
5354                         } else if (logical[nr] < offset) {
5355                                 if (logical[nr] + stripe_len >=
5356                                     offset + bytes) {
5357                                         kfree(logical);
5358                                         return 0;
5359                                 }
5360                                 bytes = (offset + bytes) -
5361                                         (logical[nr] + stripe_len);
5362                                 offset = logical[nr] + stripe_len;
5363                         } else {
5364                                 /*
5365                                  * Could be tricky, the super may land in the
5366                                  * middle of the area we're checking.  First
5367                                  * check the easiest case, it's at the end.
5368                                  */
5369                                 if (logical[nr] + stripe_len >=
5370                                     bytes + offset) {
5371                                         bytes = logical[nr] - offset;
5372                                         continue;
5373                                 }
5374
5375                                 /* Check the left side */
5376                                 ret = check_cache_range(root, cache,
5377                                                         offset,
5378                                                         logical[nr] - offset);
5379                                 if (ret) {
5380                                         kfree(logical);
5381                                         return ret;
5382                                 }
5383
5384                                 /* Now we continue with the right side */
5385                                 bytes = (offset + bytes) -
5386                                         (logical[nr] + stripe_len);
5387                                 offset = logical[nr] + stripe_len;
5388                         }
5389                 }
5390
5391                 kfree(logical);
5392         }
5393
5394         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5395         if (!entry) {
5396                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5397                         offset, offset+bytes);
5398                 return -EINVAL;
5399         }
5400
5401         if (entry->offset != offset) {
5402                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5403                         entry->offset);
5404                 return -EINVAL;
5405         }
5406
5407         if (entry->bytes != bytes) {
5408                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5409                         bytes, entry->bytes, offset);
5410                 return -EINVAL;
5411         }
5412
5413         unlink_free_space(cache->free_space_ctl, entry);
5414         free(entry);
5415         return 0;
5416 }
5417
5418 static int verify_space_cache(struct btrfs_root *root,
5419                               struct btrfs_block_group_cache *cache)
5420 {
5421         struct btrfs_path *path;
5422         struct extent_buffer *leaf;
5423         struct btrfs_key key;
5424         u64 last;
5425         int ret = 0;
5426
5427         path = btrfs_alloc_path();
5428         if (!path)
5429                 return -ENOMEM;
5430
5431         root = root->fs_info->extent_root;
5432
5433         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5434
5435         key.objectid = last;
5436         key.offset = 0;
5437         key.type = BTRFS_EXTENT_ITEM_KEY;
5438
5439         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5440         if (ret < 0)
5441                 goto out;
5442         ret = 0;
5443         while (1) {
5444                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5445                         ret = btrfs_next_leaf(root, path);
5446                         if (ret < 0)
5447                                 goto out;
5448                         if (ret > 0) {
5449                                 ret = 0;
5450                                 break;
5451                         }
5452                 }
5453                 leaf = path->nodes[0];
5454                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5455                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5456                         break;
5457                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5458                     key.type != BTRFS_METADATA_ITEM_KEY) {
5459                         path->slots[0]++;
5460                         continue;
5461                 }
5462
5463                 if (last == key.objectid) {
5464                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5465                                 last = key.objectid + key.offset;
5466                         else
5467                                 last = key.objectid + root->nodesize;
5468                         path->slots[0]++;
5469                         continue;
5470                 }
5471
5472                 ret = check_cache_range(root, cache, last,
5473                                         key.objectid - last);
5474                 if (ret)
5475                         break;
5476                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5477                         last = key.objectid + key.offset;
5478                 else
5479                         last = key.objectid + root->nodesize;
5480                 path->slots[0]++;
5481         }
5482
5483         if (last < cache->key.objectid + cache->key.offset)
5484                 ret = check_cache_range(root, cache, last,
5485                                         cache->key.objectid +
5486                                         cache->key.offset - last);
5487
5488 out:
5489         btrfs_free_path(path);
5490
5491         if (!ret &&
5492             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5493                 fprintf(stderr, "There are still entries left in the space "
5494                         "cache\n");
5495                 ret = -EINVAL;
5496         }
5497
5498         return ret;
5499 }
5500
5501 static int check_space_cache(struct btrfs_root *root)
5502 {
5503         struct btrfs_block_group_cache *cache;
5504         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5505         int ret;
5506         int error = 0;
5507
5508         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5509             btrfs_super_generation(root->fs_info->super_copy) !=
5510             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5511                 printf("cache and super generation don't match, space cache "
5512                        "will be invalidated\n");
5513                 return 0;
5514         }
5515
5516         if (ctx.progress_enabled) {
5517                 ctx.tp = TASK_FREE_SPACE;
5518                 task_start(ctx.info);
5519         }
5520
5521         while (1) {
5522                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5523                 if (!cache)
5524                         break;
5525
5526                 start = cache->key.objectid + cache->key.offset;
5527                 if (!cache->free_space_ctl) {
5528                         if (btrfs_init_free_space_ctl(cache,
5529                                                       root->sectorsize)) {
5530                                 ret = -ENOMEM;
5531                                 break;
5532                         }
5533                 } else {
5534                         btrfs_remove_free_space_cache(cache);
5535                 }
5536
5537                 if (btrfs_fs_compat_ro(root->fs_info,
5538                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5539                         ret = exclude_super_stripes(root, cache);
5540                         if (ret) {
5541                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5542                                         strerror(-ret));
5543                                 error++;
5544                                 continue;
5545                         }
5546                         ret = load_free_space_tree(root->fs_info, cache);
5547                         free_excluded_extents(root, cache);
5548                         if (ret < 0) {
5549                                 fprintf(stderr, "could not load free space tree: %s\n",
5550                                         strerror(-ret));
5551                                 error++;
5552                                 continue;
5553                         }
5554                         error += ret;
5555                 } else {
5556                         ret = load_free_space_cache(root->fs_info, cache);
5557                         if (!ret)
5558                                 continue;
5559                 }
5560
5561                 ret = verify_space_cache(root, cache);
5562                 if (ret) {
5563                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5564                                 cache->key.objectid);
5565                         error++;
5566                 }
5567         }
5568
5569         task_stop(ctx.info);
5570
5571         return error ? -EINVAL : 0;
5572 }
5573
5574 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5575                         u64 num_bytes, unsigned long leaf_offset,
5576                         struct extent_buffer *eb) {
5577
5578         u64 offset = 0;
5579         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5580         char *data;
5581         unsigned long csum_offset;
5582         u32 csum;
5583         u32 csum_expected;
5584         u64 read_len;
5585         u64 data_checked = 0;
5586         u64 tmp;
5587         int ret = 0;
5588         int mirror;
5589         int num_copies;
5590
5591         if (num_bytes % root->sectorsize)
5592                 return -EINVAL;
5593
5594         data = malloc(num_bytes);
5595         if (!data)
5596                 return -ENOMEM;
5597
5598         while (offset < num_bytes) {
5599                 mirror = 0;
5600 again:
5601                 read_len = num_bytes - offset;
5602                 /* read as much space once a time */
5603                 ret = read_extent_data(root, data + offset,
5604                                 bytenr + offset, &read_len, mirror);
5605                 if (ret)
5606                         goto out;
5607                 data_checked = 0;
5608                 /* verify every 4k data's checksum */
5609                 while (data_checked < read_len) {
5610                         csum = ~(u32)0;
5611                         tmp = offset + data_checked;
5612
5613                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5614                                                csum, root->sectorsize);
5615                         btrfs_csum_final(csum, (char *)&csum);
5616
5617                         csum_offset = leaf_offset +
5618                                  tmp / root->sectorsize * csum_size;
5619                         read_extent_buffer(eb, (char *)&csum_expected,
5620                                            csum_offset, csum_size);
5621                         /* try another mirror */
5622                         if (csum != csum_expected) {
5623                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5624                                                 mirror, bytenr + tmp,
5625                                                 csum, csum_expected);
5626                                 num_copies = btrfs_num_copies(
5627                                                 &root->fs_info->mapping_tree,
5628                                                 bytenr, num_bytes);
5629                                 if (mirror < num_copies - 1) {
5630                                         mirror += 1;
5631                                         goto again;
5632                                 }
5633                         }
5634                         data_checked += root->sectorsize;
5635                 }
5636                 offset += read_len;
5637         }
5638 out:
5639         free(data);
5640         return ret;
5641 }
5642
5643 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5644                                u64 num_bytes)
5645 {
5646         struct btrfs_path *path;
5647         struct extent_buffer *leaf;
5648         struct btrfs_key key;
5649         int ret;
5650
5651         path = btrfs_alloc_path();
5652         if (!path) {
5653                 fprintf(stderr, "Error allocing path\n");
5654                 return -ENOMEM;
5655         }
5656
5657         key.objectid = bytenr;
5658         key.type = BTRFS_EXTENT_ITEM_KEY;
5659         key.offset = (u64)-1;
5660
5661 again:
5662         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5663                                 0, 0);
5664         if (ret < 0) {
5665                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5666                 btrfs_free_path(path);
5667                 return ret;
5668         } else if (ret) {
5669                 if (path->slots[0] > 0) {
5670                         path->slots[0]--;
5671                 } else {
5672                         ret = btrfs_prev_leaf(root, path);
5673                         if (ret < 0) {
5674                                 goto out;
5675                         } else if (ret > 0) {
5676                                 ret = 0;
5677                                 goto out;
5678                         }
5679                 }
5680         }
5681
5682         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5683
5684         /*
5685          * Block group items come before extent items if they have the same
5686          * bytenr, so walk back one more just in case.  Dear future traveler,
5687          * first congrats on mastering time travel.  Now if it's not too much
5688          * trouble could you go back to 2006 and tell Chris to make the
5689          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5690          * EXTENT_ITEM_KEY please?
5691          */
5692         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5693                 if (path->slots[0] > 0) {
5694                         path->slots[0]--;
5695                 } else {
5696                         ret = btrfs_prev_leaf(root, path);
5697                         if (ret < 0) {
5698                                 goto out;
5699                         } else if (ret > 0) {
5700                                 ret = 0;
5701                                 goto out;
5702                         }
5703                 }
5704                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5705         }
5706
5707         while (num_bytes) {
5708                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5709                         ret = btrfs_next_leaf(root, path);
5710                         if (ret < 0) {
5711                                 fprintf(stderr, "Error going to next leaf "
5712                                         "%d\n", ret);
5713                                 btrfs_free_path(path);
5714                                 return ret;
5715                         } else if (ret) {
5716                                 break;
5717                         }
5718                 }
5719                 leaf = path->nodes[0];
5720                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5721                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5722                         path->slots[0]++;
5723                         continue;
5724                 }
5725                 if (key.objectid + key.offset < bytenr) {
5726                         path->slots[0]++;
5727                         continue;
5728                 }
5729                 if (key.objectid > bytenr + num_bytes)
5730                         break;
5731
5732                 if (key.objectid == bytenr) {
5733                         if (key.offset >= num_bytes) {
5734                                 num_bytes = 0;
5735                                 break;
5736                         }
5737                         num_bytes -= key.offset;
5738                         bytenr += key.offset;
5739                 } else if (key.objectid < bytenr) {
5740                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5741                                 num_bytes = 0;
5742                                 break;
5743                         }
5744                         num_bytes = (bytenr + num_bytes) -
5745                                 (key.objectid + key.offset);
5746                         bytenr = key.objectid + key.offset;
5747                 } else {
5748                         if (key.objectid + key.offset < bytenr + num_bytes) {
5749                                 u64 new_start = key.objectid + key.offset;
5750                                 u64 new_bytes = bytenr + num_bytes - new_start;
5751
5752                                 /*
5753                                  * Weird case, the extent is in the middle of
5754                                  * our range, we'll have to search one side
5755                                  * and then the other.  Not sure if this happens
5756                                  * in real life, but no harm in coding it up
5757                                  * anyway just in case.
5758                                  */
5759                                 btrfs_release_path(path);
5760                                 ret = check_extent_exists(root, new_start,
5761                                                           new_bytes);
5762                                 if (ret) {
5763                                         fprintf(stderr, "Right section didn't "
5764                                                 "have a record\n");
5765                                         break;
5766                                 }
5767                                 num_bytes = key.objectid - bytenr;
5768                                 goto again;
5769                         }
5770                         num_bytes = key.objectid - bytenr;
5771                 }
5772                 path->slots[0]++;
5773         }
5774         ret = 0;
5775
5776 out:
5777         if (num_bytes && !ret) {
5778                 fprintf(stderr, "There are no extents for csum range "
5779                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5780                 ret = 1;
5781         }
5782
5783         btrfs_free_path(path);
5784         return ret;
5785 }
5786
5787 static int check_csums(struct btrfs_root *root)
5788 {
5789         struct btrfs_path *path;
5790         struct extent_buffer *leaf;
5791         struct btrfs_key key;
5792         u64 offset = 0, num_bytes = 0;
5793         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5794         int errors = 0;
5795         int ret;
5796         u64 data_len;
5797         unsigned long leaf_offset;
5798
5799         root = root->fs_info->csum_root;
5800         if (!extent_buffer_uptodate(root->node)) {
5801                 fprintf(stderr, "No valid csum tree found\n");
5802                 return -ENOENT;
5803         }
5804
5805         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5806         key.type = BTRFS_EXTENT_CSUM_KEY;
5807         key.offset = 0;
5808
5809         path = btrfs_alloc_path();
5810         if (!path)
5811                 return -ENOMEM;
5812
5813         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5814         if (ret < 0) {
5815                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5816                 btrfs_free_path(path);
5817                 return ret;
5818         }
5819
5820         if (ret > 0 && path->slots[0])
5821                 path->slots[0]--;
5822         ret = 0;
5823
5824         while (1) {
5825                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5826                         ret = btrfs_next_leaf(root, path);
5827                         if (ret < 0) {
5828                                 fprintf(stderr, "Error going to next leaf "
5829                                         "%d\n", ret);
5830                                 break;
5831                         }
5832                         if (ret)
5833                                 break;
5834                 }
5835                 leaf = path->nodes[0];
5836
5837                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5838                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5839                         path->slots[0]++;
5840                         continue;
5841                 }
5842
5843                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5844                               csum_size) * root->sectorsize;
5845                 if (!check_data_csum)
5846                         goto skip_csum_check;
5847                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5848                 ret = check_extent_csums(root, key.offset, data_len,
5849                                          leaf_offset, leaf);
5850                 if (ret)
5851                         break;
5852 skip_csum_check:
5853                 if (!num_bytes) {
5854                         offset = key.offset;
5855                 } else if (key.offset != offset + num_bytes) {
5856                         ret = check_extent_exists(root, offset, num_bytes);
5857                         if (ret) {
5858                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5859                                         "there is no extent record\n",
5860                                         offset, offset+num_bytes);
5861                                 errors++;
5862                         }
5863                         offset = key.offset;
5864                         num_bytes = 0;
5865                 }
5866                 num_bytes += data_len;
5867                 path->slots[0]++;
5868         }
5869
5870         btrfs_free_path(path);
5871         return errors;
5872 }
5873
5874 static int is_dropped_key(struct btrfs_key *key,
5875                           struct btrfs_key *drop_key) {
5876         if (key->objectid < drop_key->objectid)
5877                 return 1;
5878         else if (key->objectid == drop_key->objectid) {
5879                 if (key->type < drop_key->type)
5880                         return 1;
5881                 else if (key->type == drop_key->type) {
5882                         if (key->offset < drop_key->offset)
5883                                 return 1;
5884                 }
5885         }
5886         return 0;
5887 }
5888
5889 /*
5890  * Here are the rules for FULL_BACKREF.
5891  *
5892  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5893  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5894  *      FULL_BACKREF set.
5895  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5896  *    if it happened after the relocation occurred since we'll have dropped the
5897  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5898  *    have no real way to know for sure.
5899  *
5900  * We process the blocks one root at a time, and we start from the lowest root
5901  * objectid and go to the highest.  So we can just lookup the owner backref for
5902  * the record and if we don't find it then we know it doesn't exist and we have
5903  * a FULL BACKREF.
5904  *
5905  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5906  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5907  * be set or not and then we can check later once we've gathered all the refs.
5908  */
5909 static int calc_extent_flag(struct btrfs_root *root,
5910                            struct cache_tree *extent_cache,
5911                            struct extent_buffer *buf,
5912                            struct root_item_record *ri,
5913                            u64 *flags)
5914 {
5915         struct extent_record *rec;
5916         struct cache_extent *cache;
5917         struct tree_backref *tback;
5918         u64 owner = 0;
5919
5920         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5921         /* we have added this extent before */
5922         BUG_ON(!cache);
5923         rec = container_of(cache, struct extent_record, cache);
5924
5925         /*
5926          * Except file/reloc tree, we can not have
5927          * FULL BACKREF MODE
5928          */
5929         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5930                 goto normal;
5931         /*
5932          * root node
5933          */
5934         if (buf->start == ri->bytenr)
5935                 goto normal;
5936
5937         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5938                 goto full_backref;
5939
5940         owner = btrfs_header_owner(buf);
5941         if (owner == ri->objectid)
5942                 goto normal;
5943
5944         tback = find_tree_backref(rec, 0, owner);
5945         if (!tback)
5946                 goto full_backref;
5947 normal:
5948         *flags = 0;
5949         if (rec->flag_block_full_backref != -1 &&
5950             rec->flag_block_full_backref != 0)
5951                 rec->bad_full_backref = 1;
5952         return 0;
5953 full_backref:
5954         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5955         if (rec->flag_block_full_backref != -1 &&
5956             rec->flag_block_full_backref != 1)
5957                 rec->bad_full_backref = 1;
5958         return 0;
5959 }
5960
5961 static int run_next_block(struct btrfs_root *root,
5962                           struct block_info *bits,
5963                           int bits_nr,
5964                           u64 *last,
5965                           struct cache_tree *pending,
5966                           struct cache_tree *seen,
5967                           struct cache_tree *reada,
5968                           struct cache_tree *nodes,
5969                           struct cache_tree *extent_cache,
5970                           struct cache_tree *chunk_cache,
5971                           struct rb_root *dev_cache,
5972                           struct block_group_tree *block_group_cache,
5973                           struct device_extent_tree *dev_extent_cache,
5974                           struct root_item_record *ri)
5975 {
5976         struct extent_buffer *buf;
5977         struct extent_record *rec = NULL;
5978         u64 bytenr;
5979         u32 size;
5980         u64 parent;
5981         u64 owner;
5982         u64 flags;
5983         u64 ptr;
5984         u64 gen = 0;
5985         int ret = 0;
5986         int i;
5987         int nritems;
5988         struct btrfs_key key;
5989         struct cache_extent *cache;
5990         int reada_bits;
5991
5992         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5993                                     bits_nr, &reada_bits);
5994         if (nritems == 0)
5995                 return 1;
5996
5997         if (!reada_bits) {
5998                 for(i = 0; i < nritems; i++) {
5999                         ret = add_cache_extent(reada, bits[i].start,
6000                                                bits[i].size);
6001                         if (ret == -EEXIST)
6002                                 continue;
6003
6004                         /* fixme, get the parent transid */
6005                         readahead_tree_block(root, bits[i].start,
6006                                              bits[i].size, 0);
6007                 }
6008         }
6009         *last = bits[0].start;
6010         bytenr = bits[0].start;
6011         size = bits[0].size;
6012
6013         cache = lookup_cache_extent(pending, bytenr, size);
6014         if (cache) {
6015                 remove_cache_extent(pending, cache);
6016                 free(cache);
6017         }
6018         cache = lookup_cache_extent(reada, bytenr, size);
6019         if (cache) {
6020                 remove_cache_extent(reada, cache);
6021                 free(cache);
6022         }
6023         cache = lookup_cache_extent(nodes, bytenr, size);
6024         if (cache) {
6025                 remove_cache_extent(nodes, cache);
6026                 free(cache);
6027         }
6028         cache = lookup_cache_extent(extent_cache, bytenr, size);
6029         if (cache) {
6030                 rec = container_of(cache, struct extent_record, cache);
6031                 gen = rec->parent_generation;
6032         }
6033
6034         /* fixme, get the real parent transid */
6035         buf = read_tree_block(root, bytenr, size, gen);
6036         if (!extent_buffer_uptodate(buf)) {
6037                 record_bad_block_io(root->fs_info,
6038                                     extent_cache, bytenr, size);
6039                 goto out;
6040         }
6041
6042         nritems = btrfs_header_nritems(buf);
6043
6044         flags = 0;
6045         if (!init_extent_tree) {
6046                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6047                                        btrfs_header_level(buf), 1, NULL,
6048                                        &flags);
6049                 if (ret < 0) {
6050                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6051                         if (ret < 0) {
6052                                 fprintf(stderr, "Couldn't calc extent flags\n");
6053                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6054                         }
6055                 }
6056         } else {
6057                 flags = 0;
6058                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6059                 if (ret < 0) {
6060                         fprintf(stderr, "Couldn't calc extent flags\n");
6061                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6062                 }
6063         }
6064
6065         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6066                 if (ri != NULL &&
6067                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6068                     ri->objectid == btrfs_header_owner(buf)) {
6069                         /*
6070                          * Ok we got to this block from it's original owner and
6071                          * we have FULL_BACKREF set.  Relocation can leave
6072                          * converted blocks over so this is altogether possible,
6073                          * however it's not possible if the generation > the
6074                          * last snapshot, so check for this case.
6075                          */
6076                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6077                             btrfs_header_generation(buf) > ri->last_snapshot) {
6078                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6079                                 rec->bad_full_backref = 1;
6080                         }
6081                 }
6082         } else {
6083                 if (ri != NULL &&
6084                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6085                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6086                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6087                         rec->bad_full_backref = 1;
6088                 }
6089         }
6090
6091         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6092                 rec->flag_block_full_backref = 1;
6093                 parent = bytenr;
6094                 owner = 0;
6095         } else {
6096                 rec->flag_block_full_backref = 0;
6097                 parent = 0;
6098                 owner = btrfs_header_owner(buf);
6099         }
6100
6101         ret = check_block(root, extent_cache, buf, flags);
6102         if (ret)
6103                 goto out;
6104
6105         if (btrfs_is_leaf(buf)) {
6106                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6107                 for (i = 0; i < nritems; i++) {
6108                         struct btrfs_file_extent_item *fi;
6109                         btrfs_item_key_to_cpu(buf, &key, i);
6110                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6111                                 process_extent_item(root, extent_cache, buf,
6112                                                     i);
6113                                 continue;
6114                         }
6115                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6116                                 process_extent_item(root, extent_cache, buf,
6117                                                     i);
6118                                 continue;
6119                         }
6120                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6121                                 total_csum_bytes +=
6122                                         btrfs_item_size_nr(buf, i);
6123                                 continue;
6124                         }
6125                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6126                                 process_chunk_item(chunk_cache, &key, buf, i);
6127                                 continue;
6128                         }
6129                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6130                                 process_device_item(dev_cache, &key, buf, i);
6131                                 continue;
6132                         }
6133                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6134                                 process_block_group_item(block_group_cache,
6135                                         &key, buf, i);
6136                                 continue;
6137                         }
6138                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6139                                 process_device_extent_item(dev_extent_cache,
6140                                         &key, buf, i);
6141                                 continue;
6142
6143                         }
6144                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6145 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6146                                 process_extent_ref_v0(extent_cache, buf, i);
6147 #else
6148                                 BUG();
6149 #endif
6150                                 continue;
6151                         }
6152
6153                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6154                                 add_tree_backref(extent_cache, key.objectid, 0,
6155                                                  key.offset, 0);
6156                                 continue;
6157                         }
6158                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6159                                 add_tree_backref(extent_cache, key.objectid,
6160                                                  key.offset, 0, 0);
6161                                 continue;
6162                         }
6163                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6164                                 struct btrfs_extent_data_ref *ref;
6165                                 ref = btrfs_item_ptr(buf, i,
6166                                                 struct btrfs_extent_data_ref);
6167                                 add_data_backref(extent_cache,
6168                                         key.objectid, 0,
6169                                         btrfs_extent_data_ref_root(buf, ref),
6170                                         btrfs_extent_data_ref_objectid(buf,
6171                                                                        ref),
6172                                         btrfs_extent_data_ref_offset(buf, ref),
6173                                         btrfs_extent_data_ref_count(buf, ref),
6174                                         0, root->sectorsize);
6175                                 continue;
6176                         }
6177                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6178                                 struct btrfs_shared_data_ref *ref;
6179                                 ref = btrfs_item_ptr(buf, i,
6180                                                 struct btrfs_shared_data_ref);
6181                                 add_data_backref(extent_cache,
6182                                         key.objectid, key.offset, 0, 0, 0,
6183                                         btrfs_shared_data_ref_count(buf, ref),
6184                                         0, root->sectorsize);
6185                                 continue;
6186                         }
6187                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6188                                 struct bad_item *bad;
6189
6190                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6191                                         continue;
6192                                 if (!owner)
6193                                         continue;
6194                                 bad = malloc(sizeof(struct bad_item));
6195                                 if (!bad)
6196                                         continue;
6197                                 INIT_LIST_HEAD(&bad->list);
6198                                 memcpy(&bad->key, &key,
6199                                        sizeof(struct btrfs_key));
6200                                 bad->root_id = owner;
6201                                 list_add_tail(&bad->list, &delete_items);
6202                                 continue;
6203                         }
6204                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6205                                 continue;
6206                         fi = btrfs_item_ptr(buf, i,
6207                                             struct btrfs_file_extent_item);
6208                         if (btrfs_file_extent_type(buf, fi) ==
6209                             BTRFS_FILE_EXTENT_INLINE)
6210                                 continue;
6211                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6212                                 continue;
6213
6214                         data_bytes_allocated +=
6215                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6216                         if (data_bytes_allocated < root->sectorsize) {
6217                                 abort();
6218                         }
6219                         data_bytes_referenced +=
6220                                 btrfs_file_extent_num_bytes(buf, fi);
6221                         add_data_backref(extent_cache,
6222                                 btrfs_file_extent_disk_bytenr(buf, fi),
6223                                 parent, owner, key.objectid, key.offset -
6224                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6225                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6226                 }
6227         } else {
6228                 int level;
6229                 struct btrfs_key first_key;
6230
6231                 first_key.objectid = 0;
6232
6233                 if (nritems > 0)
6234                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6235                 level = btrfs_header_level(buf);
6236                 for (i = 0; i < nritems; i++) {
6237                         ptr = btrfs_node_blockptr(buf, i);
6238                         size = root->nodesize;
6239                         btrfs_node_key_to_cpu(buf, &key, i);
6240                         if (ri != NULL) {
6241                                 if ((level == ri->drop_level)
6242                                     && is_dropped_key(&key, &ri->drop_key)) {
6243                                         continue;
6244                                 }
6245                         }
6246                         ret = add_extent_rec(extent_cache, &key,
6247                                              btrfs_node_ptr_generation(buf, i),
6248                                              ptr, size, 0, 0, 1, 0, 1, 0,
6249                                              size);
6250                         BUG_ON(ret);
6251
6252                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6253
6254                         if (level > 1) {
6255                                 add_pending(nodes, seen, ptr, size);
6256                         } else {
6257                                 add_pending(pending, seen, ptr, size);
6258                         }
6259                 }
6260                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6261                                       nritems) * sizeof(struct btrfs_key_ptr);
6262         }
6263         total_btree_bytes += buf->len;
6264         if (fs_root_objectid(btrfs_header_owner(buf)))
6265                 total_fs_tree_bytes += buf->len;
6266         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6267                 total_extent_tree_bytes += buf->len;
6268         if (!found_old_backref &&
6269             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6270             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6271             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6272                 found_old_backref = 1;
6273 out:
6274         free_extent_buffer(buf);
6275         return ret;
6276 }
6277
6278 static int add_root_to_pending(struct extent_buffer *buf,
6279                                struct cache_tree *extent_cache,
6280                                struct cache_tree *pending,
6281                                struct cache_tree *seen,
6282                                struct cache_tree *nodes,
6283                                u64 objectid)
6284 {
6285         if (btrfs_header_level(buf) > 0)
6286                 add_pending(nodes, seen, buf->start, buf->len);
6287         else
6288                 add_pending(pending, seen, buf->start, buf->len);
6289         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6290                        0, 1, 1, 0, 1, 0, buf->len);
6291
6292         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6293             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6294                 add_tree_backref(extent_cache, buf->start, buf->start,
6295                                  0, 1);
6296         else
6297                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6298         return 0;
6299 }
6300
6301 /* as we fix the tree, we might be deleting blocks that
6302  * we're tracking for repair.  This hook makes sure we
6303  * remove any backrefs for blocks as we are fixing them.
6304  */
6305 static int free_extent_hook(struct btrfs_trans_handle *trans,
6306                             struct btrfs_root *root,
6307                             u64 bytenr, u64 num_bytes, u64 parent,
6308                             u64 root_objectid, u64 owner, u64 offset,
6309                             int refs_to_drop)
6310 {
6311         struct extent_record *rec;
6312         struct cache_extent *cache;
6313         int is_data;
6314         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6315
6316         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6317         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6318         if (!cache)
6319                 return 0;
6320
6321         rec = container_of(cache, struct extent_record, cache);
6322         if (is_data) {
6323                 struct data_backref *back;
6324                 back = find_data_backref(rec, parent, root_objectid, owner,
6325                                          offset, 1, bytenr, num_bytes);
6326                 if (!back)
6327                         goto out;
6328                 if (back->node.found_ref) {
6329                         back->found_ref -= refs_to_drop;
6330                         if (rec->refs)
6331                                 rec->refs -= refs_to_drop;
6332                 }
6333                 if (back->node.found_extent_tree) {
6334                         back->num_refs -= refs_to_drop;
6335                         if (rec->extent_item_refs)
6336                                 rec->extent_item_refs -= refs_to_drop;
6337                 }
6338                 if (back->found_ref == 0)
6339                         back->node.found_ref = 0;
6340                 if (back->num_refs == 0)
6341                         back->node.found_extent_tree = 0;
6342
6343                 if (!back->node.found_extent_tree && back->node.found_ref) {
6344                         list_del(&back->node.list);
6345                         free(back);
6346                 }
6347         } else {
6348                 struct tree_backref *back;
6349                 back = find_tree_backref(rec, parent, root_objectid);
6350                 if (!back)
6351                         goto out;
6352                 if (back->node.found_ref) {
6353                         if (rec->refs)
6354                                 rec->refs--;
6355                         back->node.found_ref = 0;
6356                 }
6357                 if (back->node.found_extent_tree) {
6358                         if (rec->extent_item_refs)
6359                                 rec->extent_item_refs--;
6360                         back->node.found_extent_tree = 0;
6361                 }
6362                 if (!back->node.found_extent_tree && back->node.found_ref) {
6363                         list_del(&back->node.list);
6364                         free(back);
6365                 }
6366         }
6367         maybe_free_extent_rec(extent_cache, rec);
6368 out:
6369         return 0;
6370 }
6371
6372 static int delete_extent_records(struct btrfs_trans_handle *trans,
6373                                  struct btrfs_root *root,
6374                                  struct btrfs_path *path,
6375                                  u64 bytenr, u64 new_len)
6376 {
6377         struct btrfs_key key;
6378         struct btrfs_key found_key;
6379         struct extent_buffer *leaf;
6380         int ret;
6381         int slot;
6382
6383
6384         key.objectid = bytenr;
6385         key.type = (u8)-1;
6386         key.offset = (u64)-1;
6387
6388         while(1) {
6389                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6390                                         &key, path, 0, 1);
6391                 if (ret < 0)
6392                         break;
6393
6394                 if (ret > 0) {
6395                         ret = 0;
6396                         if (path->slots[0] == 0)
6397                                 break;
6398                         path->slots[0]--;
6399                 }
6400                 ret = 0;
6401
6402                 leaf = path->nodes[0];
6403                 slot = path->slots[0];
6404
6405                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6406                 if (found_key.objectid != bytenr)
6407                         break;
6408
6409                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6410                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6411                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6412                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6413                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6414                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6415                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6416                         btrfs_release_path(path);
6417                         if (found_key.type == 0) {
6418                                 if (found_key.offset == 0)
6419                                         break;
6420                                 key.offset = found_key.offset - 1;
6421                                 key.type = found_key.type;
6422                         }
6423                         key.type = found_key.type - 1;
6424                         key.offset = (u64)-1;
6425                         continue;
6426                 }
6427
6428                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6429                         found_key.objectid, found_key.type, found_key.offset);
6430
6431                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6432                 if (ret)
6433                         break;
6434                 btrfs_release_path(path);
6435
6436                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6437                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6438                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6439                                 found_key.offset : root->nodesize;
6440
6441                         ret = btrfs_update_block_group(trans, root, bytenr,
6442                                                        bytes, 0, 0);
6443                         if (ret)
6444                                 break;
6445                 }
6446         }
6447
6448         btrfs_release_path(path);
6449         return ret;
6450 }
6451
6452 /*
6453  * for a single backref, this will allocate a new extent
6454  * and add the backref to it.
6455  */
6456 static int record_extent(struct btrfs_trans_handle *trans,
6457                          struct btrfs_fs_info *info,
6458                          struct btrfs_path *path,
6459                          struct extent_record *rec,
6460                          struct extent_backref *back,
6461                          int allocated, u64 flags)
6462 {
6463         int ret;
6464         struct btrfs_root *extent_root = info->extent_root;
6465         struct extent_buffer *leaf;
6466         struct btrfs_key ins_key;
6467         struct btrfs_extent_item *ei;
6468         struct tree_backref *tback;
6469         struct data_backref *dback;
6470         struct btrfs_tree_block_info *bi;
6471
6472         if (!back->is_data)
6473                 rec->max_size = max_t(u64, rec->max_size,
6474                                     info->extent_root->nodesize);
6475
6476         if (!allocated) {
6477                 u32 item_size = sizeof(*ei);
6478
6479                 if (!back->is_data)
6480                         item_size += sizeof(*bi);
6481
6482                 ins_key.objectid = rec->start;
6483                 ins_key.offset = rec->max_size;
6484                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6485
6486                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6487                                         &ins_key, item_size);
6488                 if (ret)
6489                         goto fail;
6490
6491                 leaf = path->nodes[0];
6492                 ei = btrfs_item_ptr(leaf, path->slots[0],
6493                                     struct btrfs_extent_item);
6494
6495                 btrfs_set_extent_refs(leaf, ei, 0);
6496                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6497
6498                 if (back->is_data) {
6499                         btrfs_set_extent_flags(leaf, ei,
6500                                                BTRFS_EXTENT_FLAG_DATA);
6501                 } else {
6502                         struct btrfs_disk_key copy_key;;
6503
6504                         tback = (struct tree_backref *)back;
6505                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6506                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6507                                              sizeof(*bi));
6508
6509                         btrfs_set_disk_key_objectid(&copy_key,
6510                                                     rec->info_objectid);
6511                         btrfs_set_disk_key_type(&copy_key, 0);
6512                         btrfs_set_disk_key_offset(&copy_key, 0);
6513
6514                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6515                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6516
6517                         btrfs_set_extent_flags(leaf, ei,
6518                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6519                 }
6520
6521                 btrfs_mark_buffer_dirty(leaf);
6522                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6523                                                rec->max_size, 1, 0);
6524                 if (ret)
6525                         goto fail;
6526                 btrfs_release_path(path);
6527         }
6528
6529         if (back->is_data) {
6530                 u64 parent;
6531                 int i;
6532
6533                 dback = (struct data_backref *)back;
6534                 if (back->full_backref)
6535                         parent = dback->parent;
6536                 else
6537                         parent = 0;
6538
6539                 for (i = 0; i < dback->found_ref; i++) {
6540                         /* if parent != 0, we're doing a full backref
6541                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6542                          * just makes the backref allocator create a data
6543                          * backref
6544                          */
6545                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6546                                                    rec->start, rec->max_size,
6547                                                    parent,
6548                                                    dback->root,
6549                                                    parent ?
6550                                                    BTRFS_FIRST_FREE_OBJECTID :
6551                                                    dback->owner,
6552                                                    dback->offset);
6553                         if (ret)
6554                                 break;
6555                 }
6556                 fprintf(stderr, "adding new data backref"
6557                                 " on %llu %s %llu owner %llu"
6558                                 " offset %llu found %d\n",
6559                                 (unsigned long long)rec->start,
6560                                 back->full_backref ?
6561                                 "parent" : "root",
6562                                 back->full_backref ?
6563                                 (unsigned long long)parent :
6564                                 (unsigned long long)dback->root,
6565                                 (unsigned long long)dback->owner,
6566                                 (unsigned long long)dback->offset,
6567                                 dback->found_ref);
6568         } else {
6569                 u64 parent;
6570
6571                 tback = (struct tree_backref *)back;
6572                 if (back->full_backref)
6573                         parent = tback->parent;
6574                 else
6575                         parent = 0;
6576
6577                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6578                                            rec->start, rec->max_size,
6579                                            parent, tback->root, 0, 0);
6580                 fprintf(stderr, "adding new tree backref on "
6581                         "start %llu len %llu parent %llu root %llu\n",
6582                         rec->start, rec->max_size, parent, tback->root);
6583         }
6584 fail:
6585         btrfs_release_path(path);
6586         return ret;
6587 }
6588
6589 static struct extent_entry *find_entry(struct list_head *entries,
6590                                        u64 bytenr, u64 bytes)
6591 {
6592         struct extent_entry *entry = NULL;
6593
6594         list_for_each_entry(entry, entries, list) {
6595                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6596                         return entry;
6597         }
6598
6599         return NULL;
6600 }
6601
6602 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6603 {
6604         struct extent_entry *entry, *best = NULL, *prev = NULL;
6605
6606         list_for_each_entry(entry, entries, list) {
6607                 if (!prev) {
6608                         prev = entry;
6609                         continue;
6610                 }
6611
6612                 /*
6613                  * If there are as many broken entries as entries then we know
6614                  * not to trust this particular entry.
6615                  */
6616                 if (entry->broken == entry->count)
6617                         continue;
6618
6619                 /*
6620                  * If our current entry == best then we can't be sure our best
6621                  * is really the best, so we need to keep searching.
6622                  */
6623                 if (best && best->count == entry->count) {
6624                         prev = entry;
6625                         best = NULL;
6626                         continue;
6627                 }
6628
6629                 /* Prev == entry, not good enough, have to keep searching */
6630                 if (!prev->broken && prev->count == entry->count)
6631                         continue;
6632
6633                 if (!best)
6634                         best = (prev->count > entry->count) ? prev : entry;
6635                 else if (best->count < entry->count)
6636                         best = entry;
6637                 prev = entry;
6638         }
6639
6640         return best;
6641 }
6642
6643 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6644                       struct data_backref *dback, struct extent_entry *entry)
6645 {
6646         struct btrfs_trans_handle *trans;
6647         struct btrfs_root *root;
6648         struct btrfs_file_extent_item *fi;
6649         struct extent_buffer *leaf;
6650         struct btrfs_key key;
6651         u64 bytenr, bytes;
6652         int ret, err;
6653
6654         key.objectid = dback->root;
6655         key.type = BTRFS_ROOT_ITEM_KEY;
6656         key.offset = (u64)-1;
6657         root = btrfs_read_fs_root(info, &key);
6658         if (IS_ERR(root)) {
6659                 fprintf(stderr, "Couldn't find root for our ref\n");
6660                 return -EINVAL;
6661         }
6662
6663         /*
6664          * The backref points to the original offset of the extent if it was
6665          * split, so we need to search down to the offset we have and then walk
6666          * forward until we find the backref we're looking for.
6667          */
6668         key.objectid = dback->owner;
6669         key.type = BTRFS_EXTENT_DATA_KEY;
6670         key.offset = dback->offset;
6671         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6672         if (ret < 0) {
6673                 fprintf(stderr, "Error looking up ref %d\n", ret);
6674                 return ret;
6675         }
6676
6677         while (1) {
6678                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6679                         ret = btrfs_next_leaf(root, path);
6680                         if (ret) {
6681                                 fprintf(stderr, "Couldn't find our ref, next\n");
6682                                 return -EINVAL;
6683                         }
6684                 }
6685                 leaf = path->nodes[0];
6686                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6687                 if (key.objectid != dback->owner ||
6688                     key.type != BTRFS_EXTENT_DATA_KEY) {
6689                         fprintf(stderr, "Couldn't find our ref, search\n");
6690                         return -EINVAL;
6691                 }
6692                 fi = btrfs_item_ptr(leaf, path->slots[0],
6693                                     struct btrfs_file_extent_item);
6694                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6695                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6696
6697                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6698                         break;
6699                 path->slots[0]++;
6700         }
6701
6702         btrfs_release_path(path);
6703
6704         trans = btrfs_start_transaction(root, 1);
6705         if (IS_ERR(trans))
6706                 return PTR_ERR(trans);
6707
6708         /*
6709          * Ok we have the key of the file extent we want to fix, now we can cow
6710          * down to the thing and fix it.
6711          */
6712         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6713         if (ret < 0) {
6714                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6715                         key.objectid, key.type, key.offset, ret);
6716                 goto out;
6717         }
6718         if (ret > 0) {
6719                 fprintf(stderr, "Well that's odd, we just found this key "
6720                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6721                         key.offset);
6722                 ret = -EINVAL;
6723                 goto out;
6724         }
6725         leaf = path->nodes[0];
6726         fi = btrfs_item_ptr(leaf, path->slots[0],
6727                             struct btrfs_file_extent_item);
6728
6729         if (btrfs_file_extent_compression(leaf, fi) &&
6730             dback->disk_bytenr != entry->bytenr) {
6731                 fprintf(stderr, "Ref doesn't match the record start and is "
6732                         "compressed, please take a btrfs-image of this file "
6733                         "system and send it to a btrfs developer so they can "
6734                         "complete this functionality for bytenr %Lu\n",
6735                         dback->disk_bytenr);
6736                 ret = -EINVAL;
6737                 goto out;
6738         }
6739
6740         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6741                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6742         } else if (dback->disk_bytenr > entry->bytenr) {
6743                 u64 off_diff, offset;
6744
6745                 off_diff = dback->disk_bytenr - entry->bytenr;
6746                 offset = btrfs_file_extent_offset(leaf, fi);
6747                 if (dback->disk_bytenr + offset +
6748                     btrfs_file_extent_num_bytes(leaf, fi) >
6749                     entry->bytenr + entry->bytes) {
6750                         fprintf(stderr, "Ref is past the entry end, please "
6751                                 "take a btrfs-image of this file system and "
6752                                 "send it to a btrfs developer, ref %Lu\n",
6753                                 dback->disk_bytenr);
6754                         ret = -EINVAL;
6755                         goto out;
6756                 }
6757                 offset += off_diff;
6758                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6759                 btrfs_set_file_extent_offset(leaf, fi, offset);
6760         } else if (dback->disk_bytenr < entry->bytenr) {
6761                 u64 offset;
6762
6763                 offset = btrfs_file_extent_offset(leaf, fi);
6764                 if (dback->disk_bytenr + offset < entry->bytenr) {
6765                         fprintf(stderr, "Ref is before the entry start, please"
6766                                 " take a btrfs-image of this file system and "
6767                                 "send it to a btrfs developer, ref %Lu\n",
6768                                 dback->disk_bytenr);
6769                         ret = -EINVAL;
6770                         goto out;
6771                 }
6772
6773                 offset += dback->disk_bytenr;
6774                 offset -= entry->bytenr;
6775                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6776                 btrfs_set_file_extent_offset(leaf, fi, offset);
6777         }
6778
6779         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6780
6781         /*
6782          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6783          * only do this if we aren't using compression, otherwise it's a
6784          * trickier case.
6785          */
6786         if (!btrfs_file_extent_compression(leaf, fi))
6787                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6788         else
6789                 printf("ram bytes may be wrong?\n");
6790         btrfs_mark_buffer_dirty(leaf);
6791 out:
6792         err = btrfs_commit_transaction(trans, root);
6793         btrfs_release_path(path);
6794         return ret ? ret : err;
6795 }
6796
6797 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6798                            struct extent_record *rec)
6799 {
6800         struct extent_backref *back;
6801         struct data_backref *dback;
6802         struct extent_entry *entry, *best = NULL;
6803         LIST_HEAD(entries);
6804         int nr_entries = 0;
6805         int broken_entries = 0;
6806         int ret = 0;
6807         short mismatch = 0;
6808
6809         /*
6810          * Metadata is easy and the backrefs should always agree on bytenr and
6811          * size, if not we've got bigger issues.
6812          */
6813         if (rec->metadata)
6814                 return 0;
6815
6816         list_for_each_entry(back, &rec->backrefs, list) {
6817                 if (back->full_backref || !back->is_data)
6818                         continue;
6819
6820                 dback = (struct data_backref *)back;
6821
6822                 /*
6823                  * We only pay attention to backrefs that we found a real
6824                  * backref for.
6825                  */
6826                 if (dback->found_ref == 0)
6827                         continue;
6828
6829                 /*
6830                  * For now we only catch when the bytes don't match, not the
6831                  * bytenr.  We can easily do this at the same time, but I want
6832                  * to have a fs image to test on before we just add repair
6833                  * functionality willy-nilly so we know we won't screw up the
6834                  * repair.
6835                  */
6836
6837                 entry = find_entry(&entries, dback->disk_bytenr,
6838                                    dback->bytes);
6839                 if (!entry) {
6840                         entry = malloc(sizeof(struct extent_entry));
6841                         if (!entry) {
6842                                 ret = -ENOMEM;
6843                                 goto out;
6844                         }
6845                         memset(entry, 0, sizeof(*entry));
6846                         entry->bytenr = dback->disk_bytenr;
6847                         entry->bytes = dback->bytes;
6848                         list_add_tail(&entry->list, &entries);
6849                         nr_entries++;
6850                 }
6851
6852                 /*
6853                  * If we only have on entry we may think the entries agree when
6854                  * in reality they don't so we have to do some extra checking.
6855                  */
6856                 if (dback->disk_bytenr != rec->start ||
6857                     dback->bytes != rec->nr || back->broken)
6858                         mismatch = 1;
6859
6860                 if (back->broken) {
6861                         entry->broken++;
6862                         broken_entries++;
6863                 }
6864
6865                 entry->count++;
6866         }
6867
6868         /* Yay all the backrefs agree, carry on good sir */
6869         if (nr_entries <= 1 && !mismatch)
6870                 goto out;
6871
6872         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6873                 "%Lu\n", rec->start);
6874
6875         /*
6876          * First we want to see if the backrefs can agree amongst themselves who
6877          * is right, so figure out which one of the entries has the highest
6878          * count.
6879          */
6880         best = find_most_right_entry(&entries);
6881
6882         /*
6883          * Ok so we may have an even split between what the backrefs think, so
6884          * this is where we use the extent ref to see what it thinks.
6885          */
6886         if (!best) {
6887                 entry = find_entry(&entries, rec->start, rec->nr);
6888                 if (!entry && (!broken_entries || !rec->found_rec)) {
6889                         fprintf(stderr, "Backrefs don't agree with each other "
6890                                 "and extent record doesn't agree with anybody,"
6891                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6892                                 rec->start, rec->nr);
6893                         ret = -EINVAL;
6894                         goto out;
6895                 } else if (!entry) {
6896                         /*
6897                          * Ok our backrefs were broken, we'll assume this is the
6898                          * correct value and add an entry for this range.
6899                          */
6900                         entry = malloc(sizeof(struct extent_entry));
6901                         if (!entry) {
6902                                 ret = -ENOMEM;
6903                                 goto out;
6904                         }
6905                         memset(entry, 0, sizeof(*entry));
6906                         entry->bytenr = rec->start;
6907                         entry->bytes = rec->nr;
6908                         list_add_tail(&entry->list, &entries);
6909                         nr_entries++;
6910                 }
6911                 entry->count++;
6912                 best = find_most_right_entry(&entries);
6913                 if (!best) {
6914                         fprintf(stderr, "Backrefs and extent record evenly "
6915                                 "split on who is right, this is going to "
6916                                 "require user input to fix bytenr %Lu bytes "
6917                                 "%Lu\n", rec->start, rec->nr);
6918                         ret = -EINVAL;
6919                         goto out;
6920                 }
6921         }
6922
6923         /*
6924          * I don't think this can happen currently as we'll abort() if we catch
6925          * this case higher up, but in case somebody removes that we still can't
6926          * deal with it properly here yet, so just bail out of that's the case.
6927          */
6928         if (best->bytenr != rec->start) {
6929                 fprintf(stderr, "Extent start and backref starts don't match, "
6930                         "please use btrfs-image on this file system and send "
6931                         "it to a btrfs developer so they can make fsck fix "
6932                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6933                         rec->start, rec->nr);
6934                 ret = -EINVAL;
6935                 goto out;
6936         }
6937
6938         /*
6939          * Ok great we all agreed on an extent record, let's go find the real
6940          * references and fix up the ones that don't match.
6941          */
6942         list_for_each_entry(back, &rec->backrefs, list) {
6943                 if (back->full_backref || !back->is_data)
6944                         continue;
6945
6946                 dback = (struct data_backref *)back;
6947
6948                 /*
6949                  * Still ignoring backrefs that don't have a real ref attached
6950                  * to them.
6951                  */
6952                 if (dback->found_ref == 0)
6953                         continue;
6954
6955                 if (dback->bytes == best->bytes &&
6956                     dback->disk_bytenr == best->bytenr)
6957                         continue;
6958
6959                 ret = repair_ref(info, path, dback, best);
6960                 if (ret)
6961                         goto out;
6962         }
6963
6964         /*
6965          * Ok we messed with the actual refs, which means we need to drop our
6966          * entire cache and go back and rescan.  I know this is a huge pain and
6967          * adds a lot of extra work, but it's the only way to be safe.  Once all
6968          * the backrefs agree we may not need to do anything to the extent
6969          * record itself.
6970          */
6971         ret = -EAGAIN;
6972 out:
6973         while (!list_empty(&entries)) {
6974                 entry = list_entry(entries.next, struct extent_entry, list);
6975                 list_del_init(&entry->list);
6976                 free(entry);
6977         }
6978         return ret;
6979 }
6980
6981 static int process_duplicates(struct btrfs_root *root,
6982                               struct cache_tree *extent_cache,
6983                               struct extent_record *rec)
6984 {
6985         struct extent_record *good, *tmp;
6986         struct cache_extent *cache;
6987         int ret;
6988
6989         /*
6990          * If we found a extent record for this extent then return, or if we
6991          * have more than one duplicate we are likely going to need to delete
6992          * something.
6993          */
6994         if (rec->found_rec || rec->num_duplicates > 1)
6995                 return 0;
6996
6997         /* Shouldn't happen but just in case */
6998         BUG_ON(!rec->num_duplicates);
6999
7000         /*
7001          * So this happens if we end up with a backref that doesn't match the
7002          * actual extent entry.  So either the backref is bad or the extent
7003          * entry is bad.  Either way we want to have the extent_record actually
7004          * reflect what we found in the extent_tree, so we need to take the
7005          * duplicate out and use that as the extent_record since the only way we
7006          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7007          */
7008         remove_cache_extent(extent_cache, &rec->cache);
7009
7010         good = list_entry(rec->dups.next, struct extent_record, list);
7011         list_del_init(&good->list);
7012         INIT_LIST_HEAD(&good->backrefs);
7013         INIT_LIST_HEAD(&good->dups);
7014         good->cache.start = good->start;
7015         good->cache.size = good->nr;
7016         good->content_checked = 0;
7017         good->owner_ref_checked = 0;
7018         good->num_duplicates = 0;
7019         good->refs = rec->refs;
7020         list_splice_init(&rec->backrefs, &good->backrefs);
7021         while (1) {
7022                 cache = lookup_cache_extent(extent_cache, good->start,
7023                                             good->nr);
7024                 if (!cache)
7025                         break;
7026                 tmp = container_of(cache, struct extent_record, cache);
7027
7028                 /*
7029                  * If we find another overlapping extent and it's found_rec is
7030                  * set then it's a duplicate and we need to try and delete
7031                  * something.
7032                  */
7033                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7034                         if (list_empty(&good->list))
7035                                 list_add_tail(&good->list,
7036                                               &duplicate_extents);
7037                         good->num_duplicates += tmp->num_duplicates + 1;
7038                         list_splice_init(&tmp->dups, &good->dups);
7039                         list_del_init(&tmp->list);
7040                         list_add_tail(&tmp->list, &good->dups);
7041                         remove_cache_extent(extent_cache, &tmp->cache);
7042                         continue;
7043                 }
7044
7045                 /*
7046                  * Ok we have another non extent item backed extent rec, so lets
7047                  * just add it to this extent and carry on like we did above.
7048                  */
7049                 good->refs += tmp->refs;
7050                 list_splice_init(&tmp->backrefs, &good->backrefs);
7051                 remove_cache_extent(extent_cache, &tmp->cache);
7052                 free(tmp);
7053         }
7054         ret = insert_cache_extent(extent_cache, &good->cache);
7055         BUG_ON(ret);
7056         free(rec);
7057         return good->num_duplicates ? 0 : 1;
7058 }
7059
7060 static int delete_duplicate_records(struct btrfs_root *root,
7061                                     struct extent_record *rec)
7062 {
7063         struct btrfs_trans_handle *trans;
7064         LIST_HEAD(delete_list);
7065         struct btrfs_path *path;
7066         struct extent_record *tmp, *good, *n;
7067         int nr_del = 0;
7068         int ret = 0, err;
7069         struct btrfs_key key;
7070
7071         path = btrfs_alloc_path();
7072         if (!path) {
7073                 ret = -ENOMEM;
7074                 goto out;
7075         }
7076
7077         good = rec;
7078         /* Find the record that covers all of the duplicates. */
7079         list_for_each_entry(tmp, &rec->dups, list) {
7080                 if (good->start < tmp->start)
7081                         continue;
7082                 if (good->nr > tmp->nr)
7083                         continue;
7084
7085                 if (tmp->start + tmp->nr < good->start + good->nr) {
7086                         fprintf(stderr, "Ok we have overlapping extents that "
7087                                 "aren't completely covered by eachother, this "
7088                                 "is going to require more careful thought.  "
7089                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7090                                 tmp->start, tmp->nr, good->start, good->nr);
7091                         abort();
7092                 }
7093                 good = tmp;
7094         }
7095
7096         if (good != rec)
7097                 list_add_tail(&rec->list, &delete_list);
7098
7099         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7100                 if (tmp == good)
7101                         continue;
7102                 list_move_tail(&tmp->list, &delete_list);
7103         }
7104
7105         root = root->fs_info->extent_root;
7106         trans = btrfs_start_transaction(root, 1);
7107         if (IS_ERR(trans)) {
7108                 ret = PTR_ERR(trans);
7109                 goto out;
7110         }
7111
7112         list_for_each_entry(tmp, &delete_list, list) {
7113                 if (tmp->found_rec == 0)
7114                         continue;
7115                 key.objectid = tmp->start;
7116                 key.type = BTRFS_EXTENT_ITEM_KEY;
7117                 key.offset = tmp->nr;
7118
7119                 /* Shouldn't happen but just in case */
7120                 if (tmp->metadata) {
7121                         fprintf(stderr, "Well this shouldn't happen, extent "
7122                                 "record overlaps but is metadata? "
7123                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7124                         abort();
7125                 }
7126
7127                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7128                 if (ret) {
7129                         if (ret > 0)
7130                                 ret = -EINVAL;
7131                         break;
7132                 }
7133                 ret = btrfs_del_item(trans, root, path);
7134                 if (ret)
7135                         break;
7136                 btrfs_release_path(path);
7137                 nr_del++;
7138         }
7139         err = btrfs_commit_transaction(trans, root);
7140         if (err && !ret)
7141                 ret = err;
7142 out:
7143         while (!list_empty(&delete_list)) {
7144                 tmp = list_entry(delete_list.next, struct extent_record, list);
7145                 list_del_init(&tmp->list);
7146                 if (tmp == rec)
7147                         continue;
7148                 free(tmp);
7149         }
7150
7151         while (!list_empty(&rec->dups)) {
7152                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7153                 list_del_init(&tmp->list);
7154                 free(tmp);
7155         }
7156
7157         btrfs_free_path(path);
7158
7159         if (!ret && !nr_del)
7160                 rec->num_duplicates = 0;
7161
7162         return ret ? ret : nr_del;
7163 }
7164
7165 static int find_possible_backrefs(struct btrfs_fs_info *info,
7166                                   struct btrfs_path *path,
7167                                   struct cache_tree *extent_cache,
7168                                   struct extent_record *rec)
7169 {
7170         struct btrfs_root *root;
7171         struct extent_backref *back;
7172         struct data_backref *dback;
7173         struct cache_extent *cache;
7174         struct btrfs_file_extent_item *fi;
7175         struct btrfs_key key;
7176         u64 bytenr, bytes;
7177         int ret;
7178
7179         list_for_each_entry(back, &rec->backrefs, list) {
7180                 /* Don't care about full backrefs (poor unloved backrefs) */
7181                 if (back->full_backref || !back->is_data)
7182                         continue;
7183
7184                 dback = (struct data_backref *)back;
7185
7186                 /* We found this one, we don't need to do a lookup */
7187                 if (dback->found_ref)
7188                         continue;
7189
7190                 key.objectid = dback->root;
7191                 key.type = BTRFS_ROOT_ITEM_KEY;
7192                 key.offset = (u64)-1;
7193
7194                 root = btrfs_read_fs_root(info, &key);
7195
7196                 /* No root, definitely a bad ref, skip */
7197                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7198                         continue;
7199                 /* Other err, exit */
7200                 if (IS_ERR(root))
7201                         return PTR_ERR(root);
7202
7203                 key.objectid = dback->owner;
7204                 key.type = BTRFS_EXTENT_DATA_KEY;
7205                 key.offset = dback->offset;
7206                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7207                 if (ret) {
7208                         btrfs_release_path(path);
7209                         if (ret < 0)
7210                                 return ret;
7211                         /* Didn't find it, we can carry on */
7212                         ret = 0;
7213                         continue;
7214                 }
7215
7216                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7217                                     struct btrfs_file_extent_item);
7218                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7219                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7220                 btrfs_release_path(path);
7221                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7222                 if (cache) {
7223                         struct extent_record *tmp;
7224                         tmp = container_of(cache, struct extent_record, cache);
7225
7226                         /*
7227                          * If we found an extent record for the bytenr for this
7228                          * particular backref then we can't add it to our
7229                          * current extent record.  We only want to add backrefs
7230                          * that don't have a corresponding extent item in the
7231                          * extent tree since they likely belong to this record
7232                          * and we need to fix it if it doesn't match bytenrs.
7233                          */
7234                         if  (tmp->found_rec)
7235                                 continue;
7236                 }
7237
7238                 dback->found_ref += 1;
7239                 dback->disk_bytenr = bytenr;
7240                 dback->bytes = bytes;
7241
7242                 /*
7243                  * Set this so the verify backref code knows not to trust the
7244                  * values in this backref.
7245                  */
7246                 back->broken = 1;
7247         }
7248
7249         return 0;
7250 }
7251
7252 /*
7253  * Record orphan data ref into corresponding root.
7254  *
7255  * Return 0 if the extent item contains data ref and recorded.
7256  * Return 1 if the extent item contains no useful data ref
7257  *   On that case, it may contains only shared_dataref or metadata backref
7258  *   or the file extent exists(this should be handled by the extent bytenr
7259  *   recovery routine)
7260  * Return <0 if something goes wrong.
7261  */
7262 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7263                                       struct extent_record *rec)
7264 {
7265         struct btrfs_key key;
7266         struct btrfs_root *dest_root;
7267         struct extent_backref *back;
7268         struct data_backref *dback;
7269         struct orphan_data_extent *orphan;
7270         struct btrfs_path *path;
7271         int recorded_data_ref = 0;
7272         int ret = 0;
7273
7274         if (rec->metadata)
7275                 return 1;
7276         path = btrfs_alloc_path();
7277         if (!path)
7278                 return -ENOMEM;
7279         list_for_each_entry(back, &rec->backrefs, list) {
7280                 if (back->full_backref || !back->is_data ||
7281                     !back->found_extent_tree)
7282                         continue;
7283                 dback = (struct data_backref *)back;
7284                 if (dback->found_ref)
7285                         continue;
7286                 key.objectid = dback->root;
7287                 key.type = BTRFS_ROOT_ITEM_KEY;
7288                 key.offset = (u64)-1;
7289
7290                 dest_root = btrfs_read_fs_root(fs_info, &key);
7291
7292                 /* For non-exist root we just skip it */
7293                 if (IS_ERR(dest_root) || !dest_root)
7294                         continue;
7295
7296                 key.objectid = dback->owner;
7297                 key.type = BTRFS_EXTENT_DATA_KEY;
7298                 key.offset = dback->offset;
7299
7300                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7301                 /*
7302                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7303                  * we need to record it for inode/file extent rebuild.
7304                  * For ret > 0, we record it only for file extent rebuild.
7305                  * For ret == 0, the file extent exists but only bytenr
7306                  * mismatch, let the original bytenr fix routine to handle,
7307                  * don't record it.
7308                  */
7309                 if (ret == 0)
7310                         continue;
7311                 ret = 0;
7312                 orphan = malloc(sizeof(*orphan));
7313                 if (!orphan) {
7314                         ret = -ENOMEM;
7315                         goto out;
7316                 }
7317                 INIT_LIST_HEAD(&orphan->list);
7318                 orphan->root = dback->root;
7319                 orphan->objectid = dback->owner;
7320                 orphan->offset = dback->offset;
7321                 orphan->disk_bytenr = rec->cache.start;
7322                 orphan->disk_len = rec->cache.size;
7323                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7324                 recorded_data_ref = 1;
7325         }
7326 out:
7327         btrfs_free_path(path);
7328         if (!ret)
7329                 return !recorded_data_ref;
7330         else
7331                 return ret;
7332 }
7333
7334 /*
7335  * when an incorrect extent item is found, this will delete
7336  * all of the existing entries for it and recreate them
7337  * based on what the tree scan found.
7338  */
7339 static int fixup_extent_refs(struct btrfs_fs_info *info,
7340                              struct cache_tree *extent_cache,
7341                              struct extent_record *rec)
7342 {
7343         struct btrfs_trans_handle *trans = NULL;
7344         int ret;
7345         struct btrfs_path *path;
7346         struct list_head *cur = rec->backrefs.next;
7347         struct cache_extent *cache;
7348         struct extent_backref *back;
7349         int allocated = 0;
7350         u64 flags = 0;
7351
7352         if (rec->flag_block_full_backref)
7353                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7354
7355         path = btrfs_alloc_path();
7356         if (!path)
7357                 return -ENOMEM;
7358
7359         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7360                 /*
7361                  * Sometimes the backrefs themselves are so broken they don't
7362                  * get attached to any meaningful rec, so first go back and
7363                  * check any of our backrefs that we couldn't find and throw
7364                  * them into the list if we find the backref so that
7365                  * verify_backrefs can figure out what to do.
7366                  */
7367                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7368                 if (ret < 0)
7369                         goto out;
7370         }
7371
7372         /* step one, make sure all of the backrefs agree */
7373         ret = verify_backrefs(info, path, rec);
7374         if (ret < 0)
7375                 goto out;
7376
7377         trans = btrfs_start_transaction(info->extent_root, 1);
7378         if (IS_ERR(trans)) {
7379                 ret = PTR_ERR(trans);
7380                 goto out;
7381         }
7382
7383         /* step two, delete all the existing records */
7384         ret = delete_extent_records(trans, info->extent_root, path,
7385                                     rec->start, rec->max_size);
7386
7387         if (ret < 0)
7388                 goto out;
7389
7390         /* was this block corrupt?  If so, don't add references to it */
7391         cache = lookup_cache_extent(info->corrupt_blocks,
7392                                     rec->start, rec->max_size);
7393         if (cache) {
7394                 ret = 0;
7395                 goto out;
7396         }
7397
7398         /* step three, recreate all the refs we did find */
7399         while(cur != &rec->backrefs) {
7400                 back = list_entry(cur, struct extent_backref, list);
7401                 cur = cur->next;
7402
7403                 /*
7404                  * if we didn't find any references, don't create a
7405                  * new extent record
7406                  */
7407                 if (!back->found_ref)
7408                         continue;
7409
7410                 rec->bad_full_backref = 0;
7411                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7412                 allocated = 1;
7413
7414                 if (ret)
7415                         goto out;
7416         }
7417 out:
7418         if (trans) {
7419                 int err = btrfs_commit_transaction(trans, info->extent_root);
7420                 if (!ret)
7421                         ret = err;
7422         }
7423
7424         btrfs_free_path(path);
7425         return ret;
7426 }
7427
7428 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7429                               struct extent_record *rec)
7430 {
7431         struct btrfs_trans_handle *trans;
7432         struct btrfs_root *root = fs_info->extent_root;
7433         struct btrfs_path *path;
7434         struct btrfs_extent_item *ei;
7435         struct btrfs_key key;
7436         u64 flags;
7437         int ret = 0;
7438
7439         key.objectid = rec->start;
7440         if (rec->metadata) {
7441                 key.type = BTRFS_METADATA_ITEM_KEY;
7442                 key.offset = rec->info_level;
7443         } else {
7444                 key.type = BTRFS_EXTENT_ITEM_KEY;
7445                 key.offset = rec->max_size;
7446         }
7447
7448         path = btrfs_alloc_path();
7449         if (!path)
7450                 return -ENOMEM;
7451
7452         trans = btrfs_start_transaction(root, 0);
7453         if (IS_ERR(trans)) {
7454                 btrfs_free_path(path);
7455                 return PTR_ERR(trans);
7456         }
7457
7458         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7459         if (ret < 0) {
7460                 btrfs_free_path(path);
7461                 btrfs_commit_transaction(trans, root);
7462                 return ret;
7463         } else if (ret) {
7464                 fprintf(stderr, "Didn't find extent for %llu\n",
7465                         (unsigned long long)rec->start);
7466                 btrfs_free_path(path);
7467                 btrfs_commit_transaction(trans, root);
7468                 return -ENOENT;
7469         }
7470
7471         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7472                             struct btrfs_extent_item);
7473         flags = btrfs_extent_flags(path->nodes[0], ei);
7474         if (rec->flag_block_full_backref) {
7475                 fprintf(stderr, "setting full backref on %llu\n",
7476                         (unsigned long long)key.objectid);
7477                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7478         } else {
7479                 fprintf(stderr, "clearing full backref on %llu\n",
7480                         (unsigned long long)key.objectid);
7481                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7482         }
7483         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7484         btrfs_mark_buffer_dirty(path->nodes[0]);
7485         btrfs_free_path(path);
7486         return btrfs_commit_transaction(trans, root);
7487 }
7488
7489 /* right now we only prune from the extent allocation tree */
7490 static int prune_one_block(struct btrfs_trans_handle *trans,
7491                            struct btrfs_fs_info *info,
7492                            struct btrfs_corrupt_block *corrupt)
7493 {
7494         int ret;
7495         struct btrfs_path path;
7496         struct extent_buffer *eb;
7497         u64 found;
7498         int slot;
7499         int nritems;
7500         int level = corrupt->level + 1;
7501
7502         btrfs_init_path(&path);
7503 again:
7504         /* we want to stop at the parent to our busted block */
7505         path.lowest_level = level;
7506
7507         ret = btrfs_search_slot(trans, info->extent_root,
7508                                 &corrupt->key, &path, -1, 1);
7509
7510         if (ret < 0)
7511                 goto out;
7512
7513         eb = path.nodes[level];
7514         if (!eb) {
7515                 ret = -ENOENT;
7516                 goto out;
7517         }
7518
7519         /*
7520          * hopefully the search gave us the block we want to prune,
7521          * lets try that first
7522          */
7523         slot = path.slots[level];
7524         found =  btrfs_node_blockptr(eb, slot);
7525         if (found == corrupt->cache.start)
7526                 goto del_ptr;
7527
7528         nritems = btrfs_header_nritems(eb);
7529
7530         /* the search failed, lets scan this node and hope we find it */
7531         for (slot = 0; slot < nritems; slot++) {
7532                 found =  btrfs_node_blockptr(eb, slot);
7533                 if (found == corrupt->cache.start)
7534                         goto del_ptr;
7535         }
7536         /*
7537          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7538          * to this block
7539          */
7540         if (eb == info->extent_root->node) {
7541                 ret = -ENOENT;
7542                 goto out;
7543         } else {
7544                 level++;
7545                 btrfs_release_path(&path);
7546                 goto again;
7547         }
7548
7549 del_ptr:
7550         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7551         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7552
7553 out:
7554         btrfs_release_path(&path);
7555         return ret;
7556 }
7557
7558 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7559 {
7560         struct btrfs_trans_handle *trans = NULL;
7561         struct cache_extent *cache;
7562         struct btrfs_corrupt_block *corrupt;
7563
7564         while (1) {
7565                 cache = search_cache_extent(info->corrupt_blocks, 0);
7566                 if (!cache)
7567                         break;
7568                 if (!trans) {
7569                         trans = btrfs_start_transaction(info->extent_root, 1);
7570                         if (IS_ERR(trans))
7571                                 return PTR_ERR(trans);
7572                 }
7573                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7574                 prune_one_block(trans, info, corrupt);
7575                 remove_cache_extent(info->corrupt_blocks, cache);
7576         }
7577         if (trans)
7578                 return btrfs_commit_transaction(trans, info->extent_root);
7579         return 0;
7580 }
7581
7582 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7583 {
7584         struct btrfs_block_group_cache *cache;
7585         u64 start, end;
7586         int ret;
7587
7588         while (1) {
7589                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7590                                             &start, &end, EXTENT_DIRTY);
7591                 if (ret)
7592                         break;
7593                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7594                                    GFP_NOFS);
7595         }
7596
7597         start = 0;
7598         while (1) {
7599                 cache = btrfs_lookup_first_block_group(fs_info, start);
7600                 if (!cache)
7601                         break;
7602                 if (cache->cached)
7603                         cache->cached = 0;
7604                 start = cache->key.objectid + cache->key.offset;
7605         }
7606 }
7607
7608 static int check_extent_refs(struct btrfs_root *root,
7609                              struct cache_tree *extent_cache)
7610 {
7611         struct extent_record *rec;
7612         struct cache_extent *cache;
7613         int err = 0;
7614         int ret = 0;
7615         int fixed = 0;
7616         int had_dups = 0;
7617         int recorded = 0;
7618
7619         if (repair) {
7620                 /*
7621                  * if we're doing a repair, we have to make sure
7622                  * we don't allocate from the problem extents.
7623                  * In the worst case, this will be all the
7624                  * extents in the FS
7625                  */
7626                 cache = search_cache_extent(extent_cache, 0);
7627                 while(cache) {
7628                         rec = container_of(cache, struct extent_record, cache);
7629                         set_extent_dirty(root->fs_info->excluded_extents,
7630                                          rec->start,
7631                                          rec->start + rec->max_size - 1,
7632                                          GFP_NOFS);
7633                         cache = next_cache_extent(cache);
7634                 }
7635
7636                 /* pin down all the corrupted blocks too */
7637                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7638                 while(cache) {
7639                         set_extent_dirty(root->fs_info->excluded_extents,
7640                                          cache->start,
7641                                          cache->start + cache->size - 1,
7642                                          GFP_NOFS);
7643                         cache = next_cache_extent(cache);
7644                 }
7645                 prune_corrupt_blocks(root->fs_info);
7646                 reset_cached_block_groups(root->fs_info);
7647         }
7648
7649         reset_cached_block_groups(root->fs_info);
7650
7651         /*
7652          * We need to delete any duplicate entries we find first otherwise we
7653          * could mess up the extent tree when we have backrefs that actually
7654          * belong to a different extent item and not the weird duplicate one.
7655          */
7656         while (repair && !list_empty(&duplicate_extents)) {
7657                 rec = list_entry(duplicate_extents.next, struct extent_record,
7658                                  list);
7659                 list_del_init(&rec->list);
7660
7661                 /* Sometimes we can find a backref before we find an actual
7662                  * extent, so we need to process it a little bit to see if there
7663                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7664                  * if this is a backref screwup.  If we need to delete stuff
7665                  * process_duplicates() will return 0, otherwise it will return
7666                  * 1 and we
7667                  */
7668                 if (process_duplicates(root, extent_cache, rec))
7669                         continue;
7670                 ret = delete_duplicate_records(root, rec);
7671                 if (ret < 0)
7672                         return ret;
7673                 /*
7674                  * delete_duplicate_records will return the number of entries
7675                  * deleted, so if it's greater than 0 then we know we actually
7676                  * did something and we need to remove.
7677                  */
7678                 if (ret)
7679                         had_dups = 1;
7680         }
7681
7682         if (had_dups)
7683                 return -EAGAIN;
7684
7685         while(1) {
7686                 int cur_err = 0;
7687
7688                 fixed = 0;
7689                 recorded = 0;
7690                 cache = search_cache_extent(extent_cache, 0);
7691                 if (!cache)
7692                         break;
7693                 rec = container_of(cache, struct extent_record, cache);
7694                 if (rec->num_duplicates) {
7695                         fprintf(stderr, "extent item %llu has multiple extent "
7696                                 "items\n", (unsigned long long)rec->start);
7697                         err = 1;
7698                         cur_err = 1;
7699                 }
7700
7701                 if (rec->refs != rec->extent_item_refs) {
7702                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7703                                 (unsigned long long)rec->start,
7704                                 (unsigned long long)rec->nr);
7705                         fprintf(stderr, "extent item %llu, found %llu\n",
7706                                 (unsigned long long)rec->extent_item_refs,
7707                                 (unsigned long long)rec->refs);
7708                         ret = record_orphan_data_extents(root->fs_info, rec);
7709                         if (ret < 0)
7710                                 goto repair_abort;
7711                         if (ret == 0) {
7712                                 recorded = 1;
7713                         } else {
7714                                 /*
7715                                  * we can't use the extent to repair file
7716                                  * extent, let the fallback method handle it.
7717                                  */
7718                                 if (!fixed && repair) {
7719                                         ret = fixup_extent_refs(
7720                                                         root->fs_info,
7721                                                         extent_cache, rec);
7722                                         if (ret)
7723                                                 goto repair_abort;
7724                                         fixed = 1;
7725                                 }
7726                         }
7727                         err = 1;
7728                         cur_err = 1;
7729                 }
7730                 if (all_backpointers_checked(rec, 1)) {
7731                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7732                                 (unsigned long long)rec->start,
7733                                 (unsigned long long)rec->nr);
7734
7735                         if (!fixed && !recorded && repair) {
7736                                 ret = fixup_extent_refs(root->fs_info,
7737                                                         extent_cache, rec);
7738                                 if (ret)
7739                                         goto repair_abort;
7740                                 fixed = 1;
7741                         }
7742                         cur_err = 1;
7743                         err = 1;
7744                 }
7745                 if (!rec->owner_ref_checked) {
7746                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7747                                 (unsigned long long)rec->start,
7748                                 (unsigned long long)rec->nr);
7749                         if (!fixed && !recorded && repair) {
7750                                 ret = fixup_extent_refs(root->fs_info,
7751                                                         extent_cache, rec);
7752                                 if (ret)
7753                                         goto repair_abort;
7754                                 fixed = 1;
7755                         }
7756                         err = 1;
7757                         cur_err = 1;
7758                 }
7759                 if (rec->bad_full_backref) {
7760                         fprintf(stderr, "bad full backref, on [%llu]\n",
7761                                 (unsigned long long)rec->start);
7762                         if (repair) {
7763                                 ret = fixup_extent_flags(root->fs_info, rec);
7764                                 if (ret)
7765                                         goto repair_abort;
7766                                 fixed = 1;
7767                         }
7768                         err = 1;
7769                         cur_err = 1;
7770                 }
7771                 /*
7772                  * Although it's not a extent ref's problem, we reuse this
7773                  * routine for error reporting.
7774                  * No repair function yet.
7775                  */
7776                 if (rec->crossing_stripes) {
7777                         fprintf(stderr,
7778                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7779                                 rec->start, rec->start + rec->max_size);
7780                         err = 1;
7781                         cur_err = 1;
7782                 }
7783
7784                 if (rec->wrong_chunk_type) {
7785                         fprintf(stderr,
7786                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7787                                 rec->start, rec->start + rec->max_size);
7788                         err = 1;
7789                         cur_err = 1;
7790                 }
7791
7792                 remove_cache_extent(extent_cache, cache);
7793                 free_all_extent_backrefs(rec);
7794                 if (!init_extent_tree && repair && (!cur_err || fixed))
7795                         clear_extent_dirty(root->fs_info->excluded_extents,
7796                                            rec->start,
7797                                            rec->start + rec->max_size - 1,
7798                                            GFP_NOFS);
7799                 free(rec);
7800         }
7801 repair_abort:
7802         if (repair) {
7803                 if (ret && ret != -EAGAIN) {
7804                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7805                         exit(1);
7806                 } else if (!ret) {
7807                         struct btrfs_trans_handle *trans;
7808
7809                         root = root->fs_info->extent_root;
7810                         trans = btrfs_start_transaction(root, 1);
7811                         if (IS_ERR(trans)) {
7812                                 ret = PTR_ERR(trans);
7813                                 goto repair_abort;
7814                         }
7815
7816                         btrfs_fix_block_accounting(trans, root);
7817                         ret = btrfs_commit_transaction(trans, root);
7818                         if (ret)
7819                                 goto repair_abort;
7820                 }
7821                 if (err)
7822                         fprintf(stderr, "repaired damaged extent references\n");
7823                 return ret;
7824         }
7825         return err;
7826 }
7827
7828 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7829 {
7830         u64 stripe_size;
7831
7832         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7833                 stripe_size = length;
7834                 stripe_size /= num_stripes;
7835         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7836                 stripe_size = length * 2;
7837                 stripe_size /= num_stripes;
7838         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7839                 stripe_size = length;
7840                 stripe_size /= (num_stripes - 1);
7841         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7842                 stripe_size = length;
7843                 stripe_size /= (num_stripes - 2);
7844         } else {
7845                 stripe_size = length;
7846         }
7847         return stripe_size;
7848 }
7849
7850 /*
7851  * Check the chunk with its block group/dev list ref:
7852  * Return 0 if all refs seems valid.
7853  * Return 1 if part of refs seems valid, need later check for rebuild ref
7854  * like missing block group and needs to search extent tree to rebuild them.
7855  * Return -1 if essential refs are missing and unable to rebuild.
7856  */
7857 static int check_chunk_refs(struct chunk_record *chunk_rec,
7858                             struct block_group_tree *block_group_cache,
7859                             struct device_extent_tree *dev_extent_cache,
7860                             int silent)
7861 {
7862         struct cache_extent *block_group_item;
7863         struct block_group_record *block_group_rec;
7864         struct cache_extent *dev_extent_item;
7865         struct device_extent_record *dev_extent_rec;
7866         u64 devid;
7867         u64 offset;
7868         u64 length;
7869         int metadump_v2 = 0;
7870         int i;
7871         int ret = 0;
7872
7873         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7874                                                chunk_rec->offset,
7875                                                chunk_rec->length);
7876         if (block_group_item) {
7877                 block_group_rec = container_of(block_group_item,
7878                                                struct block_group_record,
7879                                                cache);
7880                 if (chunk_rec->length != block_group_rec->offset ||
7881                     chunk_rec->offset != block_group_rec->objectid ||
7882                     (!metadump_v2 &&
7883                      chunk_rec->type_flags != block_group_rec->flags)) {
7884                         if (!silent)
7885                                 fprintf(stderr,
7886                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7887                                         chunk_rec->objectid,
7888                                         chunk_rec->type,
7889                                         chunk_rec->offset,
7890                                         chunk_rec->length,
7891                                         chunk_rec->offset,
7892                                         chunk_rec->type_flags,
7893                                         block_group_rec->objectid,
7894                                         block_group_rec->type,
7895                                         block_group_rec->offset,
7896                                         block_group_rec->offset,
7897                                         block_group_rec->objectid,
7898                                         block_group_rec->flags);
7899                         ret = -1;
7900                 } else {
7901                         list_del_init(&block_group_rec->list);
7902                         chunk_rec->bg_rec = block_group_rec;
7903                 }
7904         } else {
7905                 if (!silent)
7906                         fprintf(stderr,
7907                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7908                                 chunk_rec->objectid,
7909                                 chunk_rec->type,
7910                                 chunk_rec->offset,
7911                                 chunk_rec->length,
7912                                 chunk_rec->offset,
7913                                 chunk_rec->type_flags);
7914                 ret = 1;
7915         }
7916
7917         if (metadump_v2)
7918                 return ret;
7919
7920         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7921                                     chunk_rec->num_stripes);
7922         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7923                 devid = chunk_rec->stripes[i].devid;
7924                 offset = chunk_rec->stripes[i].offset;
7925                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7926                                                        devid, offset, length);
7927                 if (dev_extent_item) {
7928                         dev_extent_rec = container_of(dev_extent_item,
7929                                                 struct device_extent_record,
7930                                                 cache);
7931                         if (dev_extent_rec->objectid != devid ||
7932                             dev_extent_rec->offset != offset ||
7933                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7934                             dev_extent_rec->length != length) {
7935                                 if (!silent)
7936                                         fprintf(stderr,
7937                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7938                                                 chunk_rec->objectid,
7939                                                 chunk_rec->type,
7940                                                 chunk_rec->offset,
7941                                                 chunk_rec->stripes[i].devid,
7942                                                 chunk_rec->stripes[i].offset,
7943                                                 dev_extent_rec->objectid,
7944                                                 dev_extent_rec->offset,
7945                                                 dev_extent_rec->length);
7946                                 ret = -1;
7947                         } else {
7948                                 list_move(&dev_extent_rec->chunk_list,
7949                                           &chunk_rec->dextents);
7950                         }
7951                 } else {
7952                         if (!silent)
7953                                 fprintf(stderr,
7954                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7955                                         chunk_rec->objectid,
7956                                         chunk_rec->type,
7957                                         chunk_rec->offset,
7958                                         chunk_rec->stripes[i].devid,
7959                                         chunk_rec->stripes[i].offset);
7960                         ret = -1;
7961                 }
7962         }
7963         return ret;
7964 }
7965
7966 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7967 int check_chunks(struct cache_tree *chunk_cache,
7968                  struct block_group_tree *block_group_cache,
7969                  struct device_extent_tree *dev_extent_cache,
7970                  struct list_head *good, struct list_head *bad,
7971                  struct list_head *rebuild, int silent)
7972 {
7973         struct cache_extent *chunk_item;
7974         struct chunk_record *chunk_rec;
7975         struct block_group_record *bg_rec;
7976         struct device_extent_record *dext_rec;
7977         int err;
7978         int ret = 0;
7979
7980         chunk_item = first_cache_extent(chunk_cache);
7981         while (chunk_item) {
7982                 chunk_rec = container_of(chunk_item, struct chunk_record,
7983                                          cache);
7984                 err = check_chunk_refs(chunk_rec, block_group_cache,
7985                                        dev_extent_cache, silent);
7986                 if (err < 0)
7987                         ret = err;
7988                 if (err == 0 && good)
7989                         list_add_tail(&chunk_rec->list, good);
7990                 if (err > 0 && rebuild)
7991                         list_add_tail(&chunk_rec->list, rebuild);
7992                 if (err < 0 && bad)
7993                         list_add_tail(&chunk_rec->list, bad);
7994                 chunk_item = next_cache_extent(chunk_item);
7995         }
7996
7997         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7998                 if (!silent)
7999                         fprintf(stderr,
8000                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8001                                 bg_rec->objectid,
8002                                 bg_rec->offset,
8003                                 bg_rec->flags);
8004                 if (!ret)
8005                         ret = 1;
8006         }
8007
8008         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8009                             chunk_list) {
8010                 if (!silent)
8011                         fprintf(stderr,
8012                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8013                                 dext_rec->objectid,
8014                                 dext_rec->offset,
8015                                 dext_rec->length);
8016                 if (!ret)
8017                         ret = 1;
8018         }
8019         return ret;
8020 }
8021
8022
8023 static int check_device_used(struct device_record *dev_rec,
8024                              struct device_extent_tree *dext_cache)
8025 {
8026         struct cache_extent *cache;
8027         struct device_extent_record *dev_extent_rec;
8028         u64 total_byte = 0;
8029
8030         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8031         while (cache) {
8032                 dev_extent_rec = container_of(cache,
8033                                               struct device_extent_record,
8034                                               cache);
8035                 if (dev_extent_rec->objectid != dev_rec->devid)
8036                         break;
8037
8038                 list_del_init(&dev_extent_rec->device_list);
8039                 total_byte += dev_extent_rec->length;
8040                 cache = next_cache_extent(cache);
8041         }
8042
8043         if (total_byte != dev_rec->byte_used) {
8044                 fprintf(stderr,
8045                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8046                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8047                         dev_rec->type, dev_rec->offset);
8048                 return -1;
8049         } else {
8050                 return 0;
8051         }
8052 }
8053
8054 /* check btrfs_dev_item -> btrfs_dev_extent */
8055 static int check_devices(struct rb_root *dev_cache,
8056                          struct device_extent_tree *dev_extent_cache)
8057 {
8058         struct rb_node *dev_node;
8059         struct device_record *dev_rec;
8060         struct device_extent_record *dext_rec;
8061         int err;
8062         int ret = 0;
8063
8064         dev_node = rb_first(dev_cache);
8065         while (dev_node) {
8066                 dev_rec = container_of(dev_node, struct device_record, node);
8067                 err = check_device_used(dev_rec, dev_extent_cache);
8068                 if (err)
8069                         ret = err;
8070
8071                 dev_node = rb_next(dev_node);
8072         }
8073         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8074                             device_list) {
8075                 fprintf(stderr,
8076                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8077                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8078                 if (!ret)
8079                         ret = 1;
8080         }
8081         return ret;
8082 }
8083
8084 static int add_root_item_to_list(struct list_head *head,
8085                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8086                                   u8 level, u8 drop_level,
8087                                   int level_size, struct btrfs_key *drop_key)
8088 {
8089
8090         struct root_item_record *ri_rec;
8091         ri_rec = malloc(sizeof(*ri_rec));
8092         if (!ri_rec)
8093                 return -ENOMEM;
8094         ri_rec->bytenr = bytenr;
8095         ri_rec->objectid = objectid;
8096         ri_rec->level = level;
8097         ri_rec->level_size = level_size;
8098         ri_rec->drop_level = drop_level;
8099         ri_rec->last_snapshot = last_snapshot;
8100         if (drop_key)
8101                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8102         list_add_tail(&ri_rec->list, head);
8103
8104         return 0;
8105 }
8106
8107 static void free_root_item_list(struct list_head *list)
8108 {
8109         struct root_item_record *ri_rec;
8110
8111         while (!list_empty(list)) {
8112                 ri_rec = list_first_entry(list, struct root_item_record,
8113                                           list);
8114                 list_del_init(&ri_rec->list);
8115                 free(ri_rec);
8116         }
8117 }
8118
8119 static int deal_root_from_list(struct list_head *list,
8120                                struct btrfs_root *root,
8121                                struct block_info *bits,
8122                                int bits_nr,
8123                                struct cache_tree *pending,
8124                                struct cache_tree *seen,
8125                                struct cache_tree *reada,
8126                                struct cache_tree *nodes,
8127                                struct cache_tree *extent_cache,
8128                                struct cache_tree *chunk_cache,
8129                                struct rb_root *dev_cache,
8130                                struct block_group_tree *block_group_cache,
8131                                struct device_extent_tree *dev_extent_cache)
8132 {
8133         int ret = 0;
8134         u64 last;
8135
8136         while (!list_empty(list)) {
8137                 struct root_item_record *rec;
8138                 struct extent_buffer *buf;
8139                 rec = list_entry(list->next,
8140                                  struct root_item_record, list);
8141                 last = 0;
8142                 buf = read_tree_block(root->fs_info->tree_root,
8143                                       rec->bytenr, rec->level_size, 0);
8144                 if (!extent_buffer_uptodate(buf)) {
8145                         free_extent_buffer(buf);
8146                         ret = -EIO;
8147                         break;
8148                 }
8149                 add_root_to_pending(buf, extent_cache, pending,
8150                                     seen, nodes, rec->objectid);
8151                 /*
8152                  * To rebuild extent tree, we need deal with snapshot
8153                  * one by one, otherwise we deal with node firstly which
8154                  * can maximize readahead.
8155                  */
8156                 while (1) {
8157                         ret = run_next_block(root, bits, bits_nr, &last,
8158                                              pending, seen, reada, nodes,
8159                                              extent_cache, chunk_cache,
8160                                              dev_cache, block_group_cache,
8161                                              dev_extent_cache, rec);
8162                         if (ret != 0)
8163                                 break;
8164                 }
8165                 free_extent_buffer(buf);
8166                 list_del(&rec->list);
8167                 free(rec);
8168                 if (ret < 0)
8169                         break;
8170         }
8171         while (ret >= 0) {
8172                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8173                                      reada, nodes, extent_cache, chunk_cache,
8174                                      dev_cache, block_group_cache,
8175                                      dev_extent_cache, NULL);
8176                 if (ret != 0) {
8177                         if (ret > 0)
8178                                 ret = 0;
8179                         break;
8180                 }
8181         }
8182         return ret;
8183 }
8184
8185 static int check_chunks_and_extents(struct btrfs_root *root)
8186 {
8187         struct rb_root dev_cache;
8188         struct cache_tree chunk_cache;
8189         struct block_group_tree block_group_cache;
8190         struct device_extent_tree dev_extent_cache;
8191         struct cache_tree extent_cache;
8192         struct cache_tree seen;
8193         struct cache_tree pending;
8194         struct cache_tree reada;
8195         struct cache_tree nodes;
8196         struct extent_io_tree excluded_extents;
8197         struct cache_tree corrupt_blocks;
8198         struct btrfs_path path;
8199         struct btrfs_key key;
8200         struct btrfs_key found_key;
8201         int ret, err = 0;
8202         struct block_info *bits;
8203         int bits_nr;
8204         struct extent_buffer *leaf;
8205         int slot;
8206         struct btrfs_root_item ri;
8207         struct list_head dropping_trees;
8208         struct list_head normal_trees;
8209         struct btrfs_root *root1;
8210         u64 objectid;
8211         u32 level_size;
8212         u8 level;
8213
8214         dev_cache = RB_ROOT;
8215         cache_tree_init(&chunk_cache);
8216         block_group_tree_init(&block_group_cache);
8217         device_extent_tree_init(&dev_extent_cache);
8218
8219         cache_tree_init(&extent_cache);
8220         cache_tree_init(&seen);
8221         cache_tree_init(&pending);
8222         cache_tree_init(&nodes);
8223         cache_tree_init(&reada);
8224         cache_tree_init(&corrupt_blocks);
8225         extent_io_tree_init(&excluded_extents);
8226         INIT_LIST_HEAD(&dropping_trees);
8227         INIT_LIST_HEAD(&normal_trees);
8228
8229         if (repair) {
8230                 root->fs_info->excluded_extents = &excluded_extents;
8231                 root->fs_info->fsck_extent_cache = &extent_cache;
8232                 root->fs_info->free_extent_hook = free_extent_hook;
8233                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8234         }
8235
8236         bits_nr = 1024;
8237         bits = malloc(bits_nr * sizeof(struct block_info));
8238         if (!bits) {
8239                 perror("malloc");
8240                 exit(1);
8241         }
8242
8243         if (ctx.progress_enabled) {
8244                 ctx.tp = TASK_EXTENTS;
8245                 task_start(ctx.info);
8246         }
8247
8248 again:
8249         root1 = root->fs_info->tree_root;
8250         level = btrfs_header_level(root1->node);
8251         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8252                                     root1->node->start, 0, level, 0,
8253                                     root1->nodesize, NULL);
8254         if (ret < 0)
8255                 goto out;
8256         root1 = root->fs_info->chunk_root;
8257         level = btrfs_header_level(root1->node);
8258         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8259                                     root1->node->start, 0, level, 0,
8260                                     root1->nodesize, NULL);
8261         if (ret < 0)
8262                 goto out;
8263         btrfs_init_path(&path);
8264         key.offset = 0;
8265         key.objectid = 0;
8266         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8267         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8268                                         &key, &path, 0, 0);
8269         if (ret < 0)
8270                 goto out;
8271         while(1) {
8272                 leaf = path.nodes[0];
8273                 slot = path.slots[0];
8274                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8275                         ret = btrfs_next_leaf(root, &path);
8276                         if (ret != 0)
8277                                 break;
8278                         leaf = path.nodes[0];
8279                         slot = path.slots[0];
8280                 }
8281                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8282                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8283                         unsigned long offset;
8284                         u64 last_snapshot;
8285
8286                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8287                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8288                         last_snapshot = btrfs_root_last_snapshot(&ri);
8289                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8290                                 level = btrfs_root_level(&ri);
8291                                 level_size = root->nodesize;
8292                                 ret = add_root_item_to_list(&normal_trees,
8293                                                 found_key.objectid,
8294                                                 btrfs_root_bytenr(&ri),
8295                                                 last_snapshot, level,
8296                                                 0, level_size, NULL);
8297                                 if (ret < 0)
8298                                         goto out;
8299                         } else {
8300                                 level = btrfs_root_level(&ri);
8301                                 level_size = root->nodesize;
8302                                 objectid = found_key.objectid;
8303                                 btrfs_disk_key_to_cpu(&found_key,
8304                                                       &ri.drop_progress);
8305                                 ret = add_root_item_to_list(&dropping_trees,
8306                                                 objectid,
8307                                                 btrfs_root_bytenr(&ri),
8308                                                 last_snapshot, level,
8309                                                 ri.drop_level,
8310                                                 level_size, &found_key);
8311                                 if (ret < 0)
8312                                         goto out;
8313                         }
8314                 }
8315                 path.slots[0]++;
8316         }
8317         btrfs_release_path(&path);
8318
8319         /*
8320          * check_block can return -EAGAIN if it fixes something, please keep
8321          * this in mind when dealing with return values from these functions, if
8322          * we get -EAGAIN we want to fall through and restart the loop.
8323          */
8324         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8325                                   &seen, &reada, &nodes, &extent_cache,
8326                                   &chunk_cache, &dev_cache, &block_group_cache,
8327                                   &dev_extent_cache);
8328         if (ret < 0) {
8329                 if (ret == -EAGAIN)
8330                         goto loop;
8331                 goto out;
8332         }
8333         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8334                                   &pending, &seen, &reada, &nodes,
8335                                   &extent_cache, &chunk_cache, &dev_cache,
8336                                   &block_group_cache, &dev_extent_cache);
8337         if (ret < 0) {
8338                 if (ret == -EAGAIN)
8339                         goto loop;
8340                 goto out;
8341         }
8342
8343         ret = check_chunks(&chunk_cache, &block_group_cache,
8344                            &dev_extent_cache, NULL, NULL, NULL, 0);
8345         if (ret) {
8346                 if (ret == -EAGAIN)
8347                         goto loop;
8348                 err = ret;
8349         }
8350
8351         ret = check_extent_refs(root, &extent_cache);
8352         if (ret < 0) {
8353                 if (ret == -EAGAIN)
8354                         goto loop;
8355                 goto out;
8356         }
8357
8358         ret = check_devices(&dev_cache, &dev_extent_cache);
8359         if (ret && err)
8360                 ret = err;
8361
8362 out:
8363         task_stop(ctx.info);
8364         if (repair) {
8365                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8366                 extent_io_tree_cleanup(&excluded_extents);
8367                 root->fs_info->fsck_extent_cache = NULL;
8368                 root->fs_info->free_extent_hook = NULL;
8369                 root->fs_info->corrupt_blocks = NULL;
8370                 root->fs_info->excluded_extents = NULL;
8371         }
8372         free(bits);
8373         free_chunk_cache_tree(&chunk_cache);
8374         free_device_cache_tree(&dev_cache);
8375         free_block_group_tree(&block_group_cache);
8376         free_device_extent_tree(&dev_extent_cache);
8377         free_extent_cache_tree(&seen);
8378         free_extent_cache_tree(&pending);
8379         free_extent_cache_tree(&reada);
8380         free_extent_cache_tree(&nodes);
8381         return ret;
8382 loop:
8383         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8384         free_extent_cache_tree(&seen);
8385         free_extent_cache_tree(&pending);
8386         free_extent_cache_tree(&reada);
8387         free_extent_cache_tree(&nodes);
8388         free_chunk_cache_tree(&chunk_cache);
8389         free_block_group_tree(&block_group_cache);
8390         free_device_cache_tree(&dev_cache);
8391         free_device_extent_tree(&dev_extent_cache);
8392         free_extent_record_cache(root->fs_info, &extent_cache);
8393         free_root_item_list(&normal_trees);
8394         free_root_item_list(&dropping_trees);
8395         extent_io_tree_cleanup(&excluded_extents);
8396         goto again;
8397 }
8398
8399 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8400                            struct btrfs_root *root, int overwrite)
8401 {
8402         struct extent_buffer *c;
8403         struct extent_buffer *old = root->node;
8404         int level;
8405         int ret;
8406         struct btrfs_disk_key disk_key = {0,0,0};
8407
8408         level = 0;
8409
8410         if (overwrite) {
8411                 c = old;
8412                 extent_buffer_get(c);
8413                 goto init;
8414         }
8415         c = btrfs_alloc_free_block(trans, root,
8416                                    root->nodesize,
8417                                    root->root_key.objectid,
8418                                    &disk_key, level, 0, 0);
8419         if (IS_ERR(c)) {
8420                 c = old;
8421                 extent_buffer_get(c);
8422                 overwrite = 1;
8423         }
8424 init:
8425         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8426         btrfs_set_header_level(c, level);
8427         btrfs_set_header_bytenr(c, c->start);
8428         btrfs_set_header_generation(c, trans->transid);
8429         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8430         btrfs_set_header_owner(c, root->root_key.objectid);
8431
8432         write_extent_buffer(c, root->fs_info->fsid,
8433                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8434
8435         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8436                             btrfs_header_chunk_tree_uuid(c),
8437                             BTRFS_UUID_SIZE);
8438
8439         btrfs_mark_buffer_dirty(c);
8440         /*
8441          * this case can happen in the following case:
8442          *
8443          * 1.overwrite previous root.
8444          *
8445          * 2.reinit reloc data root, this is because we skip pin
8446          * down reloc data tree before which means we can allocate
8447          * same block bytenr here.
8448          */
8449         if (old->start == c->start) {
8450                 btrfs_set_root_generation(&root->root_item,
8451                                           trans->transid);
8452                 root->root_item.level = btrfs_header_level(root->node);
8453                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8454                                         &root->root_key, &root->root_item);
8455                 if (ret) {
8456                         free_extent_buffer(c);
8457                         return ret;
8458                 }
8459         }
8460         free_extent_buffer(old);
8461         root->node = c;
8462         add_root_to_dirty_list(root);
8463         return 0;
8464 }
8465
8466 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8467                                 struct extent_buffer *eb, int tree_root)
8468 {
8469         struct extent_buffer *tmp;
8470         struct btrfs_root_item *ri;
8471         struct btrfs_key key;
8472         u64 bytenr;
8473         u32 nodesize;
8474         int level = btrfs_header_level(eb);
8475         int nritems;
8476         int ret;
8477         int i;
8478
8479         /*
8480          * If we have pinned this block before, don't pin it again.
8481          * This can not only avoid forever loop with broken filesystem
8482          * but also give us some speedups.
8483          */
8484         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8485                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8486                 return 0;
8487
8488         btrfs_pin_extent(fs_info, eb->start, eb->len);
8489
8490         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8491         nritems = btrfs_header_nritems(eb);
8492         for (i = 0; i < nritems; i++) {
8493                 if (level == 0) {
8494                         btrfs_item_key_to_cpu(eb, &key, i);
8495                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8496                                 continue;
8497                         /* Skip the extent root and reloc roots */
8498                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8499                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8500                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8501                                 continue;
8502                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8503                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8504
8505                         /*
8506                          * If at any point we start needing the real root we
8507                          * will have to build a stump root for the root we are
8508                          * in, but for now this doesn't actually use the root so
8509                          * just pass in extent_root.
8510                          */
8511                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8512                                               nodesize, 0);
8513                         if (!extent_buffer_uptodate(tmp)) {
8514                                 fprintf(stderr, "Error reading root block\n");
8515                                 return -EIO;
8516                         }
8517                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8518                         free_extent_buffer(tmp);
8519                         if (ret)
8520                                 return ret;
8521                 } else {
8522                         bytenr = btrfs_node_blockptr(eb, i);
8523
8524                         /* If we aren't the tree root don't read the block */
8525                         if (level == 1 && !tree_root) {
8526                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8527                                 continue;
8528                         }
8529
8530                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8531                                               nodesize, 0);
8532                         if (!extent_buffer_uptodate(tmp)) {
8533                                 fprintf(stderr, "Error reading tree block\n");
8534                                 return -EIO;
8535                         }
8536                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8537                         free_extent_buffer(tmp);
8538                         if (ret)
8539                                 return ret;
8540                 }
8541         }
8542
8543         return 0;
8544 }
8545
8546 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8547 {
8548         int ret;
8549
8550         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8551         if (ret)
8552                 return ret;
8553
8554         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8555 }
8556
8557 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8558 {
8559         struct btrfs_block_group_cache *cache;
8560         struct btrfs_path *path;
8561         struct extent_buffer *leaf;
8562         struct btrfs_chunk *chunk;
8563         struct btrfs_key key;
8564         int ret;
8565         u64 start;
8566
8567         path = btrfs_alloc_path();
8568         if (!path)
8569                 return -ENOMEM;
8570
8571         key.objectid = 0;
8572         key.type = BTRFS_CHUNK_ITEM_KEY;
8573         key.offset = 0;
8574
8575         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8576         if (ret < 0) {
8577                 btrfs_free_path(path);
8578                 return ret;
8579         }
8580
8581         /*
8582          * We do this in case the block groups were screwed up and had alloc
8583          * bits that aren't actually set on the chunks.  This happens with
8584          * restored images every time and could happen in real life I guess.
8585          */
8586         fs_info->avail_data_alloc_bits = 0;
8587         fs_info->avail_metadata_alloc_bits = 0;
8588         fs_info->avail_system_alloc_bits = 0;
8589
8590         /* First we need to create the in-memory block groups */
8591         while (1) {
8592                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8593                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8594                         if (ret < 0) {
8595                                 btrfs_free_path(path);
8596                                 return ret;
8597                         }
8598                         if (ret) {
8599                                 ret = 0;
8600                                 break;
8601                         }
8602                 }
8603                 leaf = path->nodes[0];
8604                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8605                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8606                         path->slots[0]++;
8607                         continue;
8608                 }
8609
8610                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8611                                        struct btrfs_chunk);
8612                 btrfs_add_block_group(fs_info, 0,
8613                                       btrfs_chunk_type(leaf, chunk),
8614                                       key.objectid, key.offset,
8615                                       btrfs_chunk_length(leaf, chunk));
8616                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8617                                  key.offset + btrfs_chunk_length(leaf, chunk),
8618                                  GFP_NOFS);
8619                 path->slots[0]++;
8620         }
8621         start = 0;
8622         while (1) {
8623                 cache = btrfs_lookup_first_block_group(fs_info, start);
8624                 if (!cache)
8625                         break;
8626                 cache->cached = 1;
8627                 start = cache->key.objectid + cache->key.offset;
8628         }
8629
8630         btrfs_free_path(path);
8631         return 0;
8632 }
8633
8634 static int reset_balance(struct btrfs_trans_handle *trans,
8635                          struct btrfs_fs_info *fs_info)
8636 {
8637         struct btrfs_root *root = fs_info->tree_root;
8638         struct btrfs_path *path;
8639         struct extent_buffer *leaf;
8640         struct btrfs_key key;
8641         int del_slot, del_nr = 0;
8642         int ret;
8643         int found = 0;
8644
8645         path = btrfs_alloc_path();
8646         if (!path)
8647                 return -ENOMEM;
8648
8649         key.objectid = BTRFS_BALANCE_OBJECTID;
8650         key.type = BTRFS_BALANCE_ITEM_KEY;
8651         key.offset = 0;
8652
8653         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8654         if (ret) {
8655                 if (ret > 0)
8656                         ret = 0;
8657                 if (!ret)
8658                         goto reinit_data_reloc;
8659                 else
8660                         goto out;
8661         }
8662
8663         ret = btrfs_del_item(trans, root, path);
8664         if (ret)
8665                 goto out;
8666         btrfs_release_path(path);
8667
8668         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8669         key.type = BTRFS_ROOT_ITEM_KEY;
8670         key.offset = 0;
8671
8672         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8673         if (ret < 0)
8674                 goto out;
8675         while (1) {
8676                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8677                         if (!found)
8678                                 break;
8679
8680                         if (del_nr) {
8681                                 ret = btrfs_del_items(trans, root, path,
8682                                                       del_slot, del_nr);
8683                                 del_nr = 0;
8684                                 if (ret)
8685                                         goto out;
8686                         }
8687                         key.offset++;
8688                         btrfs_release_path(path);
8689
8690                         found = 0;
8691                         ret = btrfs_search_slot(trans, root, &key, path,
8692                                                 -1, 1);
8693                         if (ret < 0)
8694                                 goto out;
8695                         continue;
8696                 }
8697                 found = 1;
8698                 leaf = path->nodes[0];
8699                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8700                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8701                         break;
8702                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8703                         path->slots[0]++;
8704                         continue;
8705                 }
8706                 if (!del_nr) {
8707                         del_slot = path->slots[0];
8708                         del_nr = 1;
8709                 } else {
8710                         del_nr++;
8711                 }
8712                 path->slots[0]++;
8713         }
8714
8715         if (del_nr) {
8716                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8717                 if (ret)
8718                         goto out;
8719         }
8720         btrfs_release_path(path);
8721
8722 reinit_data_reloc:
8723         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8724         key.type = BTRFS_ROOT_ITEM_KEY;
8725         key.offset = (u64)-1;
8726         root = btrfs_read_fs_root(fs_info, &key);
8727         if (IS_ERR(root)) {
8728                 fprintf(stderr, "Error reading data reloc tree\n");
8729                 ret = PTR_ERR(root);
8730                 goto out;
8731         }
8732         record_root_in_trans(trans, root);
8733         ret = btrfs_fsck_reinit_root(trans, root, 0);
8734         if (ret)
8735                 goto out;
8736         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8737 out:
8738         btrfs_free_path(path);
8739         return ret;
8740 }
8741
8742 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8743                               struct btrfs_fs_info *fs_info)
8744 {
8745         u64 start = 0;
8746         int ret;
8747
8748         /*
8749          * The only reason we don't do this is because right now we're just
8750          * walking the trees we find and pinning down their bytes, we don't look
8751          * at any of the leaves.  In order to do mixed groups we'd have to check
8752          * the leaves of any fs roots and pin down the bytes for any file
8753          * extents we find.  Not hard but why do it if we don't have to?
8754          */
8755         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8756                 fprintf(stderr, "We don't support re-initing the extent tree "
8757                         "for mixed block groups yet, please notify a btrfs "
8758                         "developer you want to do this so they can add this "
8759                         "functionality.\n");
8760                 return -EINVAL;
8761         }
8762
8763         /*
8764          * first we need to walk all of the trees except the extent tree and pin
8765          * down the bytes that are in use so we don't overwrite any existing
8766          * metadata.
8767          */
8768         ret = pin_metadata_blocks(fs_info);
8769         if (ret) {
8770                 fprintf(stderr, "error pinning down used bytes\n");
8771                 return ret;
8772         }
8773
8774         /*
8775          * Need to drop all the block groups since we're going to recreate all
8776          * of them again.
8777          */
8778         btrfs_free_block_groups(fs_info);
8779         ret = reset_block_groups(fs_info);
8780         if (ret) {
8781                 fprintf(stderr, "error resetting the block groups\n");
8782                 return ret;
8783         }
8784
8785         /* Ok we can allocate now, reinit the extent root */
8786         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8787         if (ret) {
8788                 fprintf(stderr, "extent root initialization failed\n");
8789                 /*
8790                  * When the transaction code is updated we should end the
8791                  * transaction, but for now progs only knows about commit so
8792                  * just return an error.
8793                  */
8794                 return ret;
8795         }
8796
8797         /*
8798          * Now we have all the in-memory block groups setup so we can make
8799          * allocations properly, and the metadata we care about is safe since we
8800          * pinned all of it above.
8801          */
8802         while (1) {
8803                 struct btrfs_block_group_cache *cache;
8804
8805                 cache = btrfs_lookup_first_block_group(fs_info, start);
8806                 if (!cache)
8807                         break;
8808                 start = cache->key.objectid + cache->key.offset;
8809                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8810                                         &cache->key, &cache->item,
8811                                         sizeof(cache->item));
8812                 if (ret) {
8813                         fprintf(stderr, "Error adding block group\n");
8814                         return ret;
8815                 }
8816                 btrfs_extent_post_op(trans, fs_info->extent_root);
8817         }
8818
8819         ret = reset_balance(trans, fs_info);
8820         if (ret)
8821                 fprintf(stderr, "error reseting the pending balance\n");
8822
8823         return ret;
8824 }
8825
8826 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8827 {
8828         struct btrfs_path *path;
8829         struct btrfs_trans_handle *trans;
8830         struct btrfs_key key;
8831         int ret;
8832
8833         printf("Recowing metadata block %llu\n", eb->start);
8834         key.objectid = btrfs_header_owner(eb);
8835         key.type = BTRFS_ROOT_ITEM_KEY;
8836         key.offset = (u64)-1;
8837
8838         root = btrfs_read_fs_root(root->fs_info, &key);
8839         if (IS_ERR(root)) {
8840                 fprintf(stderr, "Couldn't find owner root %llu\n",
8841                         key.objectid);
8842                 return PTR_ERR(root);
8843         }
8844
8845         path = btrfs_alloc_path();
8846         if (!path)
8847                 return -ENOMEM;
8848
8849         trans = btrfs_start_transaction(root, 1);
8850         if (IS_ERR(trans)) {
8851                 btrfs_free_path(path);
8852                 return PTR_ERR(trans);
8853         }
8854
8855         path->lowest_level = btrfs_header_level(eb);
8856         if (path->lowest_level)
8857                 btrfs_node_key_to_cpu(eb, &key, 0);
8858         else
8859                 btrfs_item_key_to_cpu(eb, &key, 0);
8860
8861         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8862         btrfs_commit_transaction(trans, root);
8863         btrfs_free_path(path);
8864         return ret;
8865 }
8866
8867 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8868 {
8869         struct btrfs_path *path;
8870         struct btrfs_trans_handle *trans;
8871         struct btrfs_key key;
8872         int ret;
8873
8874         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8875                bad->key.type, bad->key.offset);
8876         key.objectid = bad->root_id;
8877         key.type = BTRFS_ROOT_ITEM_KEY;
8878         key.offset = (u64)-1;
8879
8880         root = btrfs_read_fs_root(root->fs_info, &key);
8881         if (IS_ERR(root)) {
8882                 fprintf(stderr, "Couldn't find owner root %llu\n",
8883                         key.objectid);
8884                 return PTR_ERR(root);
8885         }
8886
8887         path = btrfs_alloc_path();
8888         if (!path)
8889                 return -ENOMEM;
8890
8891         trans = btrfs_start_transaction(root, 1);
8892         if (IS_ERR(trans)) {
8893                 btrfs_free_path(path);
8894                 return PTR_ERR(trans);
8895         }
8896
8897         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8898         if (ret) {
8899                 if (ret > 0)
8900                         ret = 0;
8901                 goto out;
8902         }
8903         ret = btrfs_del_item(trans, root, path);
8904 out:
8905         btrfs_commit_transaction(trans, root);
8906         btrfs_free_path(path);
8907         return ret;
8908 }
8909
8910 static int zero_log_tree(struct btrfs_root *root)
8911 {
8912         struct btrfs_trans_handle *trans;
8913         int ret;
8914
8915         trans = btrfs_start_transaction(root, 1);
8916         if (IS_ERR(trans)) {
8917                 ret = PTR_ERR(trans);
8918                 return ret;
8919         }
8920         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8921         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8922         ret = btrfs_commit_transaction(trans, root);
8923         return ret;
8924 }
8925
8926 static int populate_csum(struct btrfs_trans_handle *trans,
8927                          struct btrfs_root *csum_root, char *buf, u64 start,
8928                          u64 len)
8929 {
8930         u64 offset = 0;
8931         u64 sectorsize;
8932         int ret = 0;
8933
8934         while (offset < len) {
8935                 sectorsize = csum_root->sectorsize;
8936                 ret = read_extent_data(csum_root, buf, start + offset,
8937                                        &sectorsize, 0);
8938                 if (ret)
8939                         break;
8940                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8941                                             start + offset, buf, sectorsize);
8942                 if (ret)
8943                         break;
8944                 offset += sectorsize;
8945         }
8946         return ret;
8947 }
8948
8949 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8950                                       struct btrfs_root *csum_root,
8951                                       struct btrfs_root *cur_root)
8952 {
8953         struct btrfs_path *path;
8954         struct btrfs_key key;
8955         struct extent_buffer *node;
8956         struct btrfs_file_extent_item *fi;
8957         char *buf = NULL;
8958         u64 start = 0;
8959         u64 len = 0;
8960         int slot = 0;
8961         int ret = 0;
8962
8963         path = btrfs_alloc_path();
8964         if (!path)
8965                 return -ENOMEM;
8966         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8967         if (!buf) {
8968                 ret = -ENOMEM;
8969                 goto out;
8970         }
8971
8972         key.objectid = 0;
8973         key.offset = 0;
8974         key.type = 0;
8975
8976         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8977         if (ret < 0)
8978                 goto out;
8979         /* Iterate all regular file extents and fill its csum */
8980         while (1) {
8981                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8982
8983                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8984                         goto next;
8985                 node = path->nodes[0];
8986                 slot = path->slots[0];
8987                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8988                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8989                         goto next;
8990                 start = btrfs_file_extent_disk_bytenr(node, fi);
8991                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8992
8993                 ret = populate_csum(trans, csum_root, buf, start, len);
8994                 if (ret == -EEXIST)
8995                         ret = 0;
8996                 if (ret < 0)
8997                         goto out;
8998 next:
8999                 /*
9000                  * TODO: if next leaf is corrupted, jump to nearest next valid
9001                  * leaf.
9002                  */
9003                 ret = btrfs_next_item(cur_root, path);
9004                 if (ret < 0)
9005                         goto out;
9006                 if (ret > 0) {
9007                         ret = 0;
9008                         goto out;
9009                 }
9010         }
9011
9012 out:
9013         btrfs_free_path(path);
9014         free(buf);
9015         return ret;
9016 }
9017
9018 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9019                                   struct btrfs_root *csum_root)
9020 {
9021         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9022         struct btrfs_path *path;
9023         struct btrfs_root *tree_root = fs_info->tree_root;
9024         struct btrfs_root *cur_root;
9025         struct extent_buffer *node;
9026         struct btrfs_key key;
9027         int slot = 0;
9028         int ret = 0;
9029
9030         path = btrfs_alloc_path();
9031         if (!path)
9032                 return -ENOMEM;
9033
9034         key.objectid = BTRFS_FS_TREE_OBJECTID;
9035         key.offset = 0;
9036         key.type = BTRFS_ROOT_ITEM_KEY;
9037
9038         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9039         if (ret < 0)
9040                 goto out;
9041         if (ret > 0) {
9042                 ret = -ENOENT;
9043                 goto out;
9044         }
9045
9046         while (1) {
9047                 node = path->nodes[0];
9048                 slot = path->slots[0];
9049                 btrfs_item_key_to_cpu(node, &key, slot);
9050                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9051                         goto out;
9052                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9053                         goto next;
9054                 if (!is_fstree(key.objectid))
9055                         goto next;
9056                 key.offset = (u64)-1;
9057
9058                 cur_root = btrfs_read_fs_root(fs_info, &key);
9059                 if (IS_ERR(cur_root) || !cur_root) {
9060                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9061                                 key.objectid);
9062                         goto out;
9063                 }
9064                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9065                                 cur_root);
9066                 if (ret < 0)
9067                         goto out;
9068 next:
9069                 ret = btrfs_next_item(tree_root, path);
9070                 if (ret > 0) {
9071                         ret = 0;
9072                         goto out;
9073                 }
9074                 if (ret < 0)
9075                         goto out;
9076         }
9077
9078 out:
9079         btrfs_free_path(path);
9080         return ret;
9081 }
9082
9083 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9084                                       struct btrfs_root *csum_root)
9085 {
9086         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9087         struct btrfs_path *path;
9088         struct btrfs_extent_item *ei;
9089         struct extent_buffer *leaf;
9090         char *buf;
9091         struct btrfs_key key;
9092         int ret;
9093
9094         path = btrfs_alloc_path();
9095         if (!path)
9096                 return -ENOMEM;
9097
9098         key.objectid = 0;
9099         key.type = BTRFS_EXTENT_ITEM_KEY;
9100         key.offset = 0;
9101
9102         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9103         if (ret < 0) {
9104                 btrfs_free_path(path);
9105                 return ret;
9106         }
9107
9108         buf = malloc(csum_root->sectorsize);
9109         if (!buf) {
9110                 btrfs_free_path(path);
9111                 return -ENOMEM;
9112         }
9113
9114         while (1) {
9115                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9116                         ret = btrfs_next_leaf(extent_root, path);
9117                         if (ret < 0)
9118                                 break;
9119                         if (ret) {
9120                                 ret = 0;
9121                                 break;
9122                         }
9123                 }
9124                 leaf = path->nodes[0];
9125
9126                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9127                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9128                         path->slots[0]++;
9129                         continue;
9130                 }
9131
9132                 ei = btrfs_item_ptr(leaf, path->slots[0],
9133                                     struct btrfs_extent_item);
9134                 if (!(btrfs_extent_flags(leaf, ei) &
9135                       BTRFS_EXTENT_FLAG_DATA)) {
9136                         path->slots[0]++;
9137                         continue;
9138                 }
9139
9140                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9141                                     key.offset);
9142                 if (ret)
9143                         break;
9144                 path->slots[0]++;
9145         }
9146
9147         btrfs_free_path(path);
9148         free(buf);
9149         return ret;
9150 }
9151
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes out all extent info, so in that case
 * the extent tree cannot be used as the source of data extents.  When
 * search_fs_tree is non-zero the fs/subvolume trees are scanned instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
9168
9169 static void free_roots_info_cache(void)
9170 {
9171         if (!roots_info_cache)
9172                 return;
9173
9174         while (!cache_tree_empty(roots_info_cache)) {
9175                 struct cache_extent *entry;
9176                 struct root_item_info *rii;
9177
9178                 entry = first_cache_extent(roots_info_cache);
9179                 if (!entry)
9180                         break;
9181                 remove_cache_extent(roots_info_cache, entry);
9182                 rii = container_of(entry, struct root_item_info, cache_extent);
9183                 free(rii);
9184         }
9185
9186         free(roots_info_cache);
9187         roots_info_cache = NULL;
9188 }
9189
9190 static int build_roots_info_cache(struct btrfs_fs_info *info)
9191 {
9192         int ret = 0;
9193         struct btrfs_key key;
9194         struct extent_buffer *leaf;
9195         struct btrfs_path *path;
9196
9197         if (!roots_info_cache) {
9198                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9199                 if (!roots_info_cache)
9200                         return -ENOMEM;
9201                 cache_tree_init(roots_info_cache);
9202         }
9203
9204         path = btrfs_alloc_path();
9205         if (!path)
9206                 return -ENOMEM;
9207
9208         key.objectid = 0;
9209         key.type = BTRFS_EXTENT_ITEM_KEY;
9210         key.offset = 0;
9211
9212         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9213         if (ret < 0)
9214                 goto out;
9215         leaf = path->nodes[0];
9216
9217         while (1) {
9218                 struct btrfs_key found_key;
9219                 struct btrfs_extent_item *ei;
9220                 struct btrfs_extent_inline_ref *iref;
9221                 int slot = path->slots[0];
9222                 int type;
9223                 u64 flags;
9224                 u64 root_id;
9225                 u8 level;
9226                 struct cache_extent *entry;
9227                 struct root_item_info *rii;
9228
9229                 if (slot >= btrfs_header_nritems(leaf)) {
9230                         ret = btrfs_next_leaf(info->extent_root, path);
9231                         if (ret < 0) {
9232                                 break;
9233                         } else if (ret) {
9234                                 ret = 0;
9235                                 break;
9236                         }
9237                         leaf = path->nodes[0];
9238                         slot = path->slots[0];
9239                 }
9240
9241                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9242
9243                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9244                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9245                         goto next;
9246
9247                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9248                 flags = btrfs_extent_flags(leaf, ei);
9249
9250                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9251                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9252                         goto next;
9253
9254                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9255                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9256                         level = found_key.offset;
9257                 } else {
9258                         struct btrfs_tree_block_info *binfo;
9259
9260                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9261                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9262                         level = btrfs_tree_block_level(leaf, binfo);
9263                 }
9264
9265                 /*
9266                  * For a root extent, it must be of the following type and the
9267                  * first (and only one) iref in the item.
9268                  */
9269                 type = btrfs_extent_inline_ref_type(leaf, iref);
9270                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9271                         goto next;
9272
9273                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9274                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9275                 if (!entry) {
9276                         rii = malloc(sizeof(struct root_item_info));
9277                         if (!rii) {
9278                                 ret = -ENOMEM;
9279                                 goto out;
9280                         }
9281                         rii->cache_extent.start = root_id;
9282                         rii->cache_extent.size = 1;
9283                         rii->level = (u8)-1;
9284                         entry = &rii->cache_extent;
9285                         ret = insert_cache_extent(roots_info_cache, entry);
9286                         ASSERT(ret == 0);
9287                 } else {
9288                         rii = container_of(entry, struct root_item_info,
9289                                            cache_extent);
9290                 }
9291
9292                 ASSERT(rii->cache_extent.start == root_id);
9293                 ASSERT(rii->cache_extent.size == 1);
9294
9295                 if (level > rii->level || rii->level == (u8)-1) {
9296                         rii->level = level;
9297                         rii->bytenr = found_key.objectid;
9298                         rii->gen = btrfs_extent_generation(leaf, ei);
9299                         rii->node_count = 1;
9300                 } else if (level == rii->level) {
9301                         rii->node_count++;
9302                 }
9303 next:
9304                 path->slots[0]++;
9305         }
9306
9307 out:
9308         btrfs_free_path(path);
9309
9310         return ret;
9311 }
9312
9313 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9314                                   struct btrfs_path *path,
9315                                   const struct btrfs_key *root_key,
9316                                   const int read_only_mode)
9317 {
9318         const u64 root_id = root_key->objectid;
9319         struct cache_extent *entry;
9320         struct root_item_info *rii;
9321         struct btrfs_root_item ri;
9322         unsigned long offset;
9323
9324         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9325         if (!entry) {
9326                 fprintf(stderr,
9327                         "Error: could not find extent items for root %llu\n",
9328                         root_key->objectid);
9329                 return -ENOENT;
9330         }
9331
9332         rii = container_of(entry, struct root_item_info, cache_extent);
9333         ASSERT(rii->cache_extent.start == root_id);
9334         ASSERT(rii->cache_extent.size == 1);
9335
9336         if (rii->node_count != 1) {
9337                 fprintf(stderr,
9338                         "Error: could not find btree root extent for root %llu\n",
9339                         root_id);
9340                 return -ENOENT;
9341         }
9342
9343         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9344         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9345
9346         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9347             btrfs_root_level(&ri) != rii->level ||
9348             btrfs_root_generation(&ri) != rii->gen) {
9349
9350                 /*
9351                  * If we're in repair mode but our caller told us to not update
9352                  * the root item, i.e. just check if it needs to be updated, don't
9353                  * print this message, since the caller will call us again shortly
9354                  * for the same root item without read only mode (the caller will
9355                  * open a transaction first).
9356                  */
9357                 if (!(read_only_mode && repair))
9358                         fprintf(stderr,
9359                                 "%sroot item for root %llu,"
9360                                 " current bytenr %llu, current gen %llu, current level %u,"
9361                                 " new bytenr %llu, new gen %llu, new level %u\n",
9362                                 (read_only_mode ? "" : "fixing "),
9363                                 root_id,
9364                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9365                                 btrfs_root_level(&ri),
9366                                 rii->bytenr, rii->gen, rii->level);
9367
9368                 if (btrfs_root_generation(&ri) > rii->gen) {
9369                         fprintf(stderr,
9370                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9371                                 root_id, btrfs_root_generation(&ri), rii->gen);
9372                         return -EINVAL;
9373                 }
9374
9375                 if (!read_only_mode) {
9376                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9377                         btrfs_set_root_level(&ri, rii->level);
9378                         btrfs_set_root_generation(&ri, rii->gen);
9379                         write_extent_buffer(path->nodes[0], &ri,
9380                                             offset, sizeof(ri));
9381                 }
9382
9383                 return 1;
9384         }
9385
9386         return 0;
9387 }
9388
9389 /*
9390  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9391  * caused read-only snapshots to be corrupted if they were created at a moment
9392  * when the source subvolume/snapshot had orphan items. The issue was that the
9393  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9394  * node instead of the post orphan cleanup root node.
9395  * So this function, and its callees, just detects and fixes those cases. Even
9396  * though the regression was for read-only snapshots, this function applies to
9397  * any snapshot/subvolume root.
9398  * This must be run before any other repair code - not doing it so, makes other
9399  * repair code delete or modify backrefs in the extent tree for example, which
9400  * will result in an inconsistent fs after repairing the root items.
9401  */
9402 static int repair_root_items(struct btrfs_fs_info *info)
9403 {
9404         struct btrfs_path *path = NULL;
9405         struct btrfs_key key;
9406         struct extent_buffer *leaf;
9407         struct btrfs_trans_handle *trans = NULL;
9408         int ret = 0;
9409         int bad_roots = 0;
9410         int need_trans = 0;
9411
9412         ret = build_roots_info_cache(info);
9413         if (ret)
9414                 goto out;
9415
9416         path = btrfs_alloc_path();
9417         if (!path) {
9418                 ret = -ENOMEM;
9419                 goto out;
9420         }
9421
9422         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9423         key.type = BTRFS_ROOT_ITEM_KEY;
9424         key.offset = 0;
9425
9426 again:
9427         /*
9428          * Avoid opening and committing transactions if a leaf doesn't have
9429          * any root items that need to be fixed, so that we avoid rotating
9430          * backup roots unnecessarily.
9431          */
9432         if (need_trans) {
9433                 trans = btrfs_start_transaction(info->tree_root, 1);
9434                 if (IS_ERR(trans)) {
9435                         ret = PTR_ERR(trans);
9436                         goto out;
9437                 }
9438         }
9439
9440         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9441                                 0, trans ? 1 : 0);
9442         if (ret < 0)
9443                 goto out;
9444         leaf = path->nodes[0];
9445
9446         while (1) {
9447                 struct btrfs_key found_key;
9448
9449                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9450                         int no_more_keys = find_next_key(path, &key);
9451
9452                         btrfs_release_path(path);
9453                         if (trans) {
9454                                 ret = btrfs_commit_transaction(trans,
9455                                                                info->tree_root);
9456                                 trans = NULL;
9457                                 if (ret < 0)
9458                                         goto out;
9459                         }
9460                         need_trans = 0;
9461                         if (no_more_keys)
9462                                 break;
9463                         goto again;
9464                 }
9465
9466                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9467
9468                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9469                         goto next;
9470                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9471                         goto next;
9472
9473                 ret = maybe_repair_root_item(info, path, &found_key,
9474                                              trans ? 0 : 1);
9475                 if (ret < 0)
9476                         goto out;
9477                 if (ret) {
9478                         if (!trans && repair) {
9479                                 need_trans = 1;
9480                                 key = found_key;
9481                                 btrfs_release_path(path);
9482                                 goto again;
9483                         }
9484                         bad_roots++;
9485                 }
9486 next:
9487                 path->slots[0]++;
9488         }
9489         ret = 0;
9490 out:
9491         free_roots_info_cache();
9492         btrfs_free_path(path);
9493         if (trans)
9494                 btrfs_commit_transaction(trans, info->tree_root);
9495         if (ret < 0)
9496                 return ret;
9497
9498         return bad_roots;
9499 }
9500
/*
 * NULL-terminated help text for "btrfs check", handed to usage() by
 * cmd_check().  The empty string separates the description from the list
 * of options.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found.",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        NULL
};
9524
9525 int cmd_check(int argc, char **argv)
9526 {
9527         struct cache_tree root_cache;
9528         struct btrfs_root *root;
9529         struct btrfs_fs_info *info;
9530         u64 bytenr = 0;
9531         u64 subvolid = 0;
9532         u64 tree_root_bytenr = 0;
9533         u64 chunk_root_bytenr = 0;
9534         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9535         int ret;
9536         u64 num;
9537         int init_csum_tree = 0;
9538         int readonly = 0;
9539         int qgroup_report = 0;
9540         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9541
9542         while(1) {
9543                 int c;
9544                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9545                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9546                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9547                 static const struct option long_options[] = {
9548                         { "super", required_argument, NULL, 's' },
9549                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9550                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9551                         { "init-csum-tree", no_argument, NULL,
9552                                 GETOPT_VAL_INIT_CSUM },
9553                         { "init-extent-tree", no_argument, NULL,
9554                                 GETOPT_VAL_INIT_EXTENT },
9555                         { "check-data-csum", no_argument, NULL,
9556                                 GETOPT_VAL_CHECK_CSUM },
9557                         { "backup", no_argument, NULL, 'b' },
9558                         { "subvol-extents", required_argument, NULL, 'E' },
9559                         { "qgroup-report", no_argument, NULL, 'Q' },
9560                         { "tree-root", required_argument, NULL, 'r' },
9561                         { "chunk-root", required_argument, NULL,
9562                                 GETOPT_VAL_CHUNK_TREE },
9563                         { "progress", no_argument, NULL, 'p' },
9564                         { NULL, 0, NULL, 0}
9565                 };
9566
9567                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9568                 if (c < 0)
9569                         break;
9570                 switch(c) {
9571                         case 'a': /* ignored */ break;
9572                         case 'b':
9573                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9574                                 break;
9575                         case 's':
9576                                 num = arg_strtou64(optarg);
9577                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9578                                         fprintf(stderr,
9579                                                 "ERROR: super mirror should be less than: %d\n",
9580                                                 BTRFS_SUPER_MIRROR_MAX);
9581                                         exit(1);
9582                                 }
9583                                 bytenr = btrfs_sb_offset(((int)num));
9584                                 printf("using SB copy %llu, bytenr %llu\n", num,
9585                                        (unsigned long long)bytenr);
9586                                 break;
9587                         case 'Q':
9588                                 qgroup_report = 1;
9589                                 break;
9590                         case 'E':
9591                                 subvolid = arg_strtou64(optarg);
9592                                 break;
9593                         case 'r':
9594                                 tree_root_bytenr = arg_strtou64(optarg);
9595                                 break;
9596                         case GETOPT_VAL_CHUNK_TREE:
9597                                 chunk_root_bytenr = arg_strtou64(optarg);
9598                                 break;
9599                         case 'p':
9600                                 ctx.progress_enabled = true;
9601                                 break;
9602                         case '?':
9603                         case 'h':
9604                                 usage(cmd_check_usage);
9605                         case GETOPT_VAL_REPAIR:
9606                                 printf("enabling repair mode\n");
9607                                 repair = 1;
9608                                 ctree_flags |= OPEN_CTREE_WRITES;
9609                                 break;
9610                         case GETOPT_VAL_READONLY:
9611                                 readonly = 1;
9612                                 break;
9613                         case GETOPT_VAL_INIT_CSUM:
9614                                 printf("Creating a new CRC tree\n");
9615                                 init_csum_tree = 1;
9616                                 repair = 1;
9617                                 ctree_flags |= OPEN_CTREE_WRITES;
9618                                 break;
9619                         case GETOPT_VAL_INIT_EXTENT:
9620                                 init_extent_tree = 1;
9621                                 ctree_flags |= (OPEN_CTREE_WRITES |
9622                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9623                                 repair = 1;
9624                                 break;
9625                         case GETOPT_VAL_CHECK_CSUM:
9626                                 check_data_csum = 1;
9627                                 break;
9628                 }
9629         }
9630
9631         if (check_argc_exact(argc - optind, 1))
9632                 usage(cmd_check_usage);
9633
9634         if (ctx.progress_enabled) {
9635                 ctx.tp = TASK_NOTHING;
9636                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9637         }
9638
9639         /* This check is the only reason for --readonly to exist */
9640         if (readonly && repair) {
9641                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9642                 exit(1);
9643         }
9644
9645         radix_tree_init();
9646         cache_tree_init(&root_cache);
9647
9648         if((ret = check_mounted(argv[optind])) < 0) {
9649                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9650                 goto err_out;
9651         } else if(ret) {
9652                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9653                 ret = -EBUSY;
9654                 goto err_out;
9655         }
9656
9657         /* only allow partial opening under repair mode */
9658         if (repair)
9659                 ctree_flags |= OPEN_CTREE_PARTIAL;
9660
9661         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9662                                   chunk_root_bytenr, ctree_flags);
9663         if (!info) {
9664                 fprintf(stderr, "Couldn't open file system\n");
9665                 ret = -EIO;
9666                 goto err_out;
9667         }
9668
9669         global_info = info;
9670         root = info->fs_root;
9671
9672         /*
9673          * repair mode will force us to commit transaction which
9674          * will make us fail to load log tree when mounting.
9675          */
9676         if (repair && btrfs_super_log_root(info->super_copy)) {
9677                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9678                 if (!ret) {
9679                         ret = 1;
9680                         goto close_out;
9681                 }
9682                 ret = zero_log_tree(root);
9683                 if (ret) {
9684                         fprintf(stderr, "fail to zero log tree\n");
9685                         goto close_out;
9686                 }
9687         }
9688
9689         uuid_unparse(info->super_copy->fsid, uuidbuf);
9690         if (qgroup_report) {
9691                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9692                        uuidbuf);
9693                 ret = qgroup_verify_all(info);
9694                 if (ret == 0)
9695                         ret = report_qgroups(1);
9696                 goto close_out;
9697         }
9698         if (subvolid) {
9699                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9700                        subvolid, argv[optind], uuidbuf);
9701                 ret = print_extent_state(info, subvolid);
9702                 goto close_out;
9703         }
9704         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9705
9706         if (!extent_buffer_uptodate(info->tree_root->node) ||
9707             !extent_buffer_uptodate(info->dev_root->node) ||
9708             !extent_buffer_uptodate(info->chunk_root->node)) {
9709                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9710                 ret = -EIO;
9711                 goto close_out;
9712         }
9713
9714         if (init_extent_tree || init_csum_tree) {
9715                 struct btrfs_trans_handle *trans;
9716
9717                 trans = btrfs_start_transaction(info->extent_root, 0);
9718                 if (IS_ERR(trans)) {
9719                         fprintf(stderr, "Error starting transaction\n");
9720                         ret = PTR_ERR(trans);
9721                         goto close_out;
9722                 }
9723
9724                 if (init_extent_tree) {
9725                         printf("Creating a new extent tree\n");
9726                         ret = reinit_extent_tree(trans, info);
9727                         if (ret)
9728                                 goto close_out;
9729                 }
9730
9731                 if (init_csum_tree) {
9732                         fprintf(stderr, "Reinit crc root\n");
9733                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9734                         if (ret) {
9735                                 fprintf(stderr, "crc root initialization failed\n");
9736                                 ret = -EIO;
9737                                 goto close_out;
9738                         }
9739
9740                         ret = fill_csum_tree(trans, info->csum_root,
9741                                              init_extent_tree);
9742                         if (ret) {
9743                                 fprintf(stderr, "crc refilling failed\n");
9744                                 return -EIO;
9745                         }
9746                 }
9747                 /*
9748                  * Ok now we commit and run the normal fsck, which will add
9749                  * extent entries for all of the items it finds.
9750                  */
9751                 ret = btrfs_commit_transaction(trans, info->extent_root);
9752                 if (ret)
9753                         goto close_out;
9754         }
9755         if (!extent_buffer_uptodate(info->extent_root->node)) {
9756                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9757                 ret = -EIO;
9758                 goto close_out;
9759         }
9760         if (!extent_buffer_uptodate(info->csum_root->node)) {
9761                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9762                 ret = -EIO;
9763                 goto close_out;
9764         }
9765
9766         if (!ctx.progress_enabled)
9767                 fprintf(stderr, "checking extents\n");
9768         ret = check_chunks_and_extents(root);
9769         if (ret)
9770                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9771
9772         ret = repair_root_items(info);
9773         if (ret < 0)
9774                 goto close_out;
9775         if (repair) {
9776                 fprintf(stderr, "Fixed %d roots.\n", ret);
9777                 ret = 0;
9778         } else if (ret > 0) {
9779                 fprintf(stderr,
9780                        "Found %d roots with an outdated root item.\n",
9781                        ret);
9782                 fprintf(stderr,
9783                         "Please run a filesystem check with the option --repair to fix them.\n");
9784                 ret = 1;
9785                 goto close_out;
9786         }
9787
9788         if (!ctx.progress_enabled) {
9789                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9790                         fprintf(stderr, "checking free space tree\n");
9791                 else
9792                         fprintf(stderr, "checking free space cache\n");
9793         }
9794         ret = check_space_cache(root);
9795         if (ret)
9796                 goto out;
9797
9798         /*
9799          * We used to have to have these hole extents in between our real
9800          * extents so if we don't have this flag set we need to make sure there
9801          * are no gaps in the file extents for inodes, otherwise we can just
9802          * ignore it when this happens.
9803          */
9804         no_holes = btrfs_fs_incompat(root->fs_info,
9805                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9806         if (!ctx.progress_enabled)
9807                 fprintf(stderr, "checking fs roots\n");
9808         ret = check_fs_roots(root, &root_cache);
9809         if (ret)
9810                 goto out;
9811
9812         fprintf(stderr, "checking csums\n");
9813         ret = check_csums(root);
9814         if (ret)
9815                 goto out;
9816
9817         fprintf(stderr, "checking root refs\n");
9818         ret = check_root_refs(root, &root_cache);
9819         if (ret)
9820                 goto out;
9821
9822         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9823                 struct extent_buffer *eb;
9824
9825                 eb = list_first_entry(&root->fs_info->recow_ebs,
9826                                       struct extent_buffer, recow);
9827                 list_del_init(&eb->recow);
9828                 ret = recow_extent_buffer(root, eb);
9829                 if (ret)
9830                         break;
9831         }
9832
9833         while (!list_empty(&delete_items)) {
9834                 struct bad_item *bad;
9835
9836                 bad = list_first_entry(&delete_items, struct bad_item, list);
9837                 list_del_init(&bad->list);
9838                 if (repair)
9839                         ret = delete_bad_item(root, bad);
9840                 free(bad);
9841         }
9842
9843         if (info->quota_enabled) {
9844                 int err;
9845                 fprintf(stderr, "checking quota groups\n");
9846                 err = qgroup_verify_all(info);
9847                 if (err)
9848                         goto out;
9849         }
9850
9851         if (!list_empty(&root->fs_info->recow_ebs)) {
9852                 fprintf(stderr, "Transid errors in file system\n");
9853                 ret = 1;
9854         }
9855 out:
9856         /* Don't override original ret */
9857         if (ret)
9858                 report_qgroups(0);
9859         else
9860                 ret = report_qgroups(0);
9861         if (found_old_backref) { /*
9862                  * there was a disk format change when mixed
9863                  * backref was in testing tree. The old format
9864                  * existed about one week.
9865                  */
9866                 printf("\n * Found old mixed backref format. "
9867                        "The old format is not supported! *"
9868                        "\n * Please mount the FS in readonly mode, "
9869                        "backup data and re-format the FS. *\n\n");
9870                 ret = 1;
9871         }
9872         printf("found %llu bytes used err is %d\n",
9873                (unsigned long long)bytes_used, ret);
9874         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9875         printf("total tree bytes: %llu\n",
9876                (unsigned long long)total_btree_bytes);
9877         printf("total fs tree bytes: %llu\n",
9878                (unsigned long long)total_fs_tree_bytes);
9879         printf("total extent tree bytes: %llu\n",
9880                (unsigned long long)total_extent_tree_bytes);
9881         printf("btree space waste bytes: %llu\n",
9882                (unsigned long long)btree_space_waste);
9883         printf("file data blocks allocated: %llu\n referenced %llu\n",
9884                 (unsigned long long)data_bytes_allocated,
9885                 (unsigned long long)data_bytes_referenced);
9886
9887         free_root_recs_tree(&root_cache);
9888 close_out:
9889         close_ctree(root);
9890 err_out:
9891         if (ctx.progress_enabled)
9892                 task_deinit(ctx.info);
9893
9894         return ret;
9895 }