Btrfs-progs: fsck: deal with really corrupted extent tree
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "ctree.h"
31 #include "volumes.h"
32 #include "repair.h"
33 #include "disk-io.h"
34 #include "print-tree.h"
35 #include "transaction.h"
36 #include "version.h"
37 #include "utils.h"
38 #include "commands.h"
39 #include "free-space-cache.h"
40 #include "btrfsck.h"
41
42 static u64 bytes_used = 0;
43 static u64 total_csum_bytes = 0;
44 static u64 total_btree_bytes = 0;
45 static u64 total_fs_tree_bytes = 0;
46 static u64 total_extent_tree_bytes = 0;
47 static u64 btree_space_waste = 0;
48 static u64 data_bytes_allocated = 0;
49 static u64 data_bytes_referenced = 0;
50 static int found_old_backref = 0;
51 static LIST_HEAD(duplicate_extents);
52 static LIST_HEAD(delete_items);
53 static int repair = 0;
54 static int no_holes = 0;
55
56 struct extent_backref {
57         struct list_head list;
58         unsigned int is_data:1;
59         unsigned int found_extent_tree:1;
60         unsigned int full_backref:1;
61         unsigned int found_ref:1;
62         unsigned int broken:1;
63 };
64
65 struct data_backref {
66         struct extent_backref node;
67         union {
68                 u64 parent;
69                 u64 root;
70         };
71         u64 owner;
72         u64 offset;
73         u64 disk_bytenr;
74         u64 bytes;
75         u64 ram_bytes;
76         u32 num_refs;
77         u32 found_ref;
78 };
79
80 struct tree_backref {
81         struct extent_backref node;
82         union {
83                 u64 parent;
84                 u64 root;
85         };
86 };
87
88 struct extent_record {
89         struct list_head backrefs;
90         struct list_head dups;
91         struct list_head list;
92         struct cache_extent cache;
93         struct btrfs_disk_key parent_key;
94         unsigned int found_rec;
95         u64 start;
96         u64 max_size;
97         u64 nr;
98         u64 refs;
99         u64 extent_item_refs;
100         u64 generation;
101         u64 parent_generation;
102         u64 info_objectid;
103         u64 num_duplicates;
104         u8 info_level;
105         unsigned int content_checked:1;
106         unsigned int owner_ref_checked:1;
107         unsigned int is_root:1;
108         unsigned int metadata:1;
109 };
110
111 struct inode_backref {
112         struct list_head list;
113         unsigned int found_dir_item:1;
114         unsigned int found_dir_index:1;
115         unsigned int found_inode_ref:1;
116         unsigned int filetype:8;
117         int errors;
118         unsigned int ref_type;
119         u64 dir;
120         u64 index;
121         u16 namelen;
122         char name[0];
123 };
124
125 struct dropping_root_item_record {
126         struct list_head list;
127         struct btrfs_root_item ri;
128         struct btrfs_key found_key;
129 };
130
131 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
132 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
133 #define REF_ERR_NO_INODE_REF            (1 << 2)
134 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
135 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
136 #define REF_ERR_DUP_INODE_REF           (1 << 5)
137 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
138 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
139 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
140 #define REF_ERR_NO_ROOT_REF             (1 << 9)
141 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
142 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
143 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
144
145 struct inode_record {
146         struct list_head backrefs;
147         unsigned int checked:1;
148         unsigned int merging:1;
149         unsigned int found_inode_item:1;
150         unsigned int found_dir_item:1;
151         unsigned int found_file_extent:1;
152         unsigned int found_csum_item:1;
153         unsigned int some_csum_missing:1;
154         unsigned int nodatasum:1;
155         int errors;
156
157         u64 ino;
158         u32 nlink;
159         u32 imode;
160         u64 isize;
161         u64 nbytes;
162
163         u32 found_link;
164         u64 found_size;
165         u64 extent_start;
166         u64 extent_end;
167         u64 first_extent_gap;
168
169         u32 refs;
170 };
171
172 #define I_ERR_NO_INODE_ITEM             (1 << 0)
173 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
174 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
175 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
176 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
177 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
178 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
179 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
180 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
181 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
182 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
183 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
184 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
185 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
186
187 struct root_backref {
188         struct list_head list;
189         unsigned int found_dir_item:1;
190         unsigned int found_dir_index:1;
191         unsigned int found_back_ref:1;
192         unsigned int found_forward_ref:1;
193         unsigned int reachable:1;
194         int errors;
195         u64 ref_root;
196         u64 dir;
197         u64 index;
198         u16 namelen;
199         char name[0];
200 };
201
202 struct root_record {
203         struct list_head backrefs;
204         struct cache_extent cache;
205         unsigned int found_root_item:1;
206         u64 objectid;
207         u32 found_ref;
208 };
209
210 struct ptr_node {
211         struct cache_extent cache;
212         void *data;
213 };
214
215 struct shared_node {
216         struct cache_extent cache;
217         struct cache_tree root_cache;
218         struct cache_tree inode_cache;
219         struct inode_record *current;
220         u32 refs;
221 };
222
223 struct block_info {
224         u64 start;
225         u32 size;
226 };
227
228 struct walk_control {
229         struct cache_tree shared;
230         struct shared_node *nodes[BTRFS_MAX_LEVEL];
231         int active_node;
232         int root_level;
233 };
234
235 struct bad_item {
236         struct btrfs_key key;
237         u64 root_id;
238         struct list_head list;
239 };
240
241 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
242
243 static u8 imode_to_type(u32 imode)
244 {
245 #define S_SHIFT 12
246         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
247                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
248                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
249                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
250                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
251                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
252                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
253                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
254         };
255
256         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
257 #undef S_SHIFT
258 }
259
260 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
261 {
262         struct device_record *rec1;
263         struct device_record *rec2;
264
265         rec1 = rb_entry(node1, struct device_record, node);
266         rec2 = rb_entry(node2, struct device_record, node);
267         if (rec1->devid > rec2->devid)
268                 return -1;
269         else if (rec1->devid < rec2->devid)
270                 return 1;
271         else
272                 return 0;
273 }
274
275 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
276 {
277         struct inode_record *rec;
278         struct inode_backref *backref;
279         struct inode_backref *orig;
280         size_t size;
281
282         rec = malloc(sizeof(*rec));
283         memcpy(rec, orig_rec, sizeof(*rec));
284         rec->refs = 1;
285         INIT_LIST_HEAD(&rec->backrefs);
286
287         list_for_each_entry(orig, &orig_rec->backrefs, list) {
288                 size = sizeof(*orig) + orig->namelen + 1;
289                 backref = malloc(size);
290                 memcpy(backref, orig, size);
291                 list_add_tail(&backref->list, &rec->backrefs);
292         }
293         return rec;
294 }
295
296 static void print_inode_error(int errors)
297 {
298         if (errors & I_ERR_NO_INODE_ITEM)
299                 fprintf(stderr, ", no inode item");
300         if (errors & I_ERR_NO_ORPHAN_ITEM)
301                 fprintf(stderr, ", no orphan item");
302         if (errors & I_ERR_DUP_INODE_ITEM)
303                 fprintf(stderr, ", dup inode item");
304         if (errors & I_ERR_DUP_DIR_INDEX)
305                 fprintf(stderr, ", dup dir index");
306         if (errors & I_ERR_ODD_DIR_ITEM)
307                 fprintf(stderr, ", odd dir item");
308         if (errors & I_ERR_ODD_FILE_EXTENT)
309                 fprintf(stderr, ", odd file extent");
310         if (errors & I_ERR_BAD_FILE_EXTENT)
311                 fprintf(stderr, ", bad file extent");
312         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
313                 fprintf(stderr, ", file extent overlap");
314         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
315                 fprintf(stderr, ", file extent discount");
316         if (errors & I_ERR_DIR_ISIZE_WRONG)
317                 fprintf(stderr, ", dir isize wrong");
318         if (errors & I_ERR_FILE_NBYTES_WRONG)
319                 fprintf(stderr, ", nbytes wrong");
320         if (errors & I_ERR_ODD_CSUM_ITEM)
321                 fprintf(stderr, ", odd csum item");
322         if (errors & I_ERR_SOME_CSUM_MISSING)
323                 fprintf(stderr, ", some csum missing");
324         if (errors & I_ERR_LINK_COUNT_WRONG)
325                 fprintf(stderr, ", link count wrong");
326         fprintf(stderr, "\n");
327 }
328
329 static void print_ref_error(int errors)
330 {
331         if (errors & REF_ERR_NO_DIR_ITEM)
332                 fprintf(stderr, ", no dir item");
333         if (errors & REF_ERR_NO_DIR_INDEX)
334                 fprintf(stderr, ", no dir index");
335         if (errors & REF_ERR_NO_INODE_REF)
336                 fprintf(stderr, ", no inode ref");
337         if (errors & REF_ERR_DUP_DIR_ITEM)
338                 fprintf(stderr, ", dup dir item");
339         if (errors & REF_ERR_DUP_DIR_INDEX)
340                 fprintf(stderr, ", dup dir index");
341         if (errors & REF_ERR_DUP_INODE_REF)
342                 fprintf(stderr, ", dup inode ref");
343         if (errors & REF_ERR_INDEX_UNMATCH)
344                 fprintf(stderr, ", index unmatch");
345         if (errors & REF_ERR_FILETYPE_UNMATCH)
346                 fprintf(stderr, ", filetype unmatch");
347         if (errors & REF_ERR_NAME_TOO_LONG)
348                 fprintf(stderr, ", name too long");
349         if (errors & REF_ERR_NO_ROOT_REF)
350                 fprintf(stderr, ", no root ref");
351         if (errors & REF_ERR_NO_ROOT_BACKREF)
352                 fprintf(stderr, ", no root backref");
353         if (errors & REF_ERR_DUP_ROOT_REF)
354                 fprintf(stderr, ", dup root ref");
355         if (errors & REF_ERR_DUP_ROOT_BACKREF)
356                 fprintf(stderr, ", dup root backref");
357         fprintf(stderr, "\n");
358 }
359
360 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
361                                           u64 ino, int mod)
362 {
363         struct ptr_node *node;
364         struct cache_extent *cache;
365         struct inode_record *rec = NULL;
366         int ret;
367
368         cache = lookup_cache_extent(inode_cache, ino, 1);
369         if (cache) {
370                 node = container_of(cache, struct ptr_node, cache);
371                 rec = node->data;
372                 if (mod && rec->refs > 1) {
373                         node->data = clone_inode_rec(rec);
374                         rec->refs--;
375                         rec = node->data;
376                 }
377         } else if (mod) {
378                 rec = calloc(1, sizeof(*rec));
379                 rec->ino = ino;
380                 rec->extent_start = (u64)-1;
381                 rec->first_extent_gap = (u64)-1;
382                 rec->refs = 1;
383                 INIT_LIST_HEAD(&rec->backrefs);
384
385                 node = malloc(sizeof(*node));
386                 node->cache.start = ino;
387                 node->cache.size = 1;
388                 node->data = rec;
389
390                 if (ino == BTRFS_FREE_INO_OBJECTID)
391                         rec->found_link = 1;
392
393                 ret = insert_cache_extent(inode_cache, &node->cache);
394                 BUG_ON(ret);
395         }
396         return rec;
397 }
398
399 static void free_inode_rec(struct inode_record *rec)
400 {
401         struct inode_backref *backref;
402
403         if (--rec->refs > 0)
404                 return;
405
406         while (!list_empty(&rec->backrefs)) {
407                 backref = list_entry(rec->backrefs.next,
408                                      struct inode_backref, list);
409                 list_del(&backref->list);
410                 free(backref);
411         }
412         free(rec);
413 }
414
415 static int can_free_inode_rec(struct inode_record *rec)
416 {
417         if (!rec->errors && rec->checked && rec->found_inode_item &&
418             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
419                 return 1;
420         return 0;
421 }
422
423 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
424                                  struct inode_record *rec)
425 {
426         struct cache_extent *cache;
427         struct inode_backref *tmp, *backref;
428         struct ptr_node *node;
429         unsigned char filetype;
430
431         if (!rec->found_inode_item)
432                 return;
433
434         filetype = imode_to_type(rec->imode);
435         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
436                 if (backref->found_dir_item && backref->found_dir_index) {
437                         if (backref->filetype != filetype)
438                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
439                         if (!backref->errors && backref->found_inode_ref) {
440                                 list_del(&backref->list);
441                                 free(backref);
442                         }
443                 }
444         }
445
446         if (!rec->checked || rec->merging)
447                 return;
448
449         if (S_ISDIR(rec->imode)) {
450                 if (rec->found_size != rec->isize)
451                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
452                 if (rec->found_file_extent)
453                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
454         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
455                 if (rec->found_dir_item)
456                         rec->errors |= I_ERR_ODD_DIR_ITEM;
457                 if (rec->found_size != rec->nbytes)
458                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
459                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
460                         rec->first_extent_gap = 0;
461                 if (rec->nlink > 0 && !no_holes &&
462                     (rec->extent_end < rec->isize ||
463                      rec->first_extent_gap < rec->isize))
464                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
465         }
466
467         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
468                 if (rec->found_csum_item && rec->nodatasum)
469                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
470                 if (rec->some_csum_missing && !rec->nodatasum)
471                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
472         }
473
474         BUG_ON(rec->refs != 1);
475         if (can_free_inode_rec(rec)) {
476                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
477                 node = container_of(cache, struct ptr_node, cache);
478                 BUG_ON(node->data != rec);
479                 remove_cache_extent(inode_cache, &node->cache);
480                 free(node);
481                 free_inode_rec(rec);
482         }
483 }
484
485 static int check_orphan_item(struct btrfs_root *root, u64 ino)
486 {
487         struct btrfs_path path;
488         struct btrfs_key key;
489         int ret;
490
491         key.objectid = BTRFS_ORPHAN_OBJECTID;
492         key.type = BTRFS_ORPHAN_ITEM_KEY;
493         key.offset = ino;
494
495         btrfs_init_path(&path);
496         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
497         btrfs_release_path(&path);
498         if (ret > 0)
499                 ret = -ENOENT;
500         return ret;
501 }
502
503 static int process_inode_item(struct extent_buffer *eb,
504                               int slot, struct btrfs_key *key,
505                               struct shared_node *active_node)
506 {
507         struct inode_record *rec;
508         struct btrfs_inode_item *item;
509
510         rec = active_node->current;
511         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
512         if (rec->found_inode_item) {
513                 rec->errors |= I_ERR_DUP_INODE_ITEM;
514                 return 1;
515         }
516         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
517         rec->nlink = btrfs_inode_nlink(eb, item);
518         rec->isize = btrfs_inode_size(eb, item);
519         rec->nbytes = btrfs_inode_nbytes(eb, item);
520         rec->imode = btrfs_inode_mode(eb, item);
521         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
522                 rec->nodatasum = 1;
523         rec->found_inode_item = 1;
524         if (rec->nlink == 0)
525                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
526         maybe_free_inode_rec(&active_node->inode_cache, rec);
527         return 0;
528 }
529
530 static struct inode_backref *get_inode_backref(struct inode_record *rec,
531                                                 const char *name,
532                                                 int namelen, u64 dir)
533 {
534         struct inode_backref *backref;
535
536         list_for_each_entry(backref, &rec->backrefs, list) {
537                 if (backref->dir != dir || backref->namelen != namelen)
538                         continue;
539                 if (memcmp(name, backref->name, namelen))
540                         continue;
541                 return backref;
542         }
543
544         backref = malloc(sizeof(*backref) + namelen + 1);
545         memset(backref, 0, sizeof(*backref));
546         backref->dir = dir;
547         backref->namelen = namelen;
548         memcpy(backref->name, name, namelen);
549         backref->name[namelen] = '\0';
550         list_add_tail(&backref->list, &rec->backrefs);
551         return backref;
552 }
553
554 static int add_inode_backref(struct cache_tree *inode_cache,
555                              u64 ino, u64 dir, u64 index,
556                              const char *name, int namelen,
557                              int filetype, int itemtype, int errors)
558 {
559         struct inode_record *rec;
560         struct inode_backref *backref;
561
562         rec = get_inode_rec(inode_cache, ino, 1);
563         backref = get_inode_backref(rec, name, namelen, dir);
564         if (errors)
565                 backref->errors |= errors;
566         if (itemtype == BTRFS_DIR_INDEX_KEY) {
567                 if (backref->found_dir_index)
568                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
569                 if (backref->found_inode_ref && backref->index != index)
570                         backref->errors |= REF_ERR_INDEX_UNMATCH;
571                 if (backref->found_dir_item && backref->filetype != filetype)
572                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
573
574                 backref->index = index;
575                 backref->filetype = filetype;
576                 backref->found_dir_index = 1;
577         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
578                 rec->found_link++;
579                 if (backref->found_dir_item)
580                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
581                 if (backref->found_dir_index && backref->filetype != filetype)
582                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
583
584                 backref->filetype = filetype;
585                 backref->found_dir_item = 1;
586         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
587                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
588                 if (backref->found_inode_ref)
589                         backref->errors |= REF_ERR_DUP_INODE_REF;
590                 if (backref->found_dir_index && backref->index != index)
591                         backref->errors |= REF_ERR_INDEX_UNMATCH;
592
593                 backref->ref_type = itemtype;
594                 backref->index = index;
595                 backref->found_inode_ref = 1;
596         } else {
597                 BUG_ON(1);
598         }
599
600         maybe_free_inode_rec(inode_cache, rec);
601         return 0;
602 }
603
604 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
605                             struct cache_tree *dst_cache)
606 {
607         struct inode_backref *backref;
608         u32 dir_count = 0;
609
610         dst->merging = 1;
611         list_for_each_entry(backref, &src->backrefs, list) {
612                 if (backref->found_dir_index) {
613                         add_inode_backref(dst_cache, dst->ino, backref->dir,
614                                         backref->index, backref->name,
615                                         backref->namelen, backref->filetype,
616                                         BTRFS_DIR_INDEX_KEY, backref->errors);
617                 }
618                 if (backref->found_dir_item) {
619                         dir_count++;
620                         add_inode_backref(dst_cache, dst->ino,
621                                         backref->dir, 0, backref->name,
622                                         backref->namelen, backref->filetype,
623                                         BTRFS_DIR_ITEM_KEY, backref->errors);
624                 }
625                 if (backref->found_inode_ref) {
626                         add_inode_backref(dst_cache, dst->ino,
627                                         backref->dir, backref->index,
628                                         backref->name, backref->namelen, 0,
629                                         backref->ref_type, backref->errors);
630                 }
631         }
632
633         if (src->found_dir_item)
634                 dst->found_dir_item = 1;
635         if (src->found_file_extent)
636                 dst->found_file_extent = 1;
637         if (src->found_csum_item)
638                 dst->found_csum_item = 1;
639         if (src->some_csum_missing)
640                 dst->some_csum_missing = 1;
641         if (dst->first_extent_gap > src->first_extent_gap)
642                 dst->first_extent_gap = src->first_extent_gap;
643
644         BUG_ON(src->found_link < dir_count);
645         dst->found_link += src->found_link - dir_count;
646         dst->found_size += src->found_size;
647         if (src->extent_start != (u64)-1) {
648                 if (dst->extent_start == (u64)-1) {
649                         dst->extent_start = src->extent_start;
650                         dst->extent_end = src->extent_end;
651                 } else {
652                         if (dst->extent_end > src->extent_start)
653                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
654                         else if (dst->extent_end < src->extent_start &&
655                                  dst->extent_end < dst->first_extent_gap)
656                                 dst->first_extent_gap = dst->extent_end;
657                         if (dst->extent_end < src->extent_end)
658                                 dst->extent_end = src->extent_end;
659                 }
660         }
661
662         dst->errors |= src->errors;
663         if (src->found_inode_item) {
664                 if (!dst->found_inode_item) {
665                         dst->nlink = src->nlink;
666                         dst->isize = src->isize;
667                         dst->nbytes = src->nbytes;
668                         dst->imode = src->imode;
669                         dst->nodatasum = src->nodatasum;
670                         dst->found_inode_item = 1;
671                 } else {
672                         dst->errors |= I_ERR_DUP_INODE_ITEM;
673                 }
674         }
675         dst->merging = 0;
676
677         return 0;
678 }
679
680 static int splice_shared_node(struct shared_node *src_node,
681                               struct shared_node *dst_node)
682 {
683         struct cache_extent *cache;
684         struct ptr_node *node, *ins;
685         struct cache_tree *src, *dst;
686         struct inode_record *rec, *conflict;
687         u64 current_ino = 0;
688         int splice = 0;
689         int ret;
690
691         if (--src_node->refs == 0)
692                 splice = 1;
693         if (src_node->current)
694                 current_ino = src_node->current->ino;
695
696         src = &src_node->root_cache;
697         dst = &dst_node->root_cache;
698 again:
699         cache = search_cache_extent(src, 0);
700         while (cache) {
701                 node = container_of(cache, struct ptr_node, cache);
702                 rec = node->data;
703                 cache = next_cache_extent(cache);
704
705                 if (splice) {
706                         remove_cache_extent(src, &node->cache);
707                         ins = node;
708                 } else {
709                         ins = malloc(sizeof(*ins));
710                         ins->cache.start = node->cache.start;
711                         ins->cache.size = node->cache.size;
712                         ins->data = rec;
713                         rec->refs++;
714                 }
715                 ret = insert_cache_extent(dst, &ins->cache);
716                 if (ret == -EEXIST) {
717                         conflict = get_inode_rec(dst, rec->ino, 1);
718                         merge_inode_recs(rec, conflict, dst);
719                         if (rec->checked) {
720                                 conflict->checked = 1;
721                                 if (dst_node->current == conflict)
722                                         dst_node->current = NULL;
723                         }
724                         maybe_free_inode_rec(dst, conflict);
725                         free_inode_rec(rec);
726                         free(ins);
727                 } else {
728                         BUG_ON(ret);
729                 }
730         }
731
732         if (src == &src_node->root_cache) {
733                 src = &src_node->inode_cache;
734                 dst = &dst_node->inode_cache;
735                 goto again;
736         }
737
738         if (current_ino > 0 && (!dst_node->current ||
739             current_ino > dst_node->current->ino)) {
740                 if (dst_node->current) {
741                         dst_node->current->checked = 1;
742                         maybe_free_inode_rec(dst, dst_node->current);
743                 }
744                 dst_node->current = get_inode_rec(dst, current_ino, 1);
745         }
746         return 0;
747 }
748
749 static void free_inode_ptr(struct cache_extent *cache)
750 {
751         struct ptr_node *node;
752         struct inode_record *rec;
753
754         node = container_of(cache, struct ptr_node, cache);
755         rec = node->data;
756         free_inode_rec(rec);
757         free(node);
758 }
759
760 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
761
762 static struct shared_node *find_shared_node(struct cache_tree *shared,
763                                             u64 bytenr)
764 {
765         struct cache_extent *cache;
766         struct shared_node *node;
767
768         cache = lookup_cache_extent(shared, bytenr, 1);
769         if (cache) {
770                 node = container_of(cache, struct shared_node, cache);
771                 return node;
772         }
773         return NULL;
774 }
775
776 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
777 {
778         int ret;
779         struct shared_node *node;
780
781         node = calloc(1, sizeof(*node));
782         node->cache.start = bytenr;
783         node->cache.size = 1;
784         cache_tree_init(&node->root_cache);
785         cache_tree_init(&node->inode_cache);
786         node->refs = refs;
787
788         ret = insert_cache_extent(shared, &node->cache);
789         BUG_ON(ret);
790         return 0;
791 }
792
793 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
794                              struct walk_control *wc, int level)
795 {
796         struct shared_node *node;
797         struct shared_node *dest;
798
799         if (level == wc->active_node)
800                 return 0;
801
802         BUG_ON(wc->active_node <= level);
803         node = find_shared_node(&wc->shared, bytenr);
804         if (!node) {
805                 add_shared_node(&wc->shared, bytenr, refs);
806                 node = find_shared_node(&wc->shared, bytenr);
807                 wc->nodes[level] = node;
808                 wc->active_node = level;
809                 return 0;
810         }
811
812         if (wc->root_level == wc->active_node &&
813             btrfs_root_refs(&root->root_item) == 0) {
814                 if (--node->refs == 0) {
815                         free_inode_recs_tree(&node->root_cache);
816                         free_inode_recs_tree(&node->inode_cache);
817                         remove_cache_extent(&wc->shared, &node->cache);
818                         free(node);
819                 }
820                 return 1;
821         }
822
823         dest = wc->nodes[wc->active_node];
824         splice_shared_node(node, dest);
825         if (node->refs == 0) {
826                 remove_cache_extent(&wc->shared, &node->cache);
827                 free(node);
828         }
829         return 1;
830 }
831
832 static int leave_shared_node(struct btrfs_root *root,
833                              struct walk_control *wc, int level)
834 {
835         struct shared_node *node;
836         struct shared_node *dest;
837         int i;
838
839         if (level == wc->root_level)
840                 return 0;
841
842         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
843                 if (wc->nodes[i])
844                         break;
845         }
846         BUG_ON(i >= BTRFS_MAX_LEVEL);
847
848         node = wc->nodes[wc->active_node];
849         wc->nodes[wc->active_node] = NULL;
850         wc->active_node = i;
851
852         dest = wc->nodes[wc->active_node];
853         if (wc->active_node < wc->root_level ||
854             btrfs_root_refs(&root->root_item) > 0) {
855                 BUG_ON(node->refs <= 1);
856                 splice_shared_node(node, dest);
857         } else {
858                 BUG_ON(node->refs < 2);
859                 node->refs--;
860         }
861         return 0;
862 }
863
864 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
865                          u64 child_root_id)
866 {
867         struct btrfs_path path;
868         struct btrfs_key key;
869         struct extent_buffer *leaf;
870         int has_parent = 0;
871         int ret;
872
873         btrfs_init_path(&path);
874
875         key.objectid = parent_root_id;
876         key.type = BTRFS_ROOT_REF_KEY;
877         key.offset = child_root_id;
878         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
879                                 0, 0);
880         BUG_ON(ret < 0);
881         btrfs_release_path(&path);
882         if (!ret)
883                 return 1;
884
885         key.objectid = child_root_id;
886         key.type = BTRFS_ROOT_BACKREF_KEY;
887         key.offset = 0;
888         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
889                                 0, 0);
890         BUG_ON(ret <= 0);
891
892         while (1) {
893                 leaf = path.nodes[0];
894                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
895                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
896                         BUG_ON(ret < 0);
897
898                         if (ret > 0)
899                                 break;
900                         leaf = path.nodes[0];
901                 }
902
903                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
904                 if (key.objectid != child_root_id ||
905                     key.type != BTRFS_ROOT_BACKREF_KEY)
906                         break;
907
908                 has_parent = 1;
909
910                 if (key.offset == parent_root_id) {
911                         btrfs_release_path(&path);
912                         return 1;
913                 }
914
915                 path.slots[0]++;
916         }
917
918         btrfs_release_path(&path);
919         return has_parent? 0 : -1;
920 }
921
922 static int process_dir_item(struct btrfs_root *root,
923                             struct extent_buffer *eb,
924                             int slot, struct btrfs_key *key,
925                             struct shared_node *active_node)
926 {
927         u32 total;
928         u32 cur = 0;
929         u32 len;
930         u32 name_len;
931         u32 data_len;
932         int error;
933         int nritems = 0;
934         int filetype;
935         struct btrfs_dir_item *di;
936         struct inode_record *rec;
937         struct cache_tree *root_cache;
938         struct cache_tree *inode_cache;
939         struct btrfs_key location;
940         char namebuf[BTRFS_NAME_LEN];
941
942         root_cache = &active_node->root_cache;
943         inode_cache = &active_node->inode_cache;
944         rec = active_node->current;
945         rec->found_dir_item = 1;
946
947         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
948         total = btrfs_item_size_nr(eb, slot);
949         while (cur < total) {
950                 nritems++;
951                 btrfs_dir_item_key_to_cpu(eb, di, &location);
952                 name_len = btrfs_dir_name_len(eb, di);
953                 data_len = btrfs_dir_data_len(eb, di);
954                 filetype = btrfs_dir_type(eb, di);
955
956                 rec->found_size += name_len;
957                 if (name_len <= BTRFS_NAME_LEN) {
958                         len = name_len;
959                         error = 0;
960                 } else {
961                         len = BTRFS_NAME_LEN;
962                         error = REF_ERR_NAME_TOO_LONG;
963                 }
964                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
965
966                 if (location.type == BTRFS_INODE_ITEM_KEY) {
967                         add_inode_backref(inode_cache, location.objectid,
968                                           key->objectid, key->offset, namebuf,
969                                           len, filetype, key->type, error);
970                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
971                         add_inode_backref(root_cache, location.objectid,
972                                           key->objectid, key->offset,
973                                           namebuf, len, filetype,
974                                           key->type, error);
975                 } else {
976                         fprintf(stderr, "warning line %d\n", __LINE__);
977                 }
978
979                 len = sizeof(*di) + name_len + data_len;
980                 di = (struct btrfs_dir_item *)((char *)di + len);
981                 cur += len;
982         }
983         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
984                 rec->errors |= I_ERR_DUP_DIR_INDEX;
985
986         return 0;
987 }
988
989 static int process_inode_ref(struct extent_buffer *eb,
990                              int slot, struct btrfs_key *key,
991                              struct shared_node *active_node)
992 {
993         u32 total;
994         u32 cur = 0;
995         u32 len;
996         u32 name_len;
997         u64 index;
998         int error;
999         struct cache_tree *inode_cache;
1000         struct btrfs_inode_ref *ref;
1001         char namebuf[BTRFS_NAME_LEN];
1002
1003         inode_cache = &active_node->inode_cache;
1004
1005         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1006         total = btrfs_item_size_nr(eb, slot);
1007         while (cur < total) {
1008                 name_len = btrfs_inode_ref_name_len(eb, ref);
1009                 index = btrfs_inode_ref_index(eb, ref);
1010                 if (name_len <= BTRFS_NAME_LEN) {
1011                         len = name_len;
1012                         error = 0;
1013                 } else {
1014                         len = BTRFS_NAME_LEN;
1015                         error = REF_ERR_NAME_TOO_LONG;
1016                 }
1017                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1018                 add_inode_backref(inode_cache, key->objectid, key->offset,
1019                                   index, namebuf, len, 0, key->type, error);
1020
1021                 len = sizeof(*ref) + name_len;
1022                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1023                 cur += len;
1024         }
1025         return 0;
1026 }
1027
1028 static int process_inode_extref(struct extent_buffer *eb,
1029                                 int slot, struct btrfs_key *key,
1030                                 struct shared_node *active_node)
1031 {
1032         u32 total;
1033         u32 cur = 0;
1034         u32 len;
1035         u32 name_len;
1036         u64 index;
1037         u64 parent;
1038         int error;
1039         struct cache_tree *inode_cache;
1040         struct btrfs_inode_extref *extref;
1041         char namebuf[BTRFS_NAME_LEN];
1042
1043         inode_cache = &active_node->inode_cache;
1044
1045         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1046         total = btrfs_item_size_nr(eb, slot);
1047         while (cur < total) {
1048                 name_len = btrfs_inode_extref_name_len(eb, extref);
1049                 index = btrfs_inode_extref_index(eb, extref);
1050                 parent = btrfs_inode_extref_parent(eb, extref);
1051                 if (name_len <= BTRFS_NAME_LEN) {
1052                         len = name_len;
1053                         error = 0;
1054                 } else {
1055                         len = BTRFS_NAME_LEN;
1056                         error = REF_ERR_NAME_TOO_LONG;
1057                 }
1058                 read_extent_buffer(eb, namebuf,
1059                                    (unsigned long)(extref + 1), len);
1060                 add_inode_backref(inode_cache, key->objectid, parent,
1061                                   index, namebuf, len, 0, key->type, error);
1062
1063                 len = sizeof(*extref) + name_len;
1064                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1065                 cur += len;
1066         }
1067         return 0;
1068
1069 }
1070
1071 static u64 count_csum_range(struct btrfs_root *root, u64 start, u64 len)
1072 {
1073         struct btrfs_key key;
1074         struct btrfs_path path;
1075         struct extent_buffer *leaf;
1076         int ret ;
1077         size_t size;
1078         u64 found = 0;
1079         u64 csum_end;
1080         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1081
1082         btrfs_init_path(&path);
1083
1084         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1085         key.offset = start;
1086         key.type = BTRFS_EXTENT_CSUM_KEY;
1087
1088         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1089                                 &key, &path, 0, 0);
1090         BUG_ON(ret < 0);
1091         if (ret > 0 && path.slots[0] > 0) {
1092                 leaf = path.nodes[0];
1093                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1094                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1095                     key.type == BTRFS_EXTENT_CSUM_KEY)
1096                         path.slots[0]--;
1097         }
1098
1099         while (len > 0) {
1100                 leaf = path.nodes[0];
1101                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1102                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1103                         BUG_ON(ret < 0);
1104                         if (ret > 0)
1105                                 break;
1106                         leaf = path.nodes[0];
1107                 }
1108
1109                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1110                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1111                     key.type != BTRFS_EXTENT_CSUM_KEY)
1112                         break;
1113
1114                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1115                 if (key.offset >= start + len)
1116                         break;
1117
1118                 if (key.offset > start)
1119                         start = key.offset;
1120
1121                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1122                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1123                 if (csum_end > start) {
1124                         size = min(csum_end - start, len);
1125                         len -= size;
1126                         start += size;
1127                         found += size;
1128                 }
1129
1130                 path.slots[0]++;
1131         }
1132         btrfs_release_path(&path);
1133         return found;
1134 }
1135
1136 static int process_file_extent(struct btrfs_root *root,
1137                                 struct extent_buffer *eb,
1138                                 int slot, struct btrfs_key *key,
1139                                 struct shared_node *active_node)
1140 {
1141         struct inode_record *rec;
1142         struct btrfs_file_extent_item *fi;
1143         u64 num_bytes = 0;
1144         u64 disk_bytenr = 0;
1145         u64 extent_offset = 0;
1146         u64 mask = root->sectorsize - 1;
1147         int extent_type;
1148
1149         rec = active_node->current;
1150         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1151         rec->found_file_extent = 1;
1152
1153         if (rec->extent_start == (u64)-1) {
1154                 rec->extent_start = key->offset;
1155                 rec->extent_end = key->offset;
1156         }
1157
1158         if (rec->extent_end > key->offset)
1159                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1160         else if (rec->extent_end < key->offset &&
1161                  rec->extent_end < rec->first_extent_gap)
1162                 rec->first_extent_gap = rec->extent_end;
1163
1164         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1165         extent_type = btrfs_file_extent_type(eb, fi);
1166
1167         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1168                 num_bytes = btrfs_file_extent_inline_len(eb, fi);
1169                 if (num_bytes == 0)
1170                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1171                 rec->found_size += num_bytes;
1172                 num_bytes = (num_bytes + mask) & ~mask;
1173         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1174                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1175                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1176                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1177                 extent_offset = btrfs_file_extent_offset(eb, fi);
1178                 if (num_bytes == 0 || (num_bytes & mask))
1179                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1180                 if (num_bytes + extent_offset >
1181                     btrfs_file_extent_ram_bytes(eb, fi))
1182                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1183                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1184                     (btrfs_file_extent_compression(eb, fi) ||
1185                      btrfs_file_extent_encryption(eb, fi) ||
1186                      btrfs_file_extent_other_encoding(eb, fi)))
1187                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1188                 if (disk_bytenr > 0)
1189                         rec->found_size += num_bytes;
1190         } else {
1191                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1192         }
1193         rec->extent_end = key->offset + num_bytes;
1194
1195         if (disk_bytenr > 0) {
1196                 u64 found;
1197                 if (btrfs_file_extent_compression(eb, fi))
1198                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1199                 else
1200                         disk_bytenr += extent_offset;
1201
1202                 found = count_csum_range(root, disk_bytenr, num_bytes);
1203                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1204                         if (found > 0)
1205                                 rec->found_csum_item = 1;
1206                         if (found < num_bytes)
1207                                 rec->some_csum_missing = 1;
1208                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1209                         if (found > 0)
1210                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1211                 }
1212         }
1213         return 0;
1214 }
1215
1216 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1217                             struct walk_control *wc)
1218 {
1219         struct btrfs_key key;
1220         u32 nritems;
1221         int i;
1222         int ret = 0;
1223         int error = 0;
1224         struct cache_tree *inode_cache;
1225         struct shared_node *active_node;
1226
1227         if (wc->root_level == wc->active_node &&
1228             btrfs_root_refs(&root->root_item) == 0)
1229                 return 0;
1230
1231         active_node = wc->nodes[wc->active_node];
1232         inode_cache = &active_node->inode_cache;
1233         nritems = btrfs_header_nritems(eb);
1234         for (i = 0; i < nritems; i++) {
1235                 btrfs_item_key_to_cpu(eb, &key, i);
1236
1237                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1238                         continue;
1239                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1240                         continue;
1241
1242                 if (active_node->current == NULL ||
1243                     active_node->current->ino < key.objectid) {
1244                         if (active_node->current) {
1245                                 active_node->current->checked = 1;
1246                                 maybe_free_inode_rec(inode_cache,
1247                                                      active_node->current);
1248                         }
1249                         active_node->current = get_inode_rec(inode_cache,
1250                                                              key.objectid, 1);
1251                 }
1252                 switch (key.type) {
1253                 case BTRFS_DIR_ITEM_KEY:
1254                 case BTRFS_DIR_INDEX_KEY:
1255                         ret = process_dir_item(root, eb, i, &key, active_node);
1256                         break;
1257                 case BTRFS_INODE_REF_KEY:
1258                         ret = process_inode_ref(eb, i, &key, active_node);
1259                         break;
1260                 case BTRFS_INODE_EXTREF_KEY:
1261                         ret = process_inode_extref(eb, i, &key, active_node);
1262                         break;
1263                 case BTRFS_INODE_ITEM_KEY:
1264                         ret = process_inode_item(eb, i, &key, active_node);
1265                         break;
1266                 case BTRFS_EXTENT_DATA_KEY:
1267                         ret = process_file_extent(root, eb, i, &key,
1268                                                   active_node);
1269                         break;
1270                 default:
1271                         break;
1272                 };
1273                 if (ret != 0)
1274                         error = 1;
1275         }
1276         return error;
1277 }
1278
1279 static void reada_walk_down(struct btrfs_root *root,
1280                             struct extent_buffer *node, int slot)
1281 {
1282         u64 bytenr;
1283         u64 ptr_gen;
1284         u32 nritems;
1285         u32 blocksize;
1286         int i;
1287         int ret;
1288         int level;
1289
1290         level = btrfs_header_level(node);
1291         if (level != 1)
1292                 return;
1293
1294         nritems = btrfs_header_nritems(node);
1295         blocksize = btrfs_level_size(root, level - 1);
1296         for (i = slot; i < nritems; i++) {
1297                 bytenr = btrfs_node_blockptr(node, i);
1298                 ptr_gen = btrfs_node_ptr_generation(node, i);
1299                 ret = readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1300                 if (ret)
1301                         break;
1302         }
1303 }
1304
1305 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1306                           struct walk_control *wc, int *level)
1307 {
1308         u64 bytenr;
1309         u64 ptr_gen;
1310         struct extent_buffer *next;
1311         struct extent_buffer *cur;
1312         u32 blocksize;
1313         int ret, err = 0;
1314         u64 refs;
1315
1316         WARN_ON(*level < 0);
1317         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1318         ret = btrfs_lookup_extent_info(NULL, root,
1319                                        path->nodes[*level]->start,
1320                                        *level, 1, &refs, NULL);
1321         if (ret < 0) {
1322                 err = ret;
1323                 goto out;
1324         }
1325
1326         if (refs > 1) {
1327                 ret = enter_shared_node(root, path->nodes[*level]->start,
1328                                         refs, wc, *level);
1329                 if (ret > 0) {
1330                         err = ret;
1331                         goto out;
1332                 }
1333         }
1334
1335         while (*level >= 0) {
1336                 WARN_ON(*level < 0);
1337                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1338                 cur = path->nodes[*level];
1339
1340                 if (btrfs_header_level(cur) != *level)
1341                         WARN_ON(1);
1342
1343                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1344                         break;
1345                 if (*level == 0) {
1346                         ret = process_one_leaf(root, cur, wc);
1347                         break;
1348                 }
1349                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1350                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1351                 blocksize = btrfs_level_size(root, *level - 1);
1352                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1353                                                1, &refs, NULL);
1354                 if (ret < 0)
1355                         refs = 0;
1356
1357                 if (refs > 1) {
1358                         ret = enter_shared_node(root, bytenr, refs,
1359                                                 wc, *level - 1);
1360                         if (ret > 0) {
1361                                 path->slots[*level]++;
1362                                 continue;
1363                         }
1364                 }
1365
1366                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1367                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1368                         free_extent_buffer(next);
1369                         reada_walk_down(root, cur, path->slots[*level]);
1370                         next = read_tree_block(root, bytenr, blocksize,
1371                                                ptr_gen);
1372                         if (!next) {
1373                                 err = -EIO;
1374                                 goto out;
1375                         }
1376                 }
1377
1378                 *level = *level - 1;
1379                 free_extent_buffer(path->nodes[*level]);
1380                 path->nodes[*level] = next;
1381                 path->slots[*level] = 0;
1382         }
1383 out:
1384         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1385         return err;
1386 }
1387
1388 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1389                         struct walk_control *wc, int *level)
1390 {
1391         int i;
1392         struct extent_buffer *leaf;
1393
1394         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1395                 leaf = path->nodes[i];
1396                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1397                         path->slots[i]++;
1398                         *level = i;
1399                         return 0;
1400                 } else {
1401                         free_extent_buffer(path->nodes[*level]);
1402                         path->nodes[*level] = NULL;
1403                         BUG_ON(*level > wc->active_node);
1404                         if (*level == wc->active_node)
1405                                 leave_shared_node(root, wc, *level);
1406                         *level = i + 1;
1407                 }
1408         }
1409         return 1;
1410 }
1411
1412 static int check_root_dir(struct inode_record *rec)
1413 {
1414         struct inode_backref *backref;
1415         int ret = -1;
1416
1417         if (!rec->found_inode_item || rec->errors)
1418                 goto out;
1419         if (rec->nlink != 1 || rec->found_link != 0)
1420                 goto out;
1421         if (list_empty(&rec->backrefs))
1422                 goto out;
1423         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1424         if (!backref->found_inode_ref)
1425                 goto out;
1426         if (backref->index != 0 || backref->namelen != 2 ||
1427             memcmp(backref->name, "..", 2))
1428                 goto out;
1429         if (backref->found_dir_index || backref->found_dir_item)
1430                 goto out;
1431         ret = 0;
1432 out:
1433         return ret;
1434 }
1435
1436 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1437                               struct btrfs_root *root, struct btrfs_path *path,
1438                               struct inode_record *rec)
1439 {
1440         struct btrfs_inode_item *ei;
1441         struct btrfs_key key;
1442         int ret;
1443
1444         key.objectid = rec->ino;
1445         key.type = BTRFS_INODE_ITEM_KEY;
1446         key.offset = (u64)-1;
1447
1448         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1449         if (ret < 0)
1450                 goto out;
1451         if (ret) {
1452                 if (!path->slots[0]) {
1453                         ret = -ENOENT;
1454                         goto out;
1455                 }
1456                 path->slots[0]--;
1457                 ret = 0;
1458         }
1459         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1460         if (key.objectid != rec->ino) {
1461                 ret = -ENOENT;
1462                 goto out;
1463         }
1464
1465         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1466                             struct btrfs_inode_item);
1467         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1468         btrfs_mark_buffer_dirty(path->nodes[0]);
1469         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1470         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1471                root->root_key.objectid);
1472 out:
1473         btrfs_release_path(path);
1474         return ret;
1475 }
1476
1477 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1478                                     struct btrfs_root *root,
1479                                     struct btrfs_path *path,
1480                                     struct inode_record *rec)
1481 {
1482         struct btrfs_key key;
1483         int ret;
1484
1485         key.objectid = BTRFS_ORPHAN_OBJECTID;
1486         key.type = BTRFS_ORPHAN_ITEM_KEY;
1487         key.offset = rec->ino;
1488
1489         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1490         btrfs_release_path(path);
1491         if (!ret)
1492                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1493         return ret;
1494 }
1495
1496 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
1497 {
1498         struct btrfs_trans_handle *trans;
1499         struct btrfs_path *path;
1500         int ret = 0;
1501
1502         /* So far we just fix dir isize wrong */
1503         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM)))
1504                 return 1;
1505
1506         path = btrfs_alloc_path();
1507         if (!path)
1508                 return -ENOMEM;
1509
1510         trans = btrfs_start_transaction(root, 1);
1511         if (IS_ERR(trans)) {
1512                 btrfs_free_path(path);
1513                 return PTR_ERR(trans);
1514         }
1515
1516         if (rec->errors & I_ERR_DIR_ISIZE_WRONG)
1517                 ret = repair_inode_isize(trans, root, path, rec);
1518         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
1519                 ret = repair_inode_orphan_item(trans, root, path, rec);
1520         btrfs_commit_transaction(trans, root);
1521         btrfs_free_path(path);
1522         return ret;
1523 }
1524
1525 static int check_inode_recs(struct btrfs_root *root,
1526                             struct cache_tree *inode_cache)
1527 {
1528         struct cache_extent *cache;
1529         struct ptr_node *node;
1530         struct inode_record *rec;
1531         struct inode_backref *backref;
1532         int ret;
1533         u64 error = 0;
1534         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1535
1536         if (btrfs_root_refs(&root->root_item) == 0) {
1537                 if (!cache_tree_empty(inode_cache))
1538                         fprintf(stderr, "warning line %d\n", __LINE__);
1539                 return 0;
1540         }
1541
1542         rec = get_inode_rec(inode_cache, root_dirid, 0);
1543         if (rec) {
1544                 ret = check_root_dir(rec);
1545                 if (ret) {
1546                         fprintf(stderr, "root %llu root dir %llu error\n",
1547                                 (unsigned long long)root->root_key.objectid,
1548                                 (unsigned long long)root_dirid);
1549                         error++;
1550                 }
1551         } else {
1552                 fprintf(stderr, "root %llu root dir %llu not found\n",
1553                         (unsigned long long)root->root_key.objectid,
1554                         (unsigned long long)root_dirid);
1555         }
1556
1557         while (1) {
1558                 cache = search_cache_extent(inode_cache, 0);
1559                 if (!cache)
1560                         break;
1561                 node = container_of(cache, struct ptr_node, cache);
1562                 rec = node->data;
1563                 remove_cache_extent(inode_cache, &node->cache);
1564                 free(node);
1565                 if (rec->ino == root_dirid ||
1566                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1567                         free_inode_rec(rec);
1568                         continue;
1569                 }
1570
1571                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1572                         ret = check_orphan_item(root, rec->ino);
1573                         if (ret == 0)
1574                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1575                         if (can_free_inode_rec(rec)) {
1576                                 free_inode_rec(rec);
1577                                 continue;
1578                         }
1579                 }
1580
1581                 if (repair) {
1582                         ret = try_repair_inode(root, rec);
1583                         if (ret == 0 && can_free_inode_rec(rec)) {
1584                                 free_inode_rec(rec);
1585                                 continue;
1586                         }
1587                         ret = 0;
1588                 }
1589
1590                 error++;
1591                 if (!rec->found_inode_item)
1592                         rec->errors |= I_ERR_NO_INODE_ITEM;
1593                 if (rec->found_link != rec->nlink)
1594                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1595                 fprintf(stderr, "root %llu inode %llu errors %x",
1596                         (unsigned long long) root->root_key.objectid,
1597                         (unsigned long long) rec->ino, rec->errors);
1598                 print_inode_error(rec->errors);
1599                 list_for_each_entry(backref, &rec->backrefs, list) {
1600                         if (!backref->found_dir_item)
1601                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1602                         if (!backref->found_dir_index)
1603                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1604                         if (!backref->found_inode_ref)
1605                                 backref->errors |= REF_ERR_NO_INODE_REF;
1606                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1607                                 " namelen %u name %s filetype %d error %x",
1608                                 (unsigned long long)backref->dir,
1609                                 (unsigned long long)backref->index,
1610                                 backref->namelen, backref->name,
1611                                 backref->filetype, backref->errors);
1612                         print_ref_error(backref->errors);
1613                 }
1614                 free_inode_rec(rec);
1615         }
1616         return (error > 0) ? -1 : 0;
1617 }
1618
1619 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1620                                         u64 objectid)
1621 {
1622         struct cache_extent *cache;
1623         struct root_record *rec = NULL;
1624         int ret;
1625
1626         cache = lookup_cache_extent(root_cache, objectid, 1);
1627         if (cache) {
1628                 rec = container_of(cache, struct root_record, cache);
1629         } else {
1630                 rec = calloc(1, sizeof(*rec));
1631                 rec->objectid = objectid;
1632                 INIT_LIST_HEAD(&rec->backrefs);
1633                 rec->cache.start = objectid;
1634                 rec->cache.size = 1;
1635
1636                 ret = insert_cache_extent(root_cache, &rec->cache);
1637                 BUG_ON(ret);
1638         }
1639         return rec;
1640 }
1641
1642 static struct root_backref *get_root_backref(struct root_record *rec,
1643                                              u64 ref_root, u64 dir, u64 index,
1644                                              const char *name, int namelen)
1645 {
1646         struct root_backref *backref;
1647
1648         list_for_each_entry(backref, &rec->backrefs, list) {
1649                 if (backref->ref_root != ref_root || backref->dir != dir ||
1650                     backref->namelen != namelen)
1651                         continue;
1652                 if (memcmp(name, backref->name, namelen))
1653                         continue;
1654                 return backref;
1655         }
1656
1657         backref = malloc(sizeof(*backref) + namelen + 1);
1658         memset(backref, 0, sizeof(*backref));
1659         backref->ref_root = ref_root;
1660         backref->dir = dir;
1661         backref->index = index;
1662         backref->namelen = namelen;
1663         memcpy(backref->name, name, namelen);
1664         backref->name[namelen] = '\0';
1665         list_add_tail(&backref->list, &rec->backrefs);
1666         return backref;
1667 }
1668
1669 static void free_root_record(struct cache_extent *cache)
1670 {
1671         struct root_record *rec;
1672         struct root_backref *backref;
1673
1674         rec = container_of(cache, struct root_record, cache);
1675         while (!list_empty(&rec->backrefs)) {
1676                 backref = list_entry(rec->backrefs.next,
1677                                      struct root_backref, list);
1678                 list_del(&backref->list);
1679                 free(backref);
1680         }
1681
1682         kfree(rec);
1683 }
1684
1685 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
1686
1687 static int add_root_backref(struct cache_tree *root_cache,
1688                             u64 root_id, u64 ref_root, u64 dir, u64 index,
1689                             const char *name, int namelen,
1690                             int item_type, int errors)
1691 {
1692         struct root_record *rec;
1693         struct root_backref *backref;
1694
1695         rec = get_root_rec(root_cache, root_id);
1696         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
1697
1698         backref->errors |= errors;
1699
1700         if (item_type != BTRFS_DIR_ITEM_KEY) {
1701                 if (backref->found_dir_index || backref->found_back_ref ||
1702                     backref->found_forward_ref) {
1703                         if (backref->index != index)
1704                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
1705                 } else {
1706                         backref->index = index;
1707                 }
1708         }
1709
1710         if (item_type == BTRFS_DIR_ITEM_KEY) {
1711                 if (backref->found_forward_ref)
1712                         rec->found_ref++;
1713                 backref->found_dir_item = 1;
1714         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
1715                 backref->found_dir_index = 1;
1716         } else if (item_type == BTRFS_ROOT_REF_KEY) {
1717                 if (backref->found_forward_ref)
1718                         backref->errors |= REF_ERR_DUP_ROOT_REF;
1719                 else if (backref->found_dir_item)
1720                         rec->found_ref++;
1721                 backref->found_forward_ref = 1;
1722         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
1723                 if (backref->found_back_ref)
1724                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
1725                 backref->found_back_ref = 1;
1726         } else {
1727                 BUG_ON(1);
1728         }
1729
1730         if (backref->found_forward_ref && backref->found_dir_item)
1731                 backref->reachable = 1;
1732         return 0;
1733 }
1734
1735 static int merge_root_recs(struct btrfs_root *root,
1736                            struct cache_tree *src_cache,
1737                            struct cache_tree *dst_cache)
1738 {
1739         struct cache_extent *cache;
1740         struct ptr_node *node;
1741         struct inode_record *rec;
1742         struct inode_backref *backref;
1743
1744         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1745                 free_inode_recs_tree(src_cache);
1746                 return 0;
1747         }
1748
1749         while (1) {
1750                 cache = search_cache_extent(src_cache, 0);
1751                 if (!cache)
1752                         break;
1753                 node = container_of(cache, struct ptr_node, cache);
1754                 rec = node->data;
1755                 remove_cache_extent(src_cache, &node->cache);
1756                 free(node);
1757
1758                 if (!is_child_root(root, root->objectid, rec->ino))
1759                         goto skip;
1760
1761                 list_for_each_entry(backref, &rec->backrefs, list) {
1762                         BUG_ON(backref->found_inode_ref);
1763                         if (backref->found_dir_item)
1764                                 add_root_backref(dst_cache, rec->ino,
1765                                         root->root_key.objectid, backref->dir,
1766                                         backref->index, backref->name,
1767                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
1768                                         backref->errors);
1769                         if (backref->found_dir_index)
1770                                 add_root_backref(dst_cache, rec->ino,
1771                                         root->root_key.objectid, backref->dir,
1772                                         backref->index, backref->name,
1773                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
1774                                         backref->errors);
1775                 }
1776 skip:
1777                 free_inode_rec(rec);
1778         }
1779         return 0;
1780 }
1781
1782 static int check_root_refs(struct btrfs_root *root,
1783                            struct cache_tree *root_cache)
1784 {
1785         struct root_record *rec;
1786         struct root_record *ref_root;
1787         struct root_backref *backref;
1788         struct cache_extent *cache;
1789         int loop = 1;
1790         int ret;
1791         int error;
1792         int errors = 0;
1793
1794         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
1795         rec->found_ref = 1;
1796
1797         /* fixme: this can not detect circular references */
1798         while (loop) {
1799                 loop = 0;
1800                 cache = search_cache_extent(root_cache, 0);
1801                 while (1) {
1802                         if (!cache)
1803                                 break;
1804                         rec = container_of(cache, struct root_record, cache);
1805                         cache = next_cache_extent(cache);
1806
1807                         if (rec->found_ref == 0)
1808                                 continue;
1809
1810                         list_for_each_entry(backref, &rec->backrefs, list) {
1811                                 if (!backref->reachable)
1812                                         continue;
1813
1814                                 ref_root = get_root_rec(root_cache,
1815                                                         backref->ref_root);
1816                                 if (ref_root->found_ref > 0)
1817                                         continue;
1818
1819                                 backref->reachable = 0;
1820                                 rec->found_ref--;
1821                                 if (rec->found_ref == 0)
1822                                         loop = 1;
1823                         }
1824                 }
1825         }
1826
1827         cache = search_cache_extent(root_cache, 0);
1828         while (1) {
1829                 if (!cache)
1830                         break;
1831                 rec = container_of(cache, struct root_record, cache);
1832                 cache = next_cache_extent(cache);
1833
1834                 if (rec->found_ref == 0 &&
1835                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1836                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
1837                         ret = check_orphan_item(root->fs_info->tree_root,
1838                                                 rec->objectid);
1839                         if (ret == 0)
1840                                 continue;
1841
1842                         /*
1843                          * If we don't have a root item then we likely just have
1844                          * a dir item in a snapshot for this root but no actual
1845                          * ref key or anything so it's meaningless.
1846                          */
1847                         if (!rec->found_root_item)
1848                                 continue;
1849                         errors++;
1850                         fprintf(stderr, "fs tree %llu not referenced\n",
1851                                 (unsigned long long)rec->objectid);
1852                 }
1853
1854                 error = 0;
1855                 if (rec->found_ref > 0 && !rec->found_root_item)
1856                         error = 1;
1857                 list_for_each_entry(backref, &rec->backrefs, list) {
1858                         if (!backref->found_dir_item)
1859                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1860                         if (!backref->found_dir_index)
1861                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1862                         if (!backref->found_back_ref)
1863                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
1864                         if (!backref->found_forward_ref)
1865                                 backref->errors |= REF_ERR_NO_ROOT_REF;
1866                         if (backref->reachable && backref->errors)
1867                                 error = 1;
1868                 }
1869                 if (!error)
1870                         continue;
1871
1872                 errors++;
1873                 fprintf(stderr, "fs tree %llu refs %u %s\n",
1874                         (unsigned long long)rec->objectid, rec->found_ref,
1875                          rec->found_root_item ? "" : "not found");
1876
1877                 list_for_each_entry(backref, &rec->backrefs, list) {
1878                         if (!backref->reachable)
1879                                 continue;
1880                         if (!backref->errors && rec->found_root_item)
1881                                 continue;
1882                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
1883                                 " index %llu namelen %u name %s error %x\n",
1884                                 (unsigned long long)backref->ref_root,
1885                                 (unsigned long long)backref->dir,
1886                                 (unsigned long long)backref->index,
1887                                 backref->namelen, backref->name,
1888                                 backref->errors);
1889                 }
1890         }
1891         return errors > 0 ? 1 : 0;
1892 }
1893
1894 static int process_root_ref(struct extent_buffer *eb, int slot,
1895                             struct btrfs_key *key,
1896                             struct cache_tree *root_cache)
1897 {
1898         u64 dirid;
1899         u64 index;
1900         u32 len;
1901         u32 name_len;
1902         struct btrfs_root_ref *ref;
1903         char namebuf[BTRFS_NAME_LEN];
1904         int error;
1905
1906         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
1907
1908         dirid = btrfs_root_ref_dirid(eb, ref);
1909         index = btrfs_root_ref_sequence(eb, ref);
1910         name_len = btrfs_root_ref_name_len(eb, ref);
1911
1912         if (name_len <= BTRFS_NAME_LEN) {
1913                 len = name_len;
1914                 error = 0;
1915         } else {
1916                 len = BTRFS_NAME_LEN;
1917                 error = REF_ERR_NAME_TOO_LONG;
1918         }
1919         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1920
1921         if (key->type == BTRFS_ROOT_REF_KEY) {
1922                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
1923                                  index, namebuf, len, key->type, error);
1924         } else {
1925                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
1926                                  index, namebuf, len, key->type, error);
1927         }
1928         return 0;
1929 }
1930
1931 static int check_fs_root(struct btrfs_root *root,
1932                          struct cache_tree *root_cache,
1933                          struct walk_control *wc)
1934 {
1935         int ret = 0;
1936         int wret;
1937         int level;
1938         struct btrfs_path path;
1939         struct shared_node root_node;
1940         struct root_record *rec;
1941         struct btrfs_root_item *root_item = &root->root_item;
1942
1943         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1944                 rec = get_root_rec(root_cache, root->root_key.objectid);
1945                 if (btrfs_root_refs(root_item) > 0)
1946                         rec->found_root_item = 1;
1947         }
1948
1949         btrfs_init_path(&path);
1950         memset(&root_node, 0, sizeof(root_node));
1951         cache_tree_init(&root_node.root_cache);
1952         cache_tree_init(&root_node.inode_cache);
1953
1954         level = btrfs_header_level(root->node);
1955         memset(wc->nodes, 0, sizeof(wc->nodes));
1956         wc->nodes[level] = &root_node;
1957         wc->active_node = level;
1958         wc->root_level = level;
1959
1960         if (btrfs_root_refs(root_item) > 0 ||
1961             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1962                 path.nodes[level] = root->node;
1963                 extent_buffer_get(root->node);
1964                 path.slots[level] = 0;
1965         } else {
1966                 struct btrfs_key key;
1967                 struct btrfs_disk_key found_key;
1968
1969                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1970                 level = root_item->drop_level;
1971                 path.lowest_level = level;
1972                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1973                 BUG_ON(wret < 0);
1974                 btrfs_node_key(path.nodes[level], &found_key,
1975                                 path.slots[level]);
1976                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1977                                         sizeof(found_key)));
1978         }
1979
1980         while (1) {
1981                 wret = walk_down_tree(root, &path, wc, &level);
1982                 if (wret < 0)
1983                         ret = wret;
1984                 if (wret != 0)
1985                         break;
1986
1987                 wret = walk_up_tree(root, &path, wc, &level);
1988                 if (wret < 0)
1989                         ret = wret;
1990                 if (wret != 0)
1991                         break;
1992         }
1993         btrfs_release_path(&path);
1994
1995         merge_root_recs(root, &root_node.root_cache, root_cache);
1996
1997         if (root_node.current) {
1998                 root_node.current->checked = 1;
1999                 maybe_free_inode_rec(&root_node.inode_cache,
2000                                 root_node.current);
2001         }
2002
2003         ret = check_inode_recs(root, &root_node.inode_cache);
2004         return ret;
2005 }
2006
2007 static int fs_root_objectid(u64 objectid)
2008 {
2009         if (objectid == BTRFS_FS_TREE_OBJECTID ||
2010             objectid == BTRFS_TREE_RELOC_OBJECTID ||
2011             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID ||
2012             (objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2013              objectid <= BTRFS_LAST_FREE_OBJECTID))
2014                 return 1;
2015         return 0;
2016 }
2017
2018 static int check_fs_roots(struct btrfs_root *root,
2019                           struct cache_tree *root_cache)
2020 {
2021         struct btrfs_path path;
2022         struct btrfs_key key;
2023         struct walk_control wc;
2024         struct extent_buffer *leaf;
2025         struct btrfs_root *tmp_root;
2026         struct btrfs_root *tree_root = root->fs_info->tree_root;
2027         int ret;
2028         int err = 0;
2029
2030         /*
2031          * Just in case we made any changes to the extent tree that weren't
2032          * reflected into the free space cache yet.
2033          */
2034         if (repair)
2035                 reset_cached_block_groups(root->fs_info);
2036         memset(&wc, 0, sizeof(wc));
2037         cache_tree_init(&wc.shared);
2038         btrfs_init_path(&path);
2039
2040         key.offset = 0;
2041         key.objectid = 0;
2042         key.type = BTRFS_ROOT_ITEM_KEY;
2043         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
2044         BUG_ON(ret < 0);
2045         while (1) {
2046                 leaf = path.nodes[0];
2047                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2048                         ret = btrfs_next_leaf(tree_root, &path);
2049                         if (ret != 0)
2050                                 break;
2051                         leaf = path.nodes[0];
2052                 }
2053                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2054                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
2055                     fs_root_objectid(key.objectid)) {
2056                         key.offset = (u64)-1;
2057                         tmp_root = btrfs_read_fs_root(root->fs_info, &key);
2058                         if (IS_ERR(tmp_root)) {
2059                                 err = 1;
2060                                 goto next;
2061                         }
2062                         ret = check_fs_root(tmp_root, root_cache, &wc);
2063                         if (ret)
2064                                 err = 1;
2065                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
2066                            key.type == BTRFS_ROOT_BACKREF_KEY) {
2067                         process_root_ref(leaf, path.slots[0], &key,
2068                                          root_cache);
2069                 }
2070 next:
2071                 path.slots[0]++;
2072         }
2073         btrfs_release_path(&path);
2074
2075         if (!cache_tree_empty(&wc.shared))
2076                 fprintf(stderr, "warning line %d\n", __LINE__);
2077
2078         return err;
2079 }
2080
2081 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
2082 {
2083         struct list_head *cur = rec->backrefs.next;
2084         struct extent_backref *back;
2085         struct tree_backref *tback;
2086         struct data_backref *dback;
2087         u64 found = 0;
2088         int err = 0;
2089
2090         while(cur != &rec->backrefs) {
2091                 back = list_entry(cur, struct extent_backref, list);
2092                 cur = cur->next;
2093                 if (!back->found_extent_tree) {
2094                         err = 1;
2095                         if (!print_errs)
2096                                 goto out;
2097                         if (back->is_data) {
2098                                 dback = (struct data_backref *)back;
2099                                 fprintf(stderr, "Backref %llu %s %llu"
2100                                         " owner %llu offset %llu num_refs %lu"
2101                                         " not found in extent tree\n",
2102                                         (unsigned long long)rec->start,
2103                                         back->full_backref ?
2104                                         "parent" : "root",
2105                                         back->full_backref ?
2106                                         (unsigned long long)dback->parent:
2107                                         (unsigned long long)dback->root,
2108                                         (unsigned long long)dback->owner,
2109                                         (unsigned long long)dback->offset,
2110                                         (unsigned long)dback->num_refs);
2111                         } else {
2112                                 tback = (struct tree_backref *)back;
2113                                 fprintf(stderr, "Backref %llu parent %llu"
2114                                         " root %llu not found in extent tree\n",
2115                                         (unsigned long long)rec->start,
2116                                         (unsigned long long)tback->parent,
2117                                         (unsigned long long)tback->root);
2118                         }
2119                 }
2120                 if (!back->is_data && !back->found_ref) {
2121                         err = 1;
2122                         if (!print_errs)
2123                                 goto out;
2124                         tback = (struct tree_backref *)back;
2125                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
2126                                 (unsigned long long)rec->start,
2127                                 back->full_backref ? "parent" : "root",
2128                                 back->full_backref ?
2129                                 (unsigned long long)tback->parent :
2130                                 (unsigned long long)tback->root, back);
2131                 }
2132                 if (back->is_data) {
2133                         dback = (struct data_backref *)back;
2134                         if (dback->found_ref != dback->num_refs) {
2135                                 err = 1;
2136                                 if (!print_errs)
2137                                         goto out;
2138                                 fprintf(stderr, "Incorrect local backref count"
2139                                         " on %llu %s %llu owner %llu"
2140                                         " offset %llu found %u wanted %u back %p\n",
2141                                         (unsigned long long)rec->start,
2142                                         back->full_backref ?
2143                                         "parent" : "root",
2144                                         back->full_backref ?
2145                                         (unsigned long long)dback->parent:
2146                                         (unsigned long long)dback->root,
2147                                         (unsigned long long)dback->owner,
2148                                         (unsigned long long)dback->offset,
2149                                         dback->found_ref, dback->num_refs, back);
2150                         }
2151                         if (dback->disk_bytenr != rec->start) {
2152                                 err = 1;
2153                                 if (!print_errs)
2154                                         goto out;
2155                                 fprintf(stderr, "Backref disk bytenr does not"
2156                                         " match extent record, bytenr=%llu, "
2157                                         "ref bytenr=%llu\n",
2158                                         (unsigned long long)rec->start,
2159                                         (unsigned long long)dback->disk_bytenr);
2160                         }
2161
2162                         if (dback->bytes != rec->nr) {
2163                                 err = 1;
2164                                 if (!print_errs)
2165                                         goto out;
2166                                 fprintf(stderr, "Backref bytes do not match "
2167                                         "extent backref, bytenr=%llu, ref "
2168                                         "bytes=%llu, backref bytes=%llu\n",
2169                                         (unsigned long long)rec->start,
2170                                         (unsigned long long)rec->nr,
2171                                         (unsigned long long)dback->bytes);
2172                         }
2173                 }
2174                 if (!back->is_data) {
2175                         found += 1;
2176                 } else {
2177                         dback = (struct data_backref *)back;
2178                         found += dback->found_ref;
2179                 }
2180         }
2181         if (found != rec->refs) {
2182                 err = 1;
2183                 if (!print_errs)
2184                         goto out;
2185                 fprintf(stderr, "Incorrect global backref count "
2186                         "on %llu found %llu wanted %llu\n",
2187                         (unsigned long long)rec->start,
2188                         (unsigned long long)found,
2189                         (unsigned long long)rec->refs);
2190         }
2191 out:
2192         return err;
2193 }
2194
2195 static int free_all_extent_backrefs(struct extent_record *rec)
2196 {
2197         struct extent_backref *back;
2198         struct list_head *cur;
2199         while (!list_empty(&rec->backrefs)) {
2200                 cur = rec->backrefs.next;
2201                 back = list_entry(cur, struct extent_backref, list);
2202                 list_del(cur);
2203                 free(back);
2204         }
2205         return 0;
2206 }
2207
2208 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
2209                                      struct cache_tree *extent_cache)
2210 {
2211         struct cache_extent *cache;
2212         struct extent_record *rec;
2213
2214         while (1) {
2215                 cache = first_cache_extent(extent_cache);
2216                 if (!cache)
2217                         break;
2218                 rec = container_of(cache, struct extent_record, cache);
2219                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2220                 remove_cache_extent(extent_cache, cache);
2221                 free_all_extent_backrefs(rec);
2222                 free(rec);
2223         }
2224 }
2225
2226 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2227                                  struct extent_record *rec)
2228 {
2229         if (rec->content_checked && rec->owner_ref_checked &&
2230             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2231             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2232                 remove_cache_extent(extent_cache, &rec->cache);
2233                 free_all_extent_backrefs(rec);
2234                 list_del_init(&rec->list);
2235                 free(rec);
2236         }
2237         return 0;
2238 }
2239
2240 static int check_owner_ref(struct btrfs_root *root,
2241                             struct extent_record *rec,
2242                             struct extent_buffer *buf)
2243 {
2244         struct extent_backref *node;
2245         struct tree_backref *back;
2246         struct btrfs_root *ref_root;
2247         struct btrfs_key key;
2248         struct btrfs_path path;
2249         struct extent_buffer *parent;
2250         int level;
2251         int found = 0;
2252         int ret;
2253
2254         list_for_each_entry(node, &rec->backrefs, list) {
2255                 if (node->is_data)
2256                         continue;
2257                 if (!node->found_ref)
2258                         continue;
2259                 if (node->full_backref)
2260                         continue;
2261                 back = (struct tree_backref *)node;
2262                 if (btrfs_header_owner(buf) == back->root)
2263                         return 0;
2264         }
2265         BUG_ON(rec->is_root);
2266
2267         /* try to find the block by search corresponding fs tree */
2268         key.objectid = btrfs_header_owner(buf);
2269         key.type = BTRFS_ROOT_ITEM_KEY;
2270         key.offset = (u64)-1;
2271
2272         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2273         if (IS_ERR(ref_root))
2274                 return 1;
2275
2276         level = btrfs_header_level(buf);
2277         if (level == 0)
2278                 btrfs_item_key_to_cpu(buf, &key, 0);
2279         else
2280                 btrfs_node_key_to_cpu(buf, &key, 0);
2281
2282         btrfs_init_path(&path);
2283         path.lowest_level = level + 1;
2284         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2285         if (ret < 0)
2286                 return 0;
2287
2288         parent = path.nodes[level + 1];
2289         if (parent && buf->start == btrfs_node_blockptr(parent,
2290                                                         path.slots[level + 1]))
2291                 found = 1;
2292
2293         btrfs_release_path(&path);
2294         return found ? 0 : 1;
2295 }
2296
2297 static int is_extent_tree_record(struct extent_record *rec)
2298 {
2299         struct list_head *cur = rec->backrefs.next;
2300         struct extent_backref *node;
2301         struct tree_backref *back;
2302         int is_extent = 0;
2303
2304         while(cur != &rec->backrefs) {
2305                 node = list_entry(cur, struct extent_backref, list);
2306                 cur = cur->next;
2307                 if (node->is_data)
2308                         return 0;
2309                 back = (struct tree_backref *)node;
2310                 if (node->full_backref)
2311                         return 0;
2312                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2313                         is_extent = 1;
2314         }
2315         return is_extent;
2316 }
2317
2318
2319 static int record_bad_block_io(struct btrfs_fs_info *info,
2320                                struct cache_tree *extent_cache,
2321                                u64 start, u64 len)
2322 {
2323         struct extent_record *rec;
2324         struct cache_extent *cache;
2325         struct btrfs_key key;
2326
2327         cache = lookup_cache_extent(extent_cache, start, len);
2328         if (!cache)
2329                 return 0;
2330
2331         rec = container_of(cache, struct extent_record, cache);
2332         if (!is_extent_tree_record(rec))
2333                 return 0;
2334
2335         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2336         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2337 }
2338
2339 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
2340                        struct extent_buffer *buf, int slot)
2341 {
2342         if (btrfs_header_level(buf)) {
2343                 struct btrfs_key_ptr ptr1, ptr2;
2344
2345                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
2346                                    sizeof(struct btrfs_key_ptr));
2347                 read_extent_buffer(buf, &ptr2,
2348                                    btrfs_node_key_ptr_offset(slot + 1),
2349                                    sizeof(struct btrfs_key_ptr));
2350                 write_extent_buffer(buf, &ptr1,
2351                                     btrfs_node_key_ptr_offset(slot + 1),
2352                                     sizeof(struct btrfs_key_ptr));
2353                 write_extent_buffer(buf, &ptr2,
2354                                     btrfs_node_key_ptr_offset(slot),
2355                                     sizeof(struct btrfs_key_ptr));
2356                 if (slot == 0) {
2357                         struct btrfs_disk_key key;
2358                         btrfs_node_key(buf, &key, 0);
2359                         btrfs_fixup_low_keys(root, path, &key,
2360                                              btrfs_header_level(buf) + 1);
2361                 }
2362         } else {
2363                 struct btrfs_item *item1, *item2;
2364                 struct btrfs_key k1, k2;
2365                 char *item1_data, *item2_data;
2366                 u32 item1_offset, item2_offset, item1_size, item2_size;
2367
2368                 item1 = btrfs_item_nr(slot);
2369                 item2 = btrfs_item_nr(slot + 1);
2370                 btrfs_item_key_to_cpu(buf, &k1, slot);
2371                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
2372                 item1_offset = btrfs_item_offset(buf, item1);
2373                 item2_offset = btrfs_item_offset(buf, item2);
2374                 item1_size = btrfs_item_size(buf, item1);
2375                 item2_size = btrfs_item_size(buf, item2);
2376
2377                 item1_data = malloc(item1_size);
2378                 if (!item1_data)
2379                         return -ENOMEM;
2380                 item2_data = malloc(item2_size);
2381                 if (!item2_data) {
2382                         free(item2_data);
2383                         return -ENOMEM;
2384                 }
2385
2386                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
2387                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
2388
2389                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
2390                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
2391                 free(item1_data);
2392                 free(item2_data);
2393
2394                 btrfs_set_item_offset(buf, item1, item2_offset);
2395                 btrfs_set_item_offset(buf, item2, item1_offset);
2396                 btrfs_set_item_size(buf, item1, item2_size);
2397                 btrfs_set_item_size(buf, item2, item1_size);
2398
2399                 path->slots[0] = slot;
2400                 btrfs_set_item_key_unsafe(root, path, &k2);
2401                 path->slots[0] = slot + 1;
2402                 btrfs_set_item_key_unsafe(root, path, &k1);
2403         }
2404         return 0;
2405 }
2406
2407 /*
2408  * Attempt to fix basic block failures.  Currently we only handle bad key
2409  * orders, we will cycle through the keys and swap them if necessary.
2410  */
2411 static int try_to_fix_bad_block(struct btrfs_trans_handle *trans,
2412                                 struct btrfs_root *root,
2413                                 struct extent_buffer *buf,
2414                                 struct btrfs_disk_key *parent_key,
2415                                 enum btrfs_tree_block_status status)
2416 {
2417         struct btrfs_path *path;
2418         struct btrfs_key k1, k2;
2419         int i;
2420         int ret;
2421
2422         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
2423                 return -EIO;
2424
2425         k1.objectid = btrfs_header_owner(buf);
2426         k1.type = BTRFS_ROOT_ITEM_KEY;
2427         k1.offset = (u64)-1;
2428
2429         root = btrfs_read_fs_root(root->fs_info, &k1);
2430         if (IS_ERR(root))
2431                 return -EIO;
2432
2433         path = btrfs_alloc_path();
2434         if (!path)
2435                 return -EIO;
2436
2437         path->lowest_level = btrfs_header_level(buf);
2438         path->skip_check_block = 1;
2439         if (btrfs_header_level(buf))
2440                 btrfs_node_key_to_cpu(buf, &k1, 0);
2441         else
2442                 btrfs_item_key_to_cpu(buf, &k1, 0);
2443
2444         ret = btrfs_search_slot(trans, root, &k1, path, 0, 1);
2445         if (ret) {
2446                 btrfs_free_path(path);
2447                 return -EIO;
2448         }
2449
2450         buf = path->nodes[0];
2451         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
2452                 if (btrfs_header_level(buf)) {
2453                         btrfs_node_key_to_cpu(buf, &k1, i);
2454                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
2455                 } else {
2456                         btrfs_item_key_to_cpu(buf, &k1, i);
2457                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
2458                 }
2459                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
2460                         continue;
2461                 ret = swap_values(root, path, buf, i);
2462                 if (ret)
2463                         break;
2464                 btrfs_mark_buffer_dirty(buf);
2465                 i = 0;
2466         }
2467
2468         btrfs_free_path(path);
2469         return ret;
2470 }
2471
2472 static int check_block(struct btrfs_trans_handle *trans,
2473                        struct btrfs_root *root,
2474                        struct cache_tree *extent_cache,
2475                        struct extent_buffer *buf, u64 flags)
2476 {
2477         struct extent_record *rec;
2478         struct cache_extent *cache;
2479         struct btrfs_key key;
2480         enum btrfs_tree_block_status status;
2481         int ret = 0;
2482         int level;
2483
2484         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
2485         if (!cache)
2486                 return 1;
2487         rec = container_of(cache, struct extent_record, cache);
2488         rec->generation = btrfs_header_generation(buf);
2489
2490         level = btrfs_header_level(buf);
2491         if (btrfs_header_nritems(buf) > 0) {
2492
2493                 if (level == 0)
2494                         btrfs_item_key_to_cpu(buf, &key, 0);
2495                 else
2496                         btrfs_node_key_to_cpu(buf, &key, 0);
2497
2498                 rec->info_objectid = key.objectid;
2499         }
2500         rec->info_level = level;
2501
2502         if (btrfs_is_leaf(buf))
2503                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
2504         else
2505                 status = btrfs_check_node(root, &rec->parent_key, buf);
2506
2507         if (status != BTRFS_TREE_BLOCK_CLEAN) {
2508                 if (repair)
2509                         status = try_to_fix_bad_block(trans, root, buf,
2510                                                       &rec->parent_key,
2511                                                       status);
2512                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2513                         ret = -EIO;
2514                         fprintf(stderr, "bad block %llu\n",
2515                                 (unsigned long long)buf->start);
2516                 } else {
2517                         /*
2518                          * Signal to callers we need to start the scan over
2519                          * again since we'll have cow'ed blocks.
2520                          */
2521                         ret = -EAGAIN;
2522                 }
2523         } else {
2524                 rec->content_checked = 1;
2525                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2526                         rec->owner_ref_checked = 1;
2527                 else {
2528                         ret = check_owner_ref(root, rec, buf);
2529                         if (!ret)
2530                                 rec->owner_ref_checked = 1;
2531                 }
2532         }
2533         if (!ret)
2534                 maybe_free_extent_rec(extent_cache, rec);
2535         return ret;
2536 }
2537
2538 static struct tree_backref *find_tree_backref(struct extent_record *rec,
2539                                                 u64 parent, u64 root)
2540 {
2541         struct list_head *cur = rec->backrefs.next;
2542         struct extent_backref *node;
2543         struct tree_backref *back;
2544
2545         while(cur != &rec->backrefs) {
2546                 node = list_entry(cur, struct extent_backref, list);
2547                 cur = cur->next;
2548                 if (node->is_data)
2549                         continue;
2550                 back = (struct tree_backref *)node;
2551                 if (parent > 0) {
2552                         if (!node->full_backref)
2553                                 continue;
2554                         if (parent == back->parent)
2555                                 return back;
2556                 } else {
2557                         if (node->full_backref)
2558                                 continue;
2559                         if (back->root == root)
2560                                 return back;
2561                 }
2562         }
2563         return NULL;
2564 }
2565
2566 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
2567                                                 u64 parent, u64 root)
2568 {
2569         struct tree_backref *ref = malloc(sizeof(*ref));
2570         memset(&ref->node, 0, sizeof(ref->node));
2571         if (parent > 0) {
2572                 ref->parent = parent;
2573                 ref->node.full_backref = 1;
2574         } else {
2575                 ref->root = root;
2576                 ref->node.full_backref = 0;
2577         }
2578         list_add_tail(&ref->node.list, &rec->backrefs);
2579
2580         return ref;
2581 }
2582
2583 static struct data_backref *find_data_backref(struct extent_record *rec,
2584                                                 u64 parent, u64 root,
2585                                                 u64 owner, u64 offset,
2586                                                 int found_ref,
2587                                                 u64 disk_bytenr, u64 bytes)
2588 {
2589         struct list_head *cur = rec->backrefs.next;
2590         struct extent_backref *node;
2591         struct data_backref *back;
2592
2593         while(cur != &rec->backrefs) {
2594                 node = list_entry(cur, struct extent_backref, list);
2595                 cur = cur->next;
2596                 if (!node->is_data)
2597                         continue;
2598                 back = (struct data_backref *)node;
2599                 if (parent > 0) {
2600                         if (!node->full_backref)
2601                                 continue;
2602                         if (parent == back->parent)
2603                                 return back;
2604                 } else {
2605                         if (node->full_backref)
2606                                 continue;
2607                         if (back->root == root && back->owner == owner &&
2608                             back->offset == offset) {
2609                                 if (found_ref && node->found_ref &&
2610                                     (back->bytes != bytes ||
2611                                     back->disk_bytenr != disk_bytenr))
2612                                         continue;
2613                                 return back;
2614                         }
2615                 }
2616         }
2617         return NULL;
2618 }
2619
2620 static struct data_backref *alloc_data_backref(struct extent_record *rec,
2621                                                 u64 parent, u64 root,
2622                                                 u64 owner, u64 offset,
2623                                                 u64 max_size)
2624 {
2625         struct data_backref *ref = malloc(sizeof(*ref));
2626         memset(&ref->node, 0, sizeof(ref->node));
2627         ref->node.is_data = 1;
2628
2629         if (parent > 0) {
2630                 ref->parent = parent;
2631                 ref->owner = 0;
2632                 ref->offset = 0;
2633                 ref->node.full_backref = 1;
2634         } else {
2635                 ref->root = root;
2636                 ref->owner = owner;
2637                 ref->offset = offset;
2638                 ref->node.full_backref = 0;
2639         }
2640         ref->bytes = max_size;
2641         ref->found_ref = 0;
2642         ref->num_refs = 0;
2643         list_add_tail(&ref->node.list, &rec->backrefs);
2644         if (max_size > rec->max_size)
2645                 rec->max_size = max_size;
2646         return ref;
2647 }
2648
2649 static int add_extent_rec(struct cache_tree *extent_cache,
2650                           struct btrfs_key *parent_key, u64 parent_gen,
2651                           u64 start, u64 nr, u64 extent_item_refs,
2652                           int is_root, int inc_ref, int set_checked,
2653                           int metadata, int extent_rec, u64 max_size)
2654 {
2655         struct extent_record *rec;
2656         struct cache_extent *cache;
2657         int ret = 0;
2658         int dup = 0;
2659
2660         cache = lookup_cache_extent(extent_cache, start, nr);
2661         if (cache) {
2662                 rec = container_of(cache, struct extent_record, cache);
2663                 if (inc_ref)
2664                         rec->refs++;
2665                 if (rec->nr == 1)
2666                         rec->nr = max(nr, max_size);
2667
2668                 /*
2669                  * We need to make sure to reset nr to whatever the extent
2670                  * record says was the real size, this way we can compare it to
2671                  * the backrefs.
2672                  */
2673                 if (extent_rec) {
2674                         if (start != rec->start || rec->found_rec) {
2675                                 struct extent_record *tmp;
2676
2677                                 dup = 1;
2678                                 if (list_empty(&rec->list))
2679                                         list_add_tail(&rec->list,
2680                                                       &duplicate_extents);
2681
2682                                 /*
2683                                  * We have to do this song and dance in case we
2684                                  * find an extent record that falls inside of
2685                                  * our current extent record but does not have
2686                                  * the same objectid.
2687                                  */
2688                                 tmp = malloc(sizeof(*tmp));
2689                                 if (!tmp)
2690                                         return -ENOMEM;
2691                                 tmp->start = start;
2692                                 tmp->max_size = max_size;
2693                                 tmp->nr = nr;
2694                                 tmp->found_rec = 1;
2695                                 tmp->metadata = metadata;
2696                                 tmp->extent_item_refs = extent_item_refs;
2697                                 INIT_LIST_HEAD(&tmp->list);
2698                                 list_add_tail(&tmp->list, &rec->dups);
2699                                 rec->num_duplicates++;
2700                         } else {
2701                                 rec->nr = nr;
2702                                 rec->found_rec = 1;
2703                         }
2704                 }
2705
2706                 if (extent_item_refs && !dup) {
2707                         if (rec->extent_item_refs) {
2708                                 fprintf(stderr, "block %llu rec "
2709                                         "extent_item_refs %llu, passed %llu\n",
2710                                         (unsigned long long)start,
2711                                         (unsigned long long)
2712                                                         rec->extent_item_refs,
2713                                         (unsigned long long)extent_item_refs);
2714                         }
2715                         rec->extent_item_refs = extent_item_refs;
2716                 }
2717                 if (is_root)
2718                         rec->is_root = 1;
2719                 if (set_checked) {
2720                         rec->content_checked = 1;
2721                         rec->owner_ref_checked = 1;
2722                 }
2723
2724                 if (parent_key)
2725                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2726                 if (parent_gen)
2727                         rec->parent_generation = parent_gen;
2728
2729                 if (rec->max_size < max_size)
2730                         rec->max_size = max_size;
2731
2732                 maybe_free_extent_rec(extent_cache, rec);
2733                 return ret;
2734         }
2735         rec = malloc(sizeof(*rec));
2736         rec->start = start;
2737         rec->max_size = max_size;
2738         rec->nr = max(nr, max_size);
2739         rec->found_rec = extent_rec;
2740         rec->content_checked = 0;
2741         rec->owner_ref_checked = 0;
2742         rec->num_duplicates = 0;
2743         rec->metadata = metadata;
2744         INIT_LIST_HEAD(&rec->backrefs);
2745         INIT_LIST_HEAD(&rec->dups);
2746         INIT_LIST_HEAD(&rec->list);
2747
2748         if (is_root)
2749                 rec->is_root = 1;
2750         else
2751                 rec->is_root = 0;
2752
2753         if (inc_ref)
2754                 rec->refs = 1;
2755         else
2756                 rec->refs = 0;
2757
2758         if (extent_item_refs)
2759                 rec->extent_item_refs = extent_item_refs;
2760         else
2761                 rec->extent_item_refs = 0;
2762
2763         if (parent_key)
2764                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2765         else
2766                 memset(&rec->parent_key, 0, sizeof(*parent_key));
2767
2768         if (parent_gen)
2769                 rec->parent_generation = parent_gen;
2770         else
2771                 rec->parent_generation = 0;
2772
2773         rec->cache.start = start;
2774         rec->cache.size = nr;
2775         ret = insert_cache_extent(extent_cache, &rec->cache);
2776         BUG_ON(ret);
2777         bytes_used += nr;
2778         if (set_checked) {
2779                 rec->content_checked = 1;
2780                 rec->owner_ref_checked = 1;
2781         }
2782         return ret;
2783 }
2784
2785 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
2786                             u64 parent, u64 root, int found_ref)
2787 {
2788         struct extent_record *rec;
2789         struct tree_backref *back;
2790         struct cache_extent *cache;
2791
2792         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2793         if (!cache) {
2794                 add_extent_rec(extent_cache, NULL, 0, bytenr,
2795                                1, 0, 0, 0, 0, 1, 0, 0);
2796                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2797                 if (!cache)
2798                         abort();
2799         }
2800
2801         rec = container_of(cache, struct extent_record, cache);
2802         if (rec->start != bytenr) {
2803                 abort();
2804         }
2805
2806         back = find_tree_backref(rec, parent, root);
2807         if (!back)
2808                 back = alloc_tree_backref(rec, parent, root);
2809
2810         if (found_ref) {
2811                 if (back->node.found_ref) {
2812                         fprintf(stderr, "Extent back ref already exists "
2813                                 "for %llu parent %llu root %llu \n",
2814                                 (unsigned long long)bytenr,
2815                                 (unsigned long long)parent,
2816                                 (unsigned long long)root);
2817                 }
2818                 back->node.found_ref = 1;
2819         } else {
2820                 if (back->node.found_extent_tree) {
2821                         fprintf(stderr, "Extent back ref already exists "
2822                                 "for %llu parent %llu root %llu \n",
2823                                 (unsigned long long)bytenr,
2824                                 (unsigned long long)parent,
2825                                 (unsigned long long)root);
2826                 }
2827                 back->node.found_extent_tree = 1;
2828         }
2829         return 0;
2830 }
2831
2832 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
2833                             u64 parent, u64 root, u64 owner, u64 offset,
2834                             u32 num_refs, int found_ref, u64 max_size)
2835 {
2836         struct extent_record *rec;
2837         struct data_backref *back;
2838         struct cache_extent *cache;
2839
2840         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2841         if (!cache) {
2842                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
2843                                0, 0, max_size);
2844                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2845                 if (!cache)
2846                         abort();
2847         }
2848
2849         rec = container_of(cache, struct extent_record, cache);
2850         if (rec->max_size < max_size)
2851                 rec->max_size = max_size;
2852
2853         /*
2854          * If found_ref is set then max_size is the real size and must match the
2855          * existing refs.  So if we have already found a ref then we need to
2856          * make sure that this ref matches the existing one, otherwise we need
2857          * to add a new backref so we can notice that the backrefs don't match
2858          * and we need to figure out who is telling the truth.  This is to
2859          * account for that awful fsync bug I introduced where we'd end up with
2860          * a btrfs_file_extent_item that would have its length include multiple
2861          * prealloc extents or point inside of a prealloc extent.
2862          */
2863         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
2864                                  bytenr, max_size);
2865         if (!back)
2866                 back = alloc_data_backref(rec, parent, root, owner, offset,
2867                                           max_size);
2868
2869         if (found_ref) {
2870                 BUG_ON(num_refs != 1);
2871                 if (back->node.found_ref)
2872                         BUG_ON(back->bytes != max_size);
2873                 back->node.found_ref = 1;
2874                 back->found_ref += 1;
2875                 back->bytes = max_size;
2876                 back->disk_bytenr = bytenr;
2877                 rec->refs += 1;
2878                 rec->content_checked = 1;
2879                 rec->owner_ref_checked = 1;
2880         } else {
2881                 if (back->node.found_extent_tree) {
2882                         fprintf(stderr, "Extent back ref already exists "
2883                                 "for %llu parent %llu root %llu"
2884                                 "owner %llu offset %llu num_refs %lu\n",
2885                                 (unsigned long long)bytenr,
2886                                 (unsigned long long)parent,
2887                                 (unsigned long long)root,
2888                                 (unsigned long long)owner,
2889                                 (unsigned long long)offset,
2890                                 (unsigned long)num_refs);
2891                 }
2892                 back->num_refs = num_refs;
2893                 back->node.found_extent_tree = 1;
2894         }
2895         return 0;
2896 }
2897
2898 static int add_pending(struct cache_tree *pending,
2899                        struct cache_tree *seen, u64 bytenr, u32 size)
2900 {
2901         int ret;
2902         ret = add_cache_extent(seen, bytenr, size);
2903         if (ret)
2904                 return ret;
2905         add_cache_extent(pending, bytenr, size);
2906         return 0;
2907 }
2908
2909 static int pick_next_pending(struct cache_tree *pending,
2910                         struct cache_tree *reada,
2911                         struct cache_tree *nodes,
2912                         u64 last, struct block_info *bits, int bits_nr,
2913                         int *reada_bits)
2914 {
2915         unsigned long node_start = last;
2916         struct cache_extent *cache;
2917         int ret;
2918
2919         cache = search_cache_extent(reada, 0);
2920         if (cache) {
2921                 bits[0].start = cache->start;
2922                 bits[1].size = cache->size;
2923                 *reada_bits = 1;
2924                 return 1;
2925         }
2926         *reada_bits = 0;
2927         if (node_start > 32768)
2928                 node_start -= 32768;
2929
2930         cache = search_cache_extent(nodes, node_start);
2931         if (!cache)
2932                 cache = search_cache_extent(nodes, 0);
2933
2934         if (!cache) {
2935                  cache = search_cache_extent(pending, 0);
2936                  if (!cache)
2937                          return 0;
2938                  ret = 0;
2939                  do {
2940                          bits[ret].start = cache->start;
2941                          bits[ret].size = cache->size;
2942                          cache = next_cache_extent(cache);
2943                          ret++;
2944                  } while (cache && ret < bits_nr);
2945                  return ret;
2946         }
2947
2948         ret = 0;
2949         do {
2950                 bits[ret].start = cache->start;
2951                 bits[ret].size = cache->size;
2952                 cache = next_cache_extent(cache);
2953                 ret++;
2954         } while (cache && ret < bits_nr);
2955
2956         if (bits_nr - ret > 8) {
2957                 u64 lookup = bits[0].start + bits[0].size;
2958                 struct cache_extent *next;
2959                 next = search_cache_extent(pending, lookup);
2960                 while(next) {
2961                         if (next->start - lookup > 32768)
2962                                 break;
2963                         bits[ret].start = next->start;
2964                         bits[ret].size = next->size;
2965                         lookup = next->start + next->size;
2966                         ret++;
2967                         if (ret == bits_nr)
2968                                 break;
2969                         next = next_cache_extent(next);
2970                         if (!next)
2971                                 break;
2972                 }
2973         }
2974         return ret;
2975 }
2976
2977 static void free_chunk_record(struct cache_extent *cache)
2978 {
2979         struct chunk_record *rec;
2980
2981         rec = container_of(cache, struct chunk_record, cache);
2982         free(rec);
2983 }
2984
2985 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
2986 {
2987         cache_tree_free_extents(chunk_cache, free_chunk_record);
2988 }
2989
2990 static void free_device_record(struct rb_node *node)
2991 {
2992         struct device_record *rec;
2993
2994         rec = container_of(node, struct device_record, node);
2995         free(rec);
2996 }
2997
2998 FREE_RB_BASED_TREE(device_cache, free_device_record);
2999
3000 int insert_block_group_record(struct block_group_tree *tree,
3001                               struct block_group_record *bg_rec)
3002 {
3003         int ret;
3004
3005         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
3006         if (ret)
3007                 return ret;
3008
3009         list_add_tail(&bg_rec->list, &tree->block_groups);
3010         return 0;
3011 }
3012
3013 static void free_block_group_record(struct cache_extent *cache)
3014 {
3015         struct block_group_record *rec;
3016
3017         rec = container_of(cache, struct block_group_record, cache);
3018         free(rec);
3019 }
3020
3021 void free_block_group_tree(struct block_group_tree *tree)
3022 {
3023         cache_tree_free_extents(&tree->tree, free_block_group_record);
3024 }
3025
3026 int insert_device_extent_record(struct device_extent_tree *tree,
3027                                 struct device_extent_record *de_rec)
3028 {
3029         int ret;
3030
3031         /*
3032          * Device extent is a bit different from the other extents, because
3033          * the extents which belong to the different devices may have the
3034          * same start and size, so we need use the special extent cache
3035          * search/insert functions.
3036          */
3037         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
3038         if (ret)
3039                 return ret;
3040
3041         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
3042         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
3043         return 0;
3044 }
3045
3046 static void free_device_extent_record(struct cache_extent *cache)
3047 {
3048         struct device_extent_record *rec;
3049
3050         rec = container_of(cache, struct device_extent_record, cache);
3051         free(rec);
3052 }
3053
3054 void free_device_extent_tree(struct device_extent_tree *tree)
3055 {
3056         cache_tree_free_extents(&tree->tree, free_device_extent_record);
3057 }
3058
3059 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3060 static int process_extent_ref_v0(struct cache_tree *extent_cache,
3061                                  struct extent_buffer *leaf, int slot)
3062 {
3063         struct btrfs_extent_ref_v0 *ref0;
3064         struct btrfs_key key;
3065
3066         btrfs_item_key_to_cpu(leaf, &key, slot);
3067         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
3068         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
3069                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
3070         } else {
3071                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
3072                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
3073         }
3074         return 0;
3075 }
3076 #endif
3077
3078 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
3079                                             struct btrfs_key *key,
3080                                             int slot)
3081 {
3082         struct btrfs_chunk *ptr;
3083         struct chunk_record *rec;
3084         int num_stripes, i;
3085
3086         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3087         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
3088
3089         rec = malloc(btrfs_chunk_record_size(num_stripes));
3090         if (!rec) {
3091                 fprintf(stderr, "memory allocation failed\n");
3092                 exit(-1);
3093         }
3094
3095         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
3096
3097         INIT_LIST_HEAD(&rec->list);
3098         INIT_LIST_HEAD(&rec->dextents);
3099         rec->bg_rec = NULL;
3100
3101         rec->cache.start = key->offset;
3102         rec->cache.size = btrfs_chunk_length(leaf, ptr);
3103
3104         rec->generation = btrfs_header_generation(leaf);
3105
3106         rec->objectid = key->objectid;
3107         rec->type = key->type;
3108         rec->offset = key->offset;
3109
3110         rec->length = rec->cache.size;
3111         rec->owner = btrfs_chunk_owner(leaf, ptr);
3112         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
3113         rec->type_flags = btrfs_chunk_type(leaf, ptr);
3114         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
3115         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
3116         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
3117         rec->num_stripes = num_stripes;
3118         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
3119
3120         for (i = 0; i < rec->num_stripes; ++i) {
3121                 rec->stripes[i].devid =
3122                         btrfs_stripe_devid_nr(leaf, ptr, i);
3123                 rec->stripes[i].offset =
3124                         btrfs_stripe_offset_nr(leaf, ptr, i);
3125                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
3126                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
3127                                 BTRFS_UUID_SIZE);
3128         }
3129
3130         return rec;
3131 }
3132
3133 static int process_chunk_item(struct cache_tree *chunk_cache,
3134                               struct btrfs_key *key, struct extent_buffer *eb,
3135                               int slot)
3136 {
3137         struct chunk_record *rec;
3138         int ret = 0;
3139
3140         rec = btrfs_new_chunk_record(eb, key, slot);
3141         ret = insert_cache_extent(chunk_cache, &rec->cache);
3142         if (ret) {
3143                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
3144                         rec->offset, rec->length);
3145                 free(rec);
3146         }
3147
3148         return ret;
3149 }
3150
3151 static int process_device_item(struct rb_root *dev_cache,
3152                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
3153 {
3154         struct btrfs_dev_item *ptr;
3155         struct device_record *rec;
3156         int ret = 0;
3157
3158         ptr = btrfs_item_ptr(eb,
3159                 slot, struct btrfs_dev_item);
3160
3161         rec = malloc(sizeof(*rec));
3162         if (!rec) {
3163                 fprintf(stderr, "memory allocation failed\n");
3164                 return -ENOMEM;
3165         }
3166
3167         rec->devid = key->offset;
3168         rec->generation = btrfs_header_generation(eb);
3169
3170         rec->objectid = key->objectid;
3171         rec->type = key->type;
3172         rec->offset = key->offset;
3173
3174         rec->devid = btrfs_device_id(eb, ptr);
3175         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
3176         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
3177
3178         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
3179         if (ret) {
3180                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
3181                 free(rec);
3182         }
3183
3184         return ret;
3185 }
3186
3187 struct block_group_record *
3188 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
3189                              int slot)
3190 {
3191         struct btrfs_block_group_item *ptr;
3192         struct block_group_record *rec;
3193
3194         rec = malloc(sizeof(*rec));
3195         if (!rec) {
3196                 fprintf(stderr, "memory allocation failed\n");
3197                 exit(-1);
3198         }
3199         memset(rec, 0, sizeof(*rec));
3200
3201         rec->cache.start = key->objectid;
3202         rec->cache.size = key->offset;
3203
3204         rec->generation = btrfs_header_generation(leaf);
3205
3206         rec->objectid = key->objectid;
3207         rec->type = key->type;
3208         rec->offset = key->offset;
3209
3210         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
3211         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
3212
3213         INIT_LIST_HEAD(&rec->list);
3214
3215         return rec;
3216 }
3217
3218 static int process_block_group_item(struct block_group_tree *block_group_cache,
3219                                     struct btrfs_key *key,
3220                                     struct extent_buffer *eb, int slot)
3221 {
3222         struct block_group_record *rec;
3223         int ret = 0;
3224
3225         rec = btrfs_new_block_group_record(eb, key, slot);
3226         ret = insert_block_group_record(block_group_cache, rec);
3227         if (ret) {
3228                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
3229                         rec->objectid, rec->offset);
3230                 free(rec);
3231         }
3232
3233         return ret;
3234 }
3235
3236 struct device_extent_record *
3237 btrfs_new_device_extent_record(struct extent_buffer *leaf,
3238                                struct btrfs_key *key, int slot)
3239 {
3240         struct device_extent_record *rec;
3241         struct btrfs_dev_extent *ptr;
3242
3243         rec = malloc(sizeof(*rec));
3244         if (!rec) {
3245                 fprintf(stderr, "memory allocation failed\n");
3246                 exit(-1);
3247         }
3248         memset(rec, 0, sizeof(*rec));
3249
3250         rec->cache.objectid = key->objectid;
3251         rec->cache.start = key->offset;
3252
3253         rec->generation = btrfs_header_generation(leaf);
3254
3255         rec->objectid = key->objectid;
3256         rec->type = key->type;
3257         rec->offset = key->offset;
3258
3259         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
3260         rec->chunk_objecteid =
3261                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
3262         rec->chunk_offset =
3263                 btrfs_dev_extent_chunk_offset(leaf, ptr);
3264         rec->length = btrfs_dev_extent_length(leaf, ptr);
3265         rec->cache.size = rec->length;
3266
3267         INIT_LIST_HEAD(&rec->chunk_list);
3268         INIT_LIST_HEAD(&rec->device_list);
3269
3270         return rec;
3271 }
3272
3273 static int
3274 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
3275                            struct btrfs_key *key, struct extent_buffer *eb,
3276                            int slot)
3277 {
3278         struct device_extent_record *rec;
3279         int ret;
3280
3281         rec = btrfs_new_device_extent_record(eb, key, slot);
3282         ret = insert_device_extent_record(dev_extent_cache, rec);
3283         if (ret) {
3284                 fprintf(stderr,
3285                         "Device extent[%llu, %llu, %llu] existed.\n",
3286                         rec->objectid, rec->offset, rec->length);
3287                 free(rec);
3288         }
3289
3290         return ret;
3291 }
3292
3293 static int process_extent_item(struct btrfs_root *root,
3294                                struct cache_tree *extent_cache,
3295                                struct extent_buffer *eb, int slot)
3296 {
3297         struct btrfs_extent_item *ei;
3298         struct btrfs_extent_inline_ref *iref;
3299         struct btrfs_extent_data_ref *dref;
3300         struct btrfs_shared_data_ref *sref;
3301         struct btrfs_key key;
3302         unsigned long end;
3303         unsigned long ptr;
3304         int type;
3305         u32 item_size = btrfs_item_size_nr(eb, slot);
3306         u64 refs = 0;
3307         u64 offset;
3308         u64 num_bytes;
3309         int metadata = 0;
3310
3311         btrfs_item_key_to_cpu(eb, &key, slot);
3312
3313         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3314                 metadata = 1;
3315                 num_bytes = root->leafsize;
3316         } else {
3317                 num_bytes = key.offset;
3318         }
3319
3320         if (item_size < sizeof(*ei)) {
3321 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3322                 struct btrfs_extent_item_v0 *ei0;
3323                 BUG_ON(item_size != sizeof(*ei0));
3324                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
3325                 refs = btrfs_extent_refs_v0(eb, ei0);
3326 #else
3327                 BUG();
3328 #endif
3329                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
3330                                       num_bytes, refs, 0, 0, 0, metadata, 1,
3331                                       num_bytes);
3332         }
3333
3334         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
3335         refs = btrfs_extent_refs(eb, ei);
3336
3337         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
3338                        refs, 0, 0, 0, metadata, 1, num_bytes);
3339
3340         ptr = (unsigned long)(ei + 1);
3341         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
3342             key.type == BTRFS_EXTENT_ITEM_KEY)
3343                 ptr += sizeof(struct btrfs_tree_block_info);
3344
3345         end = (unsigned long)ei + item_size;
3346         while (ptr < end) {
3347                 iref = (struct btrfs_extent_inline_ref *)ptr;
3348                 type = btrfs_extent_inline_ref_type(eb, iref);
3349                 offset = btrfs_extent_inline_ref_offset(eb, iref);
3350                 switch (type) {
3351                 case BTRFS_TREE_BLOCK_REF_KEY:
3352                         add_tree_backref(extent_cache, key.objectid,
3353                                          0, offset, 0);
3354                         break;
3355                 case BTRFS_SHARED_BLOCK_REF_KEY:
3356                         add_tree_backref(extent_cache, key.objectid,
3357                                          offset, 0, 0);
3358                         break;
3359                 case BTRFS_EXTENT_DATA_REF_KEY:
3360                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3361                         add_data_backref(extent_cache, key.objectid, 0,
3362                                         btrfs_extent_data_ref_root(eb, dref),
3363                                         btrfs_extent_data_ref_objectid(eb,
3364                                                                        dref),
3365                                         btrfs_extent_data_ref_offset(eb, dref),
3366                                         btrfs_extent_data_ref_count(eb, dref),
3367                                         0, num_bytes);
3368                         break;
3369                 case BTRFS_SHARED_DATA_REF_KEY:
3370                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
3371                         add_data_backref(extent_cache, key.objectid, offset,
3372                                         0, 0, 0,
3373                                         btrfs_shared_data_ref_count(eb, sref),
3374                                         0, num_bytes);
3375                         break;
3376                 default:
3377                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
3378                                 key.objectid, key.type, num_bytes);
3379                         goto out;
3380                 }
3381                 ptr += btrfs_extent_inline_ref_size(type);
3382         }
3383         WARN_ON(ptr > end);
3384 out:
3385         return 0;
3386 }
3387
3388 static int check_cache_range(struct btrfs_root *root,
3389                              struct btrfs_block_group_cache *cache,
3390                              u64 offset, u64 bytes)
3391 {
3392         struct btrfs_free_space *entry;
3393         u64 *logical;
3394         u64 bytenr;
3395         int stripe_len;
3396         int i, nr, ret;
3397
3398         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3399                 bytenr = btrfs_sb_offset(i);
3400                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
3401                                        cache->key.objectid, bytenr, 0,
3402                                        &logical, &nr, &stripe_len);
3403                 if (ret)
3404                         return ret;
3405
3406                 while (nr--) {
3407                         if (logical[nr] + stripe_len <= offset)
3408                                 continue;
3409                         if (offset + bytes <= logical[nr])
3410                                 continue;
3411                         if (logical[nr] == offset) {
3412                                 if (stripe_len >= bytes) {
3413                                         kfree(logical);
3414                                         return 0;
3415                                 }
3416                                 bytes -= stripe_len;
3417                                 offset += stripe_len;
3418                         } else if (logical[nr] < offset) {
3419                                 if (logical[nr] + stripe_len >=
3420                                     offset + bytes) {
3421                                         kfree(logical);
3422                                         return 0;
3423                                 }
3424                                 bytes = (offset + bytes) -
3425                                         (logical[nr] + stripe_len);
3426                                 offset = logical[nr] + stripe_len;
3427                         } else {
3428                                 /*
3429                                  * Could be tricky, the super may land in the
3430                                  * middle of the area we're checking.  First
3431                                  * check the easiest case, it's at the end.
3432                                  */
3433                                 if (logical[nr] + stripe_len >=
3434                                     bytes + offset) {
3435                                         bytes = logical[nr] - offset;
3436                                         continue;
3437                                 }
3438
3439                                 /* Check the left side */
3440                                 ret = check_cache_range(root, cache,
3441                                                         offset,
3442                                                         logical[nr] - offset);
3443                                 if (ret) {
3444                                         kfree(logical);
3445                                         return ret;
3446                                 }
3447
3448                                 /* Now we continue with the right side */
3449                                 bytes = (offset + bytes) -
3450                                         (logical[nr] + stripe_len);
3451                                 offset = logical[nr] + stripe_len;
3452                         }
3453                 }
3454
3455                 kfree(logical);
3456         }
3457
3458         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
3459         if (!entry) {
3460                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
3461                         offset, offset+bytes);
3462                 return -EINVAL;
3463         }
3464
3465         if (entry->offset != offset) {
3466                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
3467                         entry->offset);
3468                 return -EINVAL;
3469         }
3470
3471         if (entry->bytes != bytes) {
3472                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
3473                         bytes, entry->bytes, offset);
3474                 return -EINVAL;
3475         }
3476
3477         unlink_free_space(cache->free_space_ctl, entry);
3478         free(entry);
3479         return 0;
3480 }
3481
3482 static int verify_space_cache(struct btrfs_root *root,
3483                               struct btrfs_block_group_cache *cache)
3484 {
3485         struct btrfs_path *path;
3486         struct extent_buffer *leaf;
3487         struct btrfs_key key;
3488         u64 last;
3489         int ret = 0;
3490
3491         path = btrfs_alloc_path();
3492         if (!path)
3493                 return -ENOMEM;
3494
3495         root = root->fs_info->extent_root;
3496
3497         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
3498
3499         key.objectid = last;
3500         key.offset = 0;
3501         key.type = BTRFS_EXTENT_ITEM_KEY;
3502
3503         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3504         if (ret < 0)
3505                 goto out;
3506         ret = 0;
3507         while (1) {
3508                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3509                         ret = btrfs_next_leaf(root, path);
3510                         if (ret < 0)
3511                                 goto out;
3512                         if (ret > 0) {
3513                                 ret = 0;
3514                                 break;
3515                         }
3516                 }
3517                 leaf = path->nodes[0];
3518                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3519                 if (key.objectid >= cache->key.offset + cache->key.objectid)
3520                         break;
3521                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3522                     key.type != BTRFS_METADATA_ITEM_KEY) {
3523                         path->slots[0]++;
3524                         continue;
3525                 }
3526
3527                 if (last == key.objectid) {
3528                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
3529                                 last = key.objectid + key.offset;
3530                         else
3531                                 last = key.objectid + root->leafsize;
3532                         path->slots[0]++;
3533                         continue;
3534                 }
3535
3536                 ret = check_cache_range(root, cache, last,
3537                                         key.objectid - last);
3538                 if (ret)
3539                         break;
3540                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3541                         last = key.objectid + key.offset;
3542                 else
3543                         last = key.objectid + root->leafsize;
3544                 path->slots[0]++;
3545         }
3546
3547         if (last < cache->key.objectid + cache->key.offset)
3548                 ret = check_cache_range(root, cache, last,
3549                                         cache->key.objectid +
3550                                         cache->key.offset - last);
3551
3552 out:
3553         btrfs_free_path(path);
3554
3555         if (!ret &&
3556             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
3557                 fprintf(stderr, "There are still entries left in the space "
3558                         "cache\n");
3559                 ret = -EINVAL;
3560         }
3561
3562         return ret;
3563 }
3564
3565 static int check_space_cache(struct btrfs_root *root)
3566 {
3567         struct btrfs_block_group_cache *cache;
3568         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
3569         int ret;
3570         int error = 0;
3571
3572         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
3573             btrfs_super_generation(root->fs_info->super_copy) !=
3574             btrfs_super_cache_generation(root->fs_info->super_copy)) {
3575                 printf("cache and super generation don't match, space cache "
3576                        "will be invalidated\n");
3577                 return 0;
3578         }
3579
3580         while (1) {
3581                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
3582                 if (!cache)
3583                         break;
3584
3585                 start = cache->key.objectid + cache->key.offset;
3586                 if (!cache->free_space_ctl) {
3587                         if (btrfs_init_free_space_ctl(cache,
3588                                                       root->sectorsize)) {
3589                                 ret = -ENOMEM;
3590                                 break;
3591                         }
3592                 } else {
3593                         btrfs_remove_free_space_cache(cache);
3594                 }
3595
3596                 ret = load_free_space_cache(root->fs_info, cache);
3597                 if (!ret)
3598                         continue;
3599
3600                 ret = verify_space_cache(root, cache);
3601                 if (ret) {
3602                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
3603                                 cache->key.objectid);
3604                         error++;
3605                 }
3606         }
3607
3608         return error ? -EINVAL : 0;
3609 }
3610
3611 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
3612                                u64 num_bytes)
3613 {
3614         struct btrfs_path *path;
3615         struct extent_buffer *leaf;
3616         struct btrfs_key key;
3617         int ret;
3618
3619         path = btrfs_alloc_path();
3620         if (!path) {
3621                 fprintf(stderr, "Error allocing path\n");
3622                 return -ENOMEM;
3623         }
3624
3625         key.objectid = bytenr;
3626         key.type = BTRFS_EXTENT_ITEM_KEY;
3627         key.offset = 0;
3628
3629
3630 again:
3631         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
3632                                 0, 0);
3633         if (ret < 0) {
3634                 fprintf(stderr, "Error looking up extent record %d\n", ret);
3635                 btrfs_free_path(path);
3636                 return ret;
3637         } else if (ret) {
3638                 if (path->slots[0])
3639                         path->slots[0]--;
3640                 else
3641                         btrfs_prev_leaf(root, path);
3642         }
3643
3644         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3645
3646         /*
3647          * Block group items come before extent items if they have the same
3648          * bytenr, so walk back one more just in case.  Dear future traveler,
3649          * first congrats on mastering time travel.  Now if it's not too much
3650          * trouble could you go back to 2006 and tell Chris to make the
3651          * BLOCK_GROUP_ITEM_KEY lower than the EXTENT_ITEM_KEY please?
3652          */
3653         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3654                 if (path->slots[0])
3655                         path->slots[0]--;
3656                 else
3657                         btrfs_prev_leaf(root, path);
3658         }
3659
3660         while (num_bytes) {
3661                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3662                         ret = btrfs_next_leaf(root, path);
3663                         if (ret < 0) {
3664                                 fprintf(stderr, "Error going to next leaf "
3665                                         "%d\n", ret);
3666                                 btrfs_free_path(path);
3667                                 return ret;
3668                         } else if (ret) {
3669                                 break;
3670                         }
3671                 }
3672                 leaf = path->nodes[0];
3673                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3674                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
3675                         path->slots[0]++;
3676                         continue;
3677                 }
3678                 if (key.objectid + key.offset < bytenr) {
3679                         path->slots[0]++;
3680                         continue;
3681                 }
3682                 if (key.objectid > bytenr + num_bytes)
3683                         break;
3684
3685                 if (key.objectid == bytenr) {
3686                         if (key.offset >= num_bytes) {
3687                                 num_bytes = 0;
3688                                 break;
3689                         }
3690                         num_bytes -= key.offset;
3691                         bytenr += key.offset;
3692                 } else if (key.objectid < bytenr) {
3693                         if (key.objectid + key.offset >= bytenr + num_bytes) {
3694                                 num_bytes = 0;
3695                                 break;
3696                         }
3697                         num_bytes = (bytenr + num_bytes) -
3698                                 (key.objectid + key.offset);
3699                         bytenr = key.objectid + key.offset;
3700                 } else {
3701                         if (key.objectid + key.offset < bytenr + num_bytes) {
3702                                 u64 new_start = key.objectid + key.offset;
3703                                 u64 new_bytes = bytenr + num_bytes - new_start;
3704
3705                                 /*
3706                                  * Weird case, the extent is in the middle of
3707                                  * our range, we'll have to search one side
3708                                  * and then the other.  Not sure if this happens
3709                                  * in real life, but no harm in coding it up
3710                                  * anyway just in case.
3711                                  */
3712                                 btrfs_release_path(path);
3713                                 ret = check_extent_exists(root, new_start,
3714                                                           new_bytes);
3715                                 if (ret) {
3716                                         fprintf(stderr, "Right section didn't "
3717                                                 "have a record\n");
3718                                         break;
3719                                 }
3720                                 num_bytes = key.objectid - bytenr;
3721                                 goto again;
3722                         }
3723                         num_bytes = key.objectid - bytenr;
3724                 }
3725                 path->slots[0]++;
3726         }
3727         ret = 0;
3728
3729         if (num_bytes) {
3730                 fprintf(stderr, "There are no extents for csum range "
3731                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
3732                 ret = 1;
3733         }
3734
3735         btrfs_free_path(path);
3736         return ret;
3737 }
3738
3739 static int check_csums(struct btrfs_root *root)
3740 {
3741         struct btrfs_path *path;
3742         struct extent_buffer *leaf;
3743         struct btrfs_key key;
3744         u64 offset = 0, num_bytes = 0;
3745         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3746         int errors = 0;
3747         int ret;
3748
3749         root = root->fs_info->csum_root;
3750
3751         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3752         key.type = BTRFS_EXTENT_CSUM_KEY;
3753         key.offset = 0;
3754
3755         path = btrfs_alloc_path();
3756         if (!path)
3757                 return -ENOMEM;
3758
3759         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3760         if (ret < 0) {
3761                 fprintf(stderr, "Error searching csum tree %d\n", ret);
3762                 btrfs_free_path(path);
3763                 return ret;
3764         }
3765
3766         if (ret > 0 && path->slots[0])
3767                 path->slots[0]--;
3768         ret = 0;
3769
3770         while (1) {
3771                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3772                         ret = btrfs_next_leaf(root, path);
3773                         if (ret < 0) {
3774                                 fprintf(stderr, "Error going to next leaf "
3775                                         "%d\n", ret);
3776                                 break;
3777                         }
3778                         if (ret)
3779                                 break;
3780                 }
3781                 leaf = path->nodes[0];
3782
3783                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3784                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
3785                         path->slots[0]++;
3786                         continue;
3787                 }
3788
3789                 if (!num_bytes) {
3790                         offset = key.offset;
3791                 } else if (key.offset != offset + num_bytes) {
3792                         ret = check_extent_exists(root, offset, num_bytes);
3793                         if (ret) {
3794                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
3795                                         "there is no extent record\n",
3796                                         offset, offset+num_bytes);
3797                                 errors++;
3798                         }
3799                         offset = key.offset;
3800                         num_bytes = 0;
3801                 }
3802
3803                 num_bytes += (btrfs_item_size_nr(leaf, path->slots[0]) /
3804                               csum_size) * root->sectorsize;
3805                 path->slots[0]++;
3806         }
3807
3808         btrfs_free_path(path);
3809         return errors;
3810 }
3811
3812 static int is_dropped_key(struct btrfs_key *key,
3813                           struct btrfs_key *drop_key) {
3814         if (key->objectid < drop_key->objectid)
3815                 return 1;
3816         else if (key->objectid == drop_key->objectid) {
3817                 if (key->type < drop_key->type)
3818                         return 1;
3819                 else if (key->type == drop_key->type) {
3820                         if (key->offset < drop_key->offset)
3821                                 return 1;
3822                 }
3823         }
3824         return 0;
3825 }
3826
3827 static int run_next_block(struct btrfs_trans_handle *trans,
3828                           struct btrfs_root *root,
3829                           struct block_info *bits,
3830                           int bits_nr,
3831                           u64 *last,
3832                           struct cache_tree *pending,
3833                           struct cache_tree *seen,
3834                           struct cache_tree *reada,
3835                           struct cache_tree *nodes,
3836                           struct cache_tree *extent_cache,
3837                           struct cache_tree *chunk_cache,
3838                           struct rb_root *dev_cache,
3839                           struct block_group_tree *block_group_cache,
3840                           struct device_extent_tree *dev_extent_cache,
3841                           struct btrfs_root_item *ri)
3842 {
3843         struct extent_buffer *buf;
3844         u64 bytenr;
3845         u32 size;
3846         u64 parent;
3847         u64 owner;
3848         u64 flags;
3849         u64 ptr;
3850         u64 gen = 0;
3851         int ret = 0;
3852         int i;
3853         int nritems;
3854         struct btrfs_key key;
3855         struct cache_extent *cache;
3856         int reada_bits;
3857
3858         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
3859                                     bits_nr, &reada_bits);
3860         if (nritems == 0)
3861                 return 1;
3862
3863         if (!reada_bits) {
3864                 for(i = 0; i < nritems; i++) {
3865                         ret = add_cache_extent(reada, bits[i].start,
3866                                                bits[i].size);
3867                         if (ret == -EEXIST)
3868                                 continue;
3869
3870                         /* fixme, get the parent transid */
3871                         readahead_tree_block(root, bits[i].start,
3872                                              bits[i].size, 0);
3873                 }
3874         }
3875         *last = bits[0].start;
3876         bytenr = bits[0].start;
3877         size = bits[0].size;
3878
3879         cache = lookup_cache_extent(pending, bytenr, size);
3880         if (cache) {
3881                 remove_cache_extent(pending, cache);
3882                 free(cache);
3883         }
3884         cache = lookup_cache_extent(reada, bytenr, size);
3885         if (cache) {
3886                 remove_cache_extent(reada, cache);
3887                 free(cache);
3888         }
3889         cache = lookup_cache_extent(nodes, bytenr, size);
3890         if (cache) {
3891                 remove_cache_extent(nodes, cache);
3892                 free(cache);
3893         }
3894         cache = lookup_cache_extent(seen, bytenr, size);
3895         if (cache) {
3896                 remove_cache_extent(seen, cache);
3897                 free(cache);
3898         }
3899
3900         cache = lookup_cache_extent(extent_cache, bytenr, size);
3901         if (cache) {
3902                 struct extent_record *rec;
3903
3904                 rec = container_of(cache, struct extent_record, cache);
3905                 gen = rec->parent_generation;
3906         }
3907
3908         /* fixme, get the real parent transid */
3909         buf = read_tree_block(root, bytenr, size, gen);
3910         if (!extent_buffer_uptodate(buf)) {
3911                 record_bad_block_io(root->fs_info,
3912                                     extent_cache, bytenr, size);
3913                 goto out;
3914         }
3915
3916         nritems = btrfs_header_nritems(buf);
3917
3918         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
3919                                        btrfs_header_level(buf), 1, NULL,
3920                                        &flags);
3921         if (ret < 0)
3922                 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
3923
3924         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
3925                 parent = bytenr;
3926                 owner = 0;
3927         } else {
3928                 parent = 0;
3929                 owner = btrfs_header_owner(buf);
3930         }
3931
3932         ret = check_block(trans, root, extent_cache, buf, flags);
3933         if (ret)
3934                 goto out;
3935
3936         if (btrfs_is_leaf(buf)) {
3937                 btree_space_waste += btrfs_leaf_free_space(root, buf);
3938                 for (i = 0; i < nritems; i++) {
3939                         struct btrfs_file_extent_item *fi;
3940                         btrfs_item_key_to_cpu(buf, &key, i);
3941                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
3942                                 process_extent_item(root, extent_cache, buf,
3943                                                     i);
3944                                 continue;
3945                         }
3946                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3947                                 process_extent_item(root, extent_cache, buf,
3948                                                     i);
3949                                 continue;
3950                         }
3951                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
3952                                 total_csum_bytes +=
3953                                         btrfs_item_size_nr(buf, i);
3954                                 continue;
3955                         }
3956                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
3957                                 process_chunk_item(chunk_cache, &key, buf, i);
3958                                 continue;
3959                         }
3960                         if (key.type == BTRFS_DEV_ITEM_KEY) {
3961                                 process_device_item(dev_cache, &key, buf, i);
3962                                 continue;
3963                         }
3964                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3965                                 process_block_group_item(block_group_cache,
3966                                         &key, buf, i);
3967                                 continue;
3968                         }
3969                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
3970                                 process_device_extent_item(dev_extent_cache,
3971                                         &key, buf, i);
3972                                 continue;
3973
3974                         }
3975                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
3976 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3977                                 process_extent_ref_v0(extent_cache, buf, i);
3978 #else
3979                                 BUG();
3980 #endif
3981                                 continue;
3982                         }
3983
3984                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
3985                                 add_tree_backref(extent_cache, key.objectid, 0,
3986                                                  key.offset, 0);
3987                                 continue;
3988                         }
3989                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
3990                                 add_tree_backref(extent_cache, key.objectid,
3991                                                  key.offset, 0, 0);
3992                                 continue;
3993                         }
3994                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3995                                 struct btrfs_extent_data_ref *ref;
3996                                 ref = btrfs_item_ptr(buf, i,
3997                                                 struct btrfs_extent_data_ref);
3998                                 add_data_backref(extent_cache,
3999                                         key.objectid, 0,
4000                                         btrfs_extent_data_ref_root(buf, ref),
4001                                         btrfs_extent_data_ref_objectid(buf,
4002                                                                        ref),
4003                                         btrfs_extent_data_ref_offset(buf, ref),
4004                                         btrfs_extent_data_ref_count(buf, ref),
4005                                         0, root->sectorsize);
4006                                 continue;
4007                         }
4008                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
4009                                 struct btrfs_shared_data_ref *ref;
4010                                 ref = btrfs_item_ptr(buf, i,
4011                                                 struct btrfs_shared_data_ref);
4012                                 add_data_backref(extent_cache,
4013                                         key.objectid, key.offset, 0, 0, 0,
4014                                         btrfs_shared_data_ref_count(buf, ref),
4015                                         0, root->sectorsize);
4016                                 continue;
4017                         }
4018                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
4019                                 struct bad_item *bad;
4020
4021                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
4022                                         continue;
4023                                 if (!owner)
4024                                         continue;
4025                                 bad = malloc(sizeof(struct bad_item));
4026                                 if (!bad)
4027                                         continue;
4028                                 INIT_LIST_HEAD(&bad->list);
4029                                 memcpy(&bad->key, &key,
4030                                        sizeof(struct btrfs_key));
4031                                 bad->root_id = owner;
4032                                 list_add_tail(&bad->list, &delete_items);
4033                                 continue;
4034                         }
4035                         if (key.type != BTRFS_EXTENT_DATA_KEY)
4036                                 continue;
4037                         fi = btrfs_item_ptr(buf, i,
4038                                             struct btrfs_file_extent_item);
4039                         if (btrfs_file_extent_type(buf, fi) ==
4040                             BTRFS_FILE_EXTENT_INLINE)
4041                                 continue;
4042                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
4043                                 continue;
4044
4045                         data_bytes_allocated +=
4046                                 btrfs_file_extent_disk_num_bytes(buf, fi);
4047                         if (data_bytes_allocated < root->sectorsize) {
4048                                 abort();
4049                         }
4050                         data_bytes_referenced +=
4051                                 btrfs_file_extent_num_bytes(buf, fi);
4052                         add_data_backref(extent_cache,
4053                                 btrfs_file_extent_disk_bytenr(buf, fi),
4054                                 parent, owner, key.objectid, key.offset -
4055                                 btrfs_file_extent_offset(buf, fi), 1, 1,
4056                                 btrfs_file_extent_disk_num_bytes(buf, fi));
4057                 }
4058         } else {
4059                 int level;
4060                 struct btrfs_key first_key;
4061
4062                 first_key.objectid = 0;
4063
4064                 if (nritems > 0)
4065                         btrfs_item_key_to_cpu(buf, &first_key, 0);
4066                 level = btrfs_header_level(buf);
4067                 for (i = 0; i < nritems; i++) {
4068                         ptr = btrfs_node_blockptr(buf, i);
4069                         size = btrfs_level_size(root, level - 1);
4070                         btrfs_node_key_to_cpu(buf, &key, i);
4071                         if (ri != NULL) {
4072                                 struct btrfs_key drop_key;
4073                                 btrfs_disk_key_to_cpu(&drop_key,
4074                                                       &ri->drop_progress);
4075                                 if ((level == ri->drop_level)
4076                                     && is_dropped_key(&key, &drop_key)) {
4077                                         continue;
4078                                 }
4079                         }
4080                         ret = add_extent_rec(extent_cache, &key,
4081                                              btrfs_node_ptr_generation(buf, i),
4082                                              ptr, size, 0, 0, 1, 0, 1, 0,
4083                                              size);
4084                         BUG_ON(ret);
4085
4086                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
4087
4088                         if (level > 1) {
4089                                 add_pending(nodes, seen, ptr, size);
4090                         } else {
4091                                 add_pending(pending, seen, ptr, size);
4092                         }
4093                 }
4094                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
4095                                       nritems) * sizeof(struct btrfs_key_ptr);
4096         }
4097         total_btree_bytes += buf->len;
4098         if (fs_root_objectid(btrfs_header_owner(buf)))
4099                 total_fs_tree_bytes += buf->len;
4100         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
4101                 total_extent_tree_bytes += buf->len;
4102         if (!found_old_backref &&
4103             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
4104             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
4105             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
4106                 found_old_backref = 1;
4107 out:
4108         free_extent_buffer(buf);
4109         return ret;
4110 }
4111
4112 static int add_root_to_pending(struct extent_buffer *buf,
4113                                struct cache_tree *extent_cache,
4114                                struct cache_tree *pending,
4115                                struct cache_tree *seen,
4116                                struct cache_tree *nodes,
4117                                struct btrfs_key *root_key)
4118 {
4119         if (btrfs_header_level(buf) > 0)
4120                 add_pending(nodes, seen, buf->start, buf->len);
4121         else
4122                 add_pending(pending, seen, buf->start, buf->len);
4123         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
4124                        0, 1, 1, 0, 1, 0, buf->len);
4125
4126         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
4127             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
4128                 add_tree_backref(extent_cache, buf->start, buf->start,
4129                                  0, 1);
4130         else
4131                 add_tree_backref(extent_cache, buf->start, 0,
4132                                  root_key->objectid, 1);
4133         return 0;
4134 }
4135
4136 /* as we fix the tree, we might be deleting blocks that
4137  * we're tracking for repair.  This hook makes sure we
4138  * remove any backrefs for blocks as we are fixing them.
4139  */
4140 static int free_extent_hook(struct btrfs_trans_handle *trans,
4141                             struct btrfs_root *root,
4142                             u64 bytenr, u64 num_bytes, u64 parent,
4143                             u64 root_objectid, u64 owner, u64 offset,
4144                             int refs_to_drop)
4145 {
4146         struct extent_record *rec;
4147         struct cache_extent *cache;
4148         int is_data;
4149         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
4150
4151         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
4152         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
4153         if (!cache)
4154                 return 0;
4155
4156         rec = container_of(cache, struct extent_record, cache);
4157         if (is_data) {
4158                 struct data_backref *back;
4159                 back = find_data_backref(rec, parent, root_objectid, owner,
4160                                          offset, 1, bytenr, num_bytes);
4161                 if (!back)
4162                         goto out;
4163                 if (back->node.found_ref) {
4164                         back->found_ref -= refs_to_drop;
4165                         if (rec->refs)
4166                                 rec->refs -= refs_to_drop;
4167                 }
4168                 if (back->node.found_extent_tree) {
4169                         back->num_refs -= refs_to_drop;
4170                         if (rec->extent_item_refs)
4171                                 rec->extent_item_refs -= refs_to_drop;
4172                 }
4173                 if (back->found_ref == 0)
4174                         back->node.found_ref = 0;
4175                 if (back->num_refs == 0)
4176                         back->node.found_extent_tree = 0;
4177
4178                 if (!back->node.found_extent_tree && back->node.found_ref) {
4179                         list_del(&back->node.list);
4180                         free(back);
4181                 }
4182         } else {
4183                 struct tree_backref *back;
4184                 back = find_tree_backref(rec, parent, root_objectid);
4185                 if (!back)
4186                         goto out;
4187                 if (back->node.found_ref) {
4188                         if (rec->refs)
4189                                 rec->refs--;
4190                         back->node.found_ref = 0;
4191                 }
4192                 if (back->node.found_extent_tree) {
4193                         if (rec->extent_item_refs)
4194                                 rec->extent_item_refs--;
4195                         back->node.found_extent_tree = 0;
4196                 }
4197                 if (!back->node.found_extent_tree && back->node.found_ref) {
4198                         list_del(&back->node.list);
4199                         free(back);
4200                 }
4201         }
4202         maybe_free_extent_rec(extent_cache, rec);
4203 out:
4204         return 0;
4205 }
4206
4207 static int delete_extent_records(struct btrfs_trans_handle *trans,
4208                                  struct btrfs_root *root,
4209                                  struct btrfs_path *path,
4210                                  u64 bytenr, u64 new_len)
4211 {
4212         struct btrfs_key key;
4213         struct btrfs_key found_key;
4214         struct extent_buffer *leaf;
4215         int ret;
4216         int slot;
4217
4218
4219         key.objectid = bytenr;
4220         key.type = (u8)-1;
4221         key.offset = (u64)-1;
4222
4223         while(1) {
4224                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
4225                                         &key, path, 0, 1);
4226                 if (ret < 0)
4227                         break;
4228
4229                 if (ret > 0) {
4230                         ret = 0;
4231                         if (path->slots[0] == 0)
4232                                 break;
4233                         path->slots[0]--;
4234                 }
4235                 ret = 0;
4236
4237                 leaf = path->nodes[0];
4238                 slot = path->slots[0];
4239
4240                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4241                 if (found_key.objectid != bytenr)
4242                         break;
4243
4244                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
4245                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
4246                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4247                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
4248                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
4249                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
4250                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
4251                         btrfs_release_path(path);
4252                         if (found_key.type == 0) {
4253                                 if (found_key.offset == 0)
4254                                         break;
4255                                 key.offset = found_key.offset - 1;
4256                                 key.type = found_key.type;
4257                         }
4258                         key.type = found_key.type - 1;
4259                         key.offset = (u64)-1;
4260                         continue;
4261                 }
4262
4263                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
4264                         found_key.objectid, found_key.type, found_key.offset);
4265
4266                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
4267                 if (ret)
4268                         break;
4269                 btrfs_release_path(path);
4270
4271                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4272                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
4273                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
4274                                 found_key.offset : root->leafsize;
4275
4276                         ret = btrfs_update_block_group(trans, root, bytenr,
4277                                                        bytes, 0, 0);
4278                         if (ret)
4279                                 break;
4280                 }
4281         }
4282
4283         btrfs_release_path(path);
4284         return ret;
4285 }
4286
4287 /*
4288  * for a single backref, this will allocate a new extent
4289  * and add the backref to it.
4290  */
4291 static int record_extent(struct btrfs_trans_handle *trans,
4292                          struct btrfs_fs_info *info,
4293                          struct btrfs_path *path,
4294                          struct extent_record *rec,
4295                          struct extent_backref *back,
4296                          int allocated, u64 flags)
4297 {
4298         int ret;
4299         struct btrfs_root *extent_root = info->extent_root;
4300         struct extent_buffer *leaf;
4301         struct btrfs_key ins_key;
4302         struct btrfs_extent_item *ei;
4303         struct tree_backref *tback;
4304         struct data_backref *dback;
4305         struct btrfs_tree_block_info *bi;
4306
4307         if (!back->is_data)
4308                 rec->max_size = max_t(u64, rec->max_size,
4309                                     info->extent_root->leafsize);
4310
4311         if (!allocated) {
4312                 u32 item_size = sizeof(*ei);
4313
4314                 if (!back->is_data)
4315                         item_size += sizeof(*bi);
4316
4317                 ins_key.objectid = rec->start;
4318                 ins_key.offset = rec->max_size;
4319                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
4320
4321                 ret = btrfs_insert_empty_item(trans, extent_root, path,
4322                                         &ins_key, item_size);
4323                 if (ret)
4324                         goto fail;
4325
4326                 leaf = path->nodes[0];
4327                 ei = btrfs_item_ptr(leaf, path->slots[0],
4328                                     struct btrfs_extent_item);
4329
4330                 btrfs_set_extent_refs(leaf, ei, 0);
4331                 btrfs_set_extent_generation(leaf, ei, rec->generation);
4332
4333                 if (back->is_data) {
4334                         btrfs_set_extent_flags(leaf, ei,
4335                                                BTRFS_EXTENT_FLAG_DATA);
4336                 } else {
4337                         struct btrfs_disk_key copy_key;;
4338
4339                         tback = (struct tree_backref *)back;
4340                         bi = (struct btrfs_tree_block_info *)(ei + 1);
4341                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
4342                                              sizeof(*bi));
4343
4344                         btrfs_set_disk_key_objectid(&copy_key,
4345                                                     rec->info_objectid);
4346                         btrfs_set_disk_key_type(&copy_key, 0);
4347                         btrfs_set_disk_key_offset(&copy_key, 0);
4348
4349                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
4350                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
4351
4352                         btrfs_set_extent_flags(leaf, ei,
4353                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
4354                 }
4355
4356                 btrfs_mark_buffer_dirty(leaf);
4357                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
4358                                                rec->max_size, 1, 0);
4359                 if (ret)
4360                         goto fail;
4361                 btrfs_release_path(path);
4362         }
4363
4364         if (back->is_data) {
4365                 u64 parent;
4366                 int i;
4367
4368                 dback = (struct data_backref *)back;
4369                 if (back->full_backref)
4370                         parent = dback->parent;
4371                 else
4372                         parent = 0;
4373
4374                 for (i = 0; i < dback->found_ref; i++) {
4375                         /* if parent != 0, we're doing a full backref
4376                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
4377                          * just makes the backref allocator create a data
4378                          * backref
4379                          */
4380                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
4381                                                    rec->start, rec->max_size,
4382                                                    parent,
4383                                                    dback->root,
4384                                                    parent ?
4385                                                    BTRFS_FIRST_FREE_OBJECTID :
4386                                                    dback->owner,
4387                                                    dback->offset);
4388                         if (ret)
4389                                 break;
4390                 }
4391                 fprintf(stderr, "adding new data backref"
4392                                 " on %llu %s %llu owner %llu"
4393                                 " offset %llu found %d\n",
4394                                 (unsigned long long)rec->start,
4395                                 back->full_backref ?
4396                                 "parent" : "root",
4397                                 back->full_backref ?
4398                                 (unsigned long long)parent :
4399                                 (unsigned long long)dback->root,
4400                                 (unsigned long long)dback->owner,
4401                                 (unsigned long long)dback->offset,
4402                                 dback->found_ref);
4403         } else {
4404                 u64 parent;
4405
4406                 tback = (struct tree_backref *)back;
4407                 if (back->full_backref)
4408                         parent = tback->parent;
4409                 else
4410                         parent = 0;
4411
4412                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
4413                                            rec->start, rec->max_size,
4414                                            parent, tback->root, 0, 0);
4415                 fprintf(stderr, "adding new tree backref on "
4416                         "start %llu len %llu parent %llu root %llu\n",
4417                         rec->start, rec->max_size, tback->parent, tback->root);
4418         }
4419         if (ret)
4420                 goto fail;
4421 fail:
4422         btrfs_release_path(path);
4423         return ret;
4424 }
4425
4426 struct extent_entry {
4427         u64 bytenr;
4428         u64 bytes;
4429         int count;
4430         int broken;
4431         struct list_head list;
4432 };
4433
4434 static struct extent_entry *find_entry(struct list_head *entries,
4435                                        u64 bytenr, u64 bytes)
4436 {
4437         struct extent_entry *entry = NULL;
4438
4439         list_for_each_entry(entry, entries, list) {
4440                 if (entry->bytenr == bytenr && entry->bytes == bytes)
4441                         return entry;
4442         }
4443
4444         return NULL;
4445 }
4446
4447 static struct extent_entry *find_most_right_entry(struct list_head *entries)
4448 {
4449         struct extent_entry *entry, *best = NULL, *prev = NULL;
4450
4451         list_for_each_entry(entry, entries, list) {
4452                 if (!prev) {
4453                         prev = entry;
4454                         continue;
4455                 }
4456
4457                 /*
4458                  * If there are as many broken entries as entries then we know
4459                  * not to trust this particular entry.
4460                  */
4461                 if (entry->broken == entry->count)
4462                         continue;
4463
4464                 /*
4465                  * If our current entry == best then we can't be sure our best
4466                  * is really the best, so we need to keep searching.
4467                  */
4468                 if (best && best->count == entry->count) {
4469                         prev = entry;
4470                         best = NULL;
4471                         continue;
4472                 }
4473
4474                 /* Prev == entry, not good enough, have to keep searching */
4475                 if (!prev->broken && prev->count == entry->count)
4476                         continue;
4477
4478                 if (!best)
4479                         best = (prev->count > entry->count) ? prev : entry;
4480                 else if (best->count < entry->count)
4481                         best = entry;
4482                 prev = entry;
4483         }
4484
4485         return best;
4486 }
4487
4488 static int repair_ref(struct btrfs_trans_handle *trans,
4489                       struct btrfs_fs_info *info, struct btrfs_path *path,
4490                       struct data_backref *dback, struct extent_entry *entry)
4491 {
4492         struct btrfs_root *root;
4493         struct btrfs_file_extent_item *fi;
4494         struct extent_buffer *leaf;
4495         struct btrfs_key key;
4496         u64 bytenr, bytes;
4497         int ret;
4498
4499         key.objectid = dback->root;
4500         key.type = BTRFS_ROOT_ITEM_KEY;
4501         key.offset = (u64)-1;
4502         root = btrfs_read_fs_root(info, &key);
4503         if (IS_ERR(root)) {
4504                 fprintf(stderr, "Couldn't find root for our ref\n");
4505                 return -EINVAL;
4506         }
4507
4508         /*
4509          * The backref points to the original offset of the extent if it was
4510          * split, so we need to search down to the offset we have and then walk
4511          * forward until we find the backref we're looking for.
4512          */
4513         key.objectid = dback->owner;
4514         key.type = BTRFS_EXTENT_DATA_KEY;
4515         key.offset = dback->offset;
4516         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4517         if (ret < 0) {
4518                 fprintf(stderr, "Error looking up ref %d\n", ret);
4519                 return ret;
4520         }
4521
4522         while (1) {
4523                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4524                         ret = btrfs_next_leaf(root, path);
4525                         if (ret) {
4526                                 fprintf(stderr, "Couldn't find our ref, next\n");
4527                                 return -EINVAL;
4528                         }
4529                 }
4530                 leaf = path->nodes[0];
4531                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4532                 if (key.objectid != dback->owner ||
4533                     key.type != BTRFS_EXTENT_DATA_KEY) {
4534                         fprintf(stderr, "Couldn't find our ref, search\n");
4535                         return -EINVAL;
4536                 }
4537                 fi = btrfs_item_ptr(leaf, path->slots[0],
4538                                     struct btrfs_file_extent_item);
4539                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4540                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4541
4542                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
4543                         break;
4544                 path->slots[0]++;
4545         }
4546
4547         btrfs_release_path(path);
4548
4549         /*
4550          * Have to make sure that this root gets updated when we commit the
4551          * transaction
4552          */
4553         root->track_dirty = 1;
4554         if (root->last_trans != trans->transid) {
4555                 root->last_trans = trans->transid;
4556                 root->commit_root = root->node;
4557                 extent_buffer_get(root->node);
4558         }
4559
4560         /*
4561          * Ok we have the key of the file extent we want to fix, now we can cow
4562          * down to the thing and fix it.
4563          */
4564         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4565         if (ret < 0) {
4566                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
4567                         key.objectid, key.type, key.offset, ret);
4568                 return ret;
4569         }
4570         if (ret > 0) {
4571                 fprintf(stderr, "Well that's odd, we just found this key "
4572                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
4573                         key.offset);
4574                 return -EINVAL;
4575         }
4576         leaf = path->nodes[0];
4577         fi = btrfs_item_ptr(leaf, path->slots[0],
4578                             struct btrfs_file_extent_item);
4579
4580         if (btrfs_file_extent_compression(leaf, fi) &&
4581             dback->disk_bytenr != entry->bytenr) {
4582                 fprintf(stderr, "Ref doesn't match the record start and is "
4583                         "compressed, please take a btrfs-image of this file "
4584                         "system and send it to a btrfs developer so they can "
4585                         "complete this functionality for bytenr %Lu\n",
4586                         dback->disk_bytenr);
4587                 return -EINVAL;
4588         }
4589
4590         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
4591                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4592         } else if (dback->disk_bytenr > entry->bytenr) {
4593                 u64 off_diff, offset;
4594
4595                 off_diff = dback->disk_bytenr - entry->bytenr;
4596                 offset = btrfs_file_extent_offset(leaf, fi);
4597                 if (dback->disk_bytenr + offset +
4598                     btrfs_file_extent_num_bytes(leaf, fi) >
4599                     entry->bytenr + entry->bytes) {
4600                         fprintf(stderr, "Ref is past the entry end, please "
4601                                 "take a btrfs-image of this file system and "
4602                                 "send it to a btrfs developer, ref %Lu\n",
4603                                 dback->disk_bytenr);
4604                         return -EINVAL;
4605                 }
4606                 offset += off_diff;
4607                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4608                 btrfs_set_file_extent_offset(leaf, fi, offset);
4609         } else if (dback->disk_bytenr < entry->bytenr) {
4610                 u64 offset;
4611
4612                 offset = btrfs_file_extent_offset(leaf, fi);
4613                 if (dback->disk_bytenr + offset < entry->bytenr) {
4614                         fprintf(stderr, "Ref is before the entry start, please"
4615                                 " take a btrfs-image of this file system and "
4616                                 "send it to a btrfs developer, ref %Lu\n",
4617                                 dback->disk_bytenr);
4618                         return -EINVAL;
4619                 }
4620
4621                 offset += dback->disk_bytenr;
4622                 offset -= entry->bytenr;
4623                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4624                 btrfs_set_file_extent_offset(leaf, fi, offset);
4625         }
4626
4627         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
4628
4629         /*
4630          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
4631          * only do this if we aren't using compression, otherwise it's a
4632          * trickier case.
4633          */
4634         if (!btrfs_file_extent_compression(leaf, fi))
4635                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
4636         else
4637                 printf("ram bytes may be wrong?\n");
4638         btrfs_mark_buffer_dirty(leaf);
4639         btrfs_release_path(path);
4640         return 0;
4641 }
4642
4643 static int verify_backrefs(struct btrfs_trans_handle *trans,
4644                            struct btrfs_fs_info *info, struct btrfs_path *path,
4645                            struct extent_record *rec)
4646 {
4647         struct extent_backref *back;
4648         struct data_backref *dback;
4649         struct extent_entry *entry, *best = NULL;
4650         LIST_HEAD(entries);
4651         int nr_entries = 0;
4652         int broken_entries = 0;
4653         int ret = 0;
4654         short mismatch = 0;
4655
4656         /*
4657          * Metadata is easy and the backrefs should always agree on bytenr and
4658          * size, if not we've got bigger issues.
4659          */
4660         if (rec->metadata)
4661                 return 0;
4662
4663         list_for_each_entry(back, &rec->backrefs, list) {
4664                 dback = (struct data_backref *)back;
4665                 /*
4666                  * We only pay attention to backrefs that we found a real
4667                  * backref for.
4668                  */
4669                 if (dback->found_ref == 0)
4670                         continue;
4671                 if (back->full_backref)
4672                         continue;
4673
4674                 /*
4675                  * For now we only catch when the bytes don't match, not the
4676                  * bytenr.  We can easily do this at the same time, but I want
4677                  * to have a fs image to test on before we just add repair
4678                  * functionality willy-nilly so we know we won't screw up the
4679                  * repair.
4680                  */
4681
4682                 entry = find_entry(&entries, dback->disk_bytenr,
4683                                    dback->bytes);
4684                 if (!entry) {
4685                         entry = malloc(sizeof(struct extent_entry));
4686                         if (!entry) {
4687                                 ret = -ENOMEM;
4688                                 goto out;
4689                         }
4690                         memset(entry, 0, sizeof(*entry));
4691                         entry->bytenr = dback->disk_bytenr;
4692                         entry->bytes = dback->bytes;
4693                         list_add_tail(&entry->list, &entries);
4694                         nr_entries++;
4695                 }
4696
4697                 /*
4698                  * If we only have on entry we may think the entries agree when
4699                  * in reality they don't so we have to do some extra checking.
4700                  */
4701                 if (dback->disk_bytenr != rec->start ||
4702                     dback->bytes != rec->nr || back->broken)
4703                         mismatch = 1;
4704
4705                 if (back->broken) {
4706                         entry->broken++;
4707                         broken_entries++;
4708                 }
4709
4710                 entry->count++;
4711         }
4712
4713         /* Yay all the backrefs agree, carry on good sir */
4714         if (nr_entries <= 1 && !mismatch)
4715                 goto out;
4716
4717         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
4718                 "%Lu\n", rec->start);
4719
4720         /*
4721          * First we want to see if the backrefs can agree amongst themselves who
4722          * is right, so figure out which one of the entries has the highest
4723          * count.
4724          */
4725         best = find_most_right_entry(&entries);
4726
4727         /*
4728          * Ok so we may have an even split between what the backrefs think, so
4729          * this is where we use the extent ref to see what it thinks.
4730          */
4731         if (!best) {
4732                 entry = find_entry(&entries, rec->start, rec->nr);
4733                 if (!entry && (!broken_entries || !rec->found_rec)) {
4734                         fprintf(stderr, "Backrefs don't agree with eachother "
4735                                 "and extent record doesn't agree with anybody,"
4736                                 " so we can't fix bytenr %Lu bytes %Lu\n",
4737                                 rec->start, rec->nr);
4738                         ret = -EINVAL;
4739                         goto out;
4740                 } else if (!entry) {
4741                         /*
4742                          * Ok our backrefs were broken, we'll assume this is the
4743                          * correct value and add an entry for this range.
4744                          */
4745                         entry = malloc(sizeof(struct extent_entry));
4746                         if (!entry) {
4747                                 ret = -ENOMEM;
4748                                 goto out;
4749                         }
4750                         memset(entry, 0, sizeof(*entry));
4751                         entry->bytenr = rec->start;
4752                         entry->bytes = rec->nr;
4753                         list_add_tail(&entry->list, &entries);
4754                         nr_entries++;
4755                 }
4756                 entry->count++;
4757                 best = find_most_right_entry(&entries);
4758                 if (!best) {
4759                         fprintf(stderr, "Backrefs and extent record evenly "
4760                                 "split on who is right, this is going to "
4761                                 "require user input to fix bytenr %Lu bytes "
4762                                 "%Lu\n", rec->start, rec->nr);
4763                         ret = -EINVAL;
4764                         goto out;
4765                 }
4766         }
4767
4768         /*
4769          * I don't think this can happen currently as we'll abort() if we catch
4770          * this case higher up, but in case somebody removes that we still can't
4771          * deal with it properly here yet, so just bail out of that's the case.
4772          */
4773         if (best->bytenr != rec->start) {
4774                 fprintf(stderr, "Extent start and backref starts don't match, "
4775                         "please use btrfs-image on this file system and send "
4776                         "it to a btrfs developer so they can make fsck fix "
4777                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
4778                         rec->start, rec->nr);
4779                 ret = -EINVAL;
4780                 goto out;
4781         }
4782
4783         /*
4784          * Ok great we all agreed on an extent record, let's go find the real
4785          * references and fix up the ones that don't match.
4786          */
4787         list_for_each_entry(back, &rec->backrefs, list) {
4788                 dback = (struct data_backref *)back;
4789
4790                 /*
4791                  * Still ignoring backrefs that don't have a real ref attached
4792                  * to them.
4793                  */
4794                 if (dback->found_ref == 0)
4795                         continue;
4796                 if (back->full_backref)
4797                         continue;
4798
4799                 if (dback->bytes == best->bytes &&
4800                     dback->disk_bytenr == best->bytenr)
4801                         continue;
4802
4803                 ret = repair_ref(trans, info, path, dback, best);
4804                 if (ret)
4805                         goto out;
4806         }
4807
4808         /*
4809          * Ok we messed with the actual refs, which means we need to drop our
4810          * entire cache and go back and rescan.  I know this is a huge pain and
4811          * adds a lot of extra work, but it's the only way to be safe.  Once all
4812          * the backrefs agree we may not need to do anything to the extent
4813          * record itself.
4814          */
4815         ret = -EAGAIN;
4816 out:
4817         while (!list_empty(&entries)) {
4818                 entry = list_entry(entries.next, struct extent_entry, list);
4819                 list_del_init(&entry->list);
4820                 free(entry);
4821         }
4822         return ret;
4823 }
4824
4825 static int process_duplicates(struct btrfs_root *root,
4826                               struct cache_tree *extent_cache,
4827                               struct extent_record *rec)
4828 {
4829         struct extent_record *good, *tmp;
4830         struct cache_extent *cache;
4831         int ret;
4832
4833         /*
4834          * If we found a extent record for this extent then return, or if we
4835          * have more than one duplicate we are likely going to need to delete
4836          * something.
4837          */
4838         if (rec->found_rec || rec->num_duplicates > 1)
4839                 return 0;
4840
4841         /* Shouldn't happen but just in case */
4842         BUG_ON(!rec->num_duplicates);
4843
4844         /*
4845          * So this happens if we end up with a backref that doesn't match the
4846          * actual extent entry.  So either the backref is bad or the extent
4847          * entry is bad.  Either way we want to have the extent_record actually
4848          * reflect what we found in the extent_tree, so we need to take the
4849          * duplicate out and use that as the extent_record since the only way we
4850          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
4851          */
4852         remove_cache_extent(extent_cache, &rec->cache);
4853
4854         good = list_entry(rec->dups.next, struct extent_record, list);
4855         list_del_init(&good->list);
4856         INIT_LIST_HEAD(&good->backrefs);
4857         INIT_LIST_HEAD(&good->dups);
4858         good->cache.start = good->start;
4859         good->cache.size = good->nr;
4860         good->content_checked = 0;
4861         good->owner_ref_checked = 0;
4862         good->num_duplicates = 0;
4863         good->refs = rec->refs;
4864         list_splice_init(&rec->backrefs, &good->backrefs);
4865         while (1) {
4866                 cache = lookup_cache_extent(extent_cache, good->start,
4867                                             good->nr);
4868                 if (!cache)
4869                         break;
4870                 tmp = container_of(cache, struct extent_record, cache);
4871
4872                 /*
4873                  * If we find another overlapping extent and it's found_rec is
4874                  * set then it's a duplicate and we need to try and delete
4875                  * something.
4876                  */
4877                 if (tmp->found_rec || tmp->num_duplicates > 0) {
4878                         if (list_empty(&good->list))
4879                                 list_add_tail(&good->list,
4880                                               &duplicate_extents);
4881                         good->num_duplicates += tmp->num_duplicates + 1;
4882                         list_splice_init(&tmp->dups, &good->dups);
4883                         list_del_init(&tmp->list);
4884                         list_add_tail(&tmp->list, &good->dups);
4885                         remove_cache_extent(extent_cache, &tmp->cache);
4886                         continue;
4887                 }
4888
4889                 /*
4890                  * Ok we have another non extent item backed extent rec, so lets
4891                  * just add it to this extent and carry on like we did above.
4892                  */
4893                 good->refs += tmp->refs;
4894                 list_splice_init(&tmp->backrefs, &good->backrefs);
4895                 remove_cache_extent(extent_cache, &tmp->cache);
4896                 free(tmp);
4897         }
4898         ret = insert_cache_extent(extent_cache, &good->cache);
4899         BUG_ON(ret);
4900         free(rec);
4901         return good->num_duplicates ? 0 : 1;
4902 }
4903
4904 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
4905                                     struct btrfs_root *root,
4906                                     struct extent_record *rec)
4907 {
4908         LIST_HEAD(delete_list);
4909         struct btrfs_path *path;
4910         struct extent_record *tmp, *good, *n;
4911         int nr_del = 0;
4912         int ret = 0;
4913         struct btrfs_key key;
4914
4915         path = btrfs_alloc_path();
4916         if (!path) {
4917                 ret = -ENOMEM;
4918                 goto out;
4919         }
4920
4921         good = rec;
4922         /* Find the record that covers all of the duplicates. */
4923         list_for_each_entry(tmp, &rec->dups, list) {
4924                 if (good->start < tmp->start)
4925                         continue;
4926                 if (good->nr > tmp->nr)
4927                         continue;
4928
4929                 if (tmp->start + tmp->nr < good->start + good->nr) {
4930                         fprintf(stderr, "Ok we have overlapping extents that "
4931                                 "aren't completely covered by eachother, this "
4932                                 "is going to require more careful thought.  "
4933                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
4934                                 tmp->start, tmp->nr, good->start, good->nr);
4935                         abort();
4936                 }
4937                 good = tmp;
4938         }
4939
4940         if (good != rec)
4941                 list_add_tail(&rec->list, &delete_list);
4942
4943         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
4944                 if (tmp == good)
4945                         continue;
4946                 list_move_tail(&tmp->list, &delete_list);
4947         }
4948
4949         root = root->fs_info->extent_root;
4950         list_for_each_entry(tmp, &delete_list, list) {
4951                 if (tmp->found_rec == 0)
4952                         continue;
4953                 key.objectid = tmp->start;
4954                 key.type = BTRFS_EXTENT_ITEM_KEY;
4955                 key.offset = tmp->nr;
4956
4957                 /* Shouldn't happen but just in case */
4958                 if (tmp->metadata) {
4959                         fprintf(stderr, "Well this shouldn't happen, extent "
4960                                 "record overlaps but is metadata? "
4961                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
4962                         abort();
4963                 }
4964
4965                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4966                 if (ret) {
4967                         if (ret > 0)
4968                                 ret = -EINVAL;
4969                         goto out;
4970                 }
4971                 ret = btrfs_del_item(trans, root, path);
4972                 if (ret)
4973                         goto out;
4974                 btrfs_release_path(path);
4975                 nr_del++;
4976         }
4977
4978 out:
4979         while (!list_empty(&delete_list)) {
4980                 tmp = list_entry(delete_list.next, struct extent_record, list);
4981                 list_del_init(&tmp->list);
4982                 if (tmp == rec)
4983                         continue;
4984                 free(tmp);
4985         }
4986
4987         while (!list_empty(&rec->dups)) {
4988                 tmp = list_entry(rec->dups.next, struct extent_record, list);
4989                 list_del_init(&tmp->list);
4990                 free(tmp);
4991         }
4992
4993         btrfs_free_path(path);
4994
4995         if (!ret && !nr_del)
4996                 rec->num_duplicates = 0;
4997
4998         return ret ? ret : nr_del;
4999 }
5000
5001 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
5002                                   struct btrfs_fs_info *info,
5003                                   struct btrfs_path *path,
5004                                   struct cache_tree *extent_cache,
5005                                   struct extent_record *rec)
5006 {
5007         struct btrfs_root *root;
5008         struct extent_backref *back;
5009         struct data_backref *dback;
5010         struct cache_extent *cache;
5011         struct btrfs_file_extent_item *fi;
5012         struct btrfs_key key;
5013         u64 bytenr, bytes;
5014         int ret;
5015
5016         list_for_each_entry(back, &rec->backrefs, list) {
5017                 dback = (struct data_backref *)back;
5018
5019                 /* We found this one, we don't need to do a lookup */
5020                 if (dback->found_ref)
5021                         continue;
5022                 /* Don't care about full backrefs (poor unloved backrefs) */
5023                 if (back->full_backref)
5024                         continue;
5025                 key.objectid = dback->root;
5026                 key.type = BTRFS_ROOT_ITEM_KEY;
5027                 key.offset = (u64)-1;
5028
5029                 root = btrfs_read_fs_root(info, &key);
5030
5031                 /* No root, definitely a bad ref, skip */
5032                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
5033                         continue;
5034                 /* Other err, exit */
5035                 if (IS_ERR(root))
5036                         return PTR_ERR(root);
5037
5038                 key.objectid = dback->owner;
5039                 key.type = BTRFS_EXTENT_DATA_KEY;
5040                 key.offset = dback->offset;
5041                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5042                 if (ret) {
5043                         btrfs_release_path(path);
5044                         if (ret < 0)
5045                                 return ret;
5046                         /* Didn't find it, we can carry on */
5047                         ret = 0;
5048                         continue;
5049                 }
5050
5051                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5052                                     struct btrfs_file_extent_item);
5053                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
5054                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
5055                 btrfs_release_path(path);
5056                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5057                 if (cache) {
5058                         struct extent_record *tmp;
5059                         tmp = container_of(cache, struct extent_record, cache);
5060
5061                         /*
5062                          * If we found an extent record for the bytenr for this
5063                          * particular backref then we can't add it to our
5064                          * current extent record.  We only want to add backrefs
5065                          * that don't have a corresponding extent item in the
5066                          * extent tree since they likely belong to this record
5067                          * and we need to fix it if it doesn't match bytenrs.
5068                          */
5069                         if  (tmp->found_rec)
5070                                 continue;
5071                 }
5072
5073                 dback->found_ref += 1;
5074                 dback->disk_bytenr = bytenr;
5075                 dback->bytes = bytes;
5076
5077                 /*
5078                  * Set this so the verify backref code knows not to trust the
5079                  * values in this backref.
5080                  */
5081                 back->broken = 1;
5082         }
5083
5084         return 0;
5085 }
5086
5087 /*
5088  * when an incorrect extent item is found, this will delete
5089  * all of the existing entries for it and recreate them
5090  * based on what the tree scan found.
5091  */
5092 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
5093                              struct btrfs_fs_info *info,
5094                              struct cache_tree *extent_cache,
5095                              struct extent_record *rec)
5096 {
5097         int ret;
5098         struct btrfs_path *path;
5099         struct list_head *cur = rec->backrefs.next;
5100         struct cache_extent *cache;
5101         struct extent_backref *back;
5102         int allocated = 0;
5103         u64 flags = 0;
5104
5105         /* remember our flags for recreating the extent */
5106         ret = btrfs_lookup_extent_info(NULL, info->extent_root, rec->start,
5107                                        rec->max_size, rec->metadata, NULL,
5108                                        &flags);
5109         if (ret < 0)
5110                 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5111
5112         path = btrfs_alloc_path();
5113         if (!path)
5114                 return -ENOMEM;
5115
5116         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
5117                 /*
5118                  * Sometimes the backrefs themselves are so broken they don't
5119                  * get attached to any meaningful rec, so first go back and
5120                  * check any of our backrefs that we couldn't find and throw
5121                  * them into the list if we find the backref so that
5122                  * verify_backrefs can figure out what to do.
5123                  */
5124                 ret = find_possible_backrefs(trans, info, path, extent_cache,
5125                                              rec);
5126                 if (ret < 0)
5127                         goto out;
5128         }
5129
5130         /* step one, make sure all of the backrefs agree */
5131         ret = verify_backrefs(trans, info, path, rec);
5132         if (ret < 0)
5133                 goto out;
5134
5135         /* step two, delete all the existing records */
5136         ret = delete_extent_records(trans, info->extent_root, path,
5137                                     rec->start, rec->max_size);
5138
5139         if (ret < 0)
5140                 goto out;
5141
5142         /* was this block corrupt?  If so, don't add references to it */
5143         cache = lookup_cache_extent(info->corrupt_blocks,
5144                                     rec->start, rec->max_size);
5145         if (cache) {
5146                 ret = 0;
5147                 goto out;
5148         }
5149
5150         /* step three, recreate all the refs we did find */
5151         while(cur != &rec->backrefs) {
5152                 back = list_entry(cur, struct extent_backref, list);
5153                 cur = cur->next;
5154
5155                 /*
5156                  * if we didn't find any references, don't create a
5157                  * new extent record
5158                  */
5159                 if (!back->found_ref)
5160                         continue;
5161
5162                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
5163                 allocated = 1;
5164
5165                 if (ret)
5166                         goto out;
5167         }
5168 out:
5169         btrfs_free_path(path);
5170         return ret;
5171 }
5172
5173 /* right now we only prune from the extent allocation tree */
5174 static int prune_one_block(struct btrfs_trans_handle *trans,
5175                            struct btrfs_fs_info *info,
5176                            struct btrfs_corrupt_block *corrupt)
5177 {
5178         int ret;
5179         struct btrfs_path path;
5180         struct extent_buffer *eb;
5181         u64 found;
5182         int slot;
5183         int nritems;
5184         int level = corrupt->level + 1;
5185
5186         btrfs_init_path(&path);
5187 again:
5188         /* we want to stop at the parent to our busted block */
5189         path.lowest_level = level;
5190
5191         ret = btrfs_search_slot(trans, info->extent_root,
5192                                 &corrupt->key, &path, -1, 1);
5193
5194         if (ret < 0)
5195                 goto out;
5196
5197         eb = path.nodes[level];
5198         if (!eb) {
5199                 ret = -ENOENT;
5200                 goto out;
5201         }
5202
5203         /*
5204          * hopefully the search gave us the block we want to prune,
5205          * lets try that first
5206          */
5207         slot = path.slots[level];
5208         found =  btrfs_node_blockptr(eb, slot);
5209         if (found == corrupt->cache.start)
5210                 goto del_ptr;
5211
5212         nritems = btrfs_header_nritems(eb);
5213
5214         /* the search failed, lets scan this node and hope we find it */
5215         for (slot = 0; slot < nritems; slot++) {
5216                 found =  btrfs_node_blockptr(eb, slot);
5217                 if (found == corrupt->cache.start)
5218                         goto del_ptr;
5219         }
5220         /*
5221          * we couldn't find the bad block.  TODO, search all the nodes for pointers
5222          * to this block
5223          */
5224         if (eb == info->extent_root->node) {
5225                 ret = -ENOENT;
5226                 goto out;
5227         } else {
5228                 level++;
5229                 btrfs_release_path(&path);
5230                 goto again;
5231         }
5232
5233 del_ptr:
5234         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
5235         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
5236
5237 out:
5238         btrfs_release_path(&path);
5239         return ret;
5240 }
5241
5242 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
5243                                 struct btrfs_fs_info *info)
5244 {
5245         struct cache_extent *cache;
5246         struct btrfs_corrupt_block *corrupt;
5247
5248         cache = search_cache_extent(info->corrupt_blocks, 0);
5249         while (1) {
5250                 if (!cache)
5251                         break;
5252                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5253                 prune_one_block(trans, info, corrupt);
5254                 cache = next_cache_extent(cache);
5255         }
5256         return 0;
5257 }
5258
5259 static void free_corrupt_block(struct cache_extent *cache)
5260 {
5261         struct btrfs_corrupt_block *corrupt;
5262
5263         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5264         free(corrupt);
5265 }
5266
5267 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
5268
5269 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
5270 {
5271         struct btrfs_block_group_cache *cache;
5272         u64 start, end;
5273         int ret;
5274
5275         while (1) {
5276                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
5277                                             &start, &end, EXTENT_DIRTY);
5278                 if (ret)
5279                         break;
5280                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
5281                                    GFP_NOFS);
5282         }
5283
5284         start = 0;
5285         while (1) {
5286                 cache = btrfs_lookup_first_block_group(fs_info, start);
5287                 if (!cache)
5288                         break;
5289                 if (cache->cached)
5290                         cache->cached = 0;
5291                 start = cache->key.objectid + cache->key.offset;
5292         }
5293 }
5294
5295 static int check_extent_refs(struct btrfs_trans_handle *trans,
5296                              struct btrfs_root *root,
5297                              struct cache_tree *extent_cache)
5298 {
5299         struct extent_record *rec;
5300         struct cache_extent *cache;
5301         int err = 0;
5302         int ret = 0;
5303         int fixed = 0;
5304         int had_dups = 0;
5305
5306         if (repair) {
5307                 /*
5308                  * if we're doing a repair, we have to make sure
5309                  * we don't allocate from the problem extents.
5310                  * In the worst case, this will be all the
5311                  * extents in the FS
5312                  */
5313                 cache = search_cache_extent(extent_cache, 0);
5314                 while(cache) {
5315                         rec = container_of(cache, struct extent_record, cache);
5316                         btrfs_pin_extent(root->fs_info,
5317                                          rec->start, rec->max_size);
5318                         cache = next_cache_extent(cache);
5319                 }
5320
5321                 /* pin down all the corrupted blocks too */
5322                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
5323                 while(cache) {
5324                         btrfs_pin_extent(root->fs_info,
5325                                          cache->start, cache->size);
5326                         cache = next_cache_extent(cache);
5327                 }
5328                 prune_corrupt_blocks(trans, root->fs_info);
5329                 reset_cached_block_groups(root->fs_info);
5330         }
5331
5332         /*
5333          * We need to delete any duplicate entries we find first otherwise we
5334          * could mess up the extent tree when we have backrefs that actually
5335          * belong to a different extent item and not the weird duplicate one.
5336          */
5337         while (repair && !list_empty(&duplicate_extents)) {
5338                 rec = list_entry(duplicate_extents.next, struct extent_record,
5339                                  list);
5340                 list_del_init(&rec->list);
5341
5342                 /* Sometimes we can find a backref before we find an actual
5343                  * extent, so we need to process it a little bit to see if there
5344                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
5345                  * if this is a backref screwup.  If we need to delete stuff
5346                  * process_duplicates() will return 0, otherwise it will return
5347                  * 1 and we
5348                  */
5349                 if (process_duplicates(root, extent_cache, rec))
5350                         continue;
5351                 ret = delete_duplicate_records(trans, root, rec);
5352                 if (ret < 0)
5353                         return ret;
5354                 /*
5355                  * delete_duplicate_records will return the number of entries
5356                  * deleted, so if it's greater than 0 then we know we actually
5357                  * did something and we need to remove.
5358                  */
5359                 if (ret)
5360                         had_dups = 1;
5361         }
5362
5363         if (had_dups)
5364                 return -EAGAIN;
5365
5366         while(1) {
5367                 fixed = 0;
5368                 cache = search_cache_extent(extent_cache, 0);
5369                 if (!cache)
5370                         break;
5371                 rec = container_of(cache, struct extent_record, cache);
5372                 if (rec->num_duplicates) {
5373                         fprintf(stderr, "extent item %llu has multiple extent "
5374                                 "items\n", (unsigned long long)rec->start);
5375                         err = 1;
5376                 }
5377
5378                 if (rec->refs != rec->extent_item_refs) {
5379                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
5380                                 (unsigned long long)rec->start,
5381                                 (unsigned long long)rec->nr);
5382                         fprintf(stderr, "extent item %llu, found %llu\n",
5383                                 (unsigned long long)rec->extent_item_refs,
5384                                 (unsigned long long)rec->refs);
5385                         if (!fixed && repair) {
5386                                 ret = fixup_extent_refs(trans, root->fs_info,
5387                                                         extent_cache, rec);
5388                                 if (ret)
5389                                         goto repair_abort;
5390                                 fixed = 1;
5391                         }
5392                         err = 1;
5393
5394                 }
5395                 if (all_backpointers_checked(rec, 1)) {
5396                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
5397                                 (unsigned long long)rec->start,
5398                                 (unsigned long long)rec->nr);
5399
5400                         if (!fixed && repair) {
5401                                 ret = fixup_extent_refs(trans, root->fs_info,
5402                                                         extent_cache, rec);
5403                                 if (ret)
5404                                         goto repair_abort;
5405                                 fixed = 1;
5406                         }
5407
5408                         err = 1;
5409                 }
5410                 if (!rec->owner_ref_checked) {
5411                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
5412                                 (unsigned long long)rec->start,
5413                                 (unsigned long long)rec->nr);
5414                         if (!fixed && repair) {
5415                                 ret = fixup_extent_refs(trans, root->fs_info,
5416                                                         extent_cache, rec);
5417                                 if (ret)
5418                                         goto repair_abort;
5419                                 fixed = 1;
5420                         }
5421                         err = 1;
5422                 }
5423
5424                 remove_cache_extent(extent_cache, cache);
5425                 free_all_extent_backrefs(rec);
5426                 free(rec);
5427         }
5428 repair_abort:
5429         if (repair) {
5430                 if (ret && ret != -EAGAIN) {
5431                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
5432                         exit(1);
5433                 } else if (!ret) {
5434                         btrfs_fix_block_accounting(trans, root);
5435                 }
5436                 if (err)
5437                         fprintf(stderr, "repaired damaged extent references\n");
5438                 return ret;
5439         }
5440         return err;
5441 }
5442
5443 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
5444 {
5445         u64 stripe_size;
5446
5447         if (type & BTRFS_BLOCK_GROUP_RAID0) {
5448                 stripe_size = length;
5449                 stripe_size /= num_stripes;
5450         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
5451                 stripe_size = length * 2;
5452                 stripe_size /= num_stripes;
5453         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
5454                 stripe_size = length;
5455                 stripe_size /= (num_stripes - 1);
5456         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
5457                 stripe_size = length;
5458                 stripe_size /= (num_stripes - 2);
5459         } else {
5460                 stripe_size = length;
5461         }
5462         return stripe_size;
5463 }
5464
5465 static int check_chunk_refs(struct chunk_record *chunk_rec,
5466                             struct block_group_tree *block_group_cache,
5467                             struct device_extent_tree *dev_extent_cache,
5468                             int silent)
5469 {
5470         struct cache_extent *block_group_item;
5471         struct block_group_record *block_group_rec;
5472         struct cache_extent *dev_extent_item;
5473         struct device_extent_record *dev_extent_rec;
5474         u64 devid;
5475         u64 offset;
5476         u64 length;
5477         int i;
5478         int ret = 0;
5479
5480         block_group_item = lookup_cache_extent(&block_group_cache->tree,
5481                                                chunk_rec->offset,
5482                                                chunk_rec->length);
5483         if (block_group_item) {
5484                 block_group_rec = container_of(block_group_item,
5485                                                struct block_group_record,
5486                                                cache);
5487                 if (chunk_rec->length != block_group_rec->offset ||
5488                     chunk_rec->offset != block_group_rec->objectid ||
5489                     chunk_rec->type_flags != block_group_rec->flags) {
5490                         if (!silent)
5491                                 fprintf(stderr,
5492                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
5493                                         chunk_rec->objectid,
5494                                         chunk_rec->type,
5495                                         chunk_rec->offset,
5496                                         chunk_rec->length,
5497                                         chunk_rec->offset,
5498                                         chunk_rec->type_flags,
5499                                         block_group_rec->objectid,
5500                                         block_group_rec->type,
5501                                         block_group_rec->offset,
5502                                         block_group_rec->offset,
5503                                         block_group_rec->objectid,
5504                                         block_group_rec->flags);
5505                         ret = -1;
5506                 } else {
5507                         list_del_init(&block_group_rec->list);
5508                         chunk_rec->bg_rec = block_group_rec;
5509                 }
5510         } else {
5511                 if (!silent)
5512                         fprintf(stderr,
5513                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
5514                                 chunk_rec->objectid,
5515                                 chunk_rec->type,
5516                                 chunk_rec->offset,
5517                                 chunk_rec->length,
5518                                 chunk_rec->offset,
5519                                 chunk_rec->type_flags);
5520                 ret = -1;
5521         }
5522
5523         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
5524                                     chunk_rec->num_stripes);
5525         for (i = 0; i < chunk_rec->num_stripes; ++i) {
5526                 devid = chunk_rec->stripes[i].devid;
5527                 offset = chunk_rec->stripes[i].offset;
5528                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
5529                                                        devid, offset, length);
5530                 if (dev_extent_item) {
5531                         dev_extent_rec = container_of(dev_extent_item,
5532                                                 struct device_extent_record,
5533                                                 cache);
5534                         if (dev_extent_rec->objectid != devid ||
5535                             dev_extent_rec->offset != offset ||
5536                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
5537                             dev_extent_rec->length != length) {
5538                                 if (!silent)
5539                                         fprintf(stderr,
5540                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
5541                                                 chunk_rec->objectid,
5542                                                 chunk_rec->type,
5543                                                 chunk_rec->offset,
5544                                                 chunk_rec->stripes[i].devid,
5545                                                 chunk_rec->stripes[i].offset,
5546                                                 dev_extent_rec->objectid,
5547                                                 dev_extent_rec->offset,
5548                                                 dev_extent_rec->length);
5549                                 ret = -1;
5550                         } else {
5551                                 list_move(&dev_extent_rec->chunk_list,
5552                                           &chunk_rec->dextents);
5553                         }
5554                 } else {
5555                         if (!silent)
5556                                 fprintf(stderr,
5557                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
5558                                         chunk_rec->objectid,
5559                                         chunk_rec->type,
5560                                         chunk_rec->offset,
5561                                         chunk_rec->stripes[i].devid,
5562                                         chunk_rec->stripes[i].offset);
5563                         ret = -1;
5564                 }
5565         }
5566         return ret;
5567 }
5568
5569 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
5570 int check_chunks(struct cache_tree *chunk_cache,
5571                  struct block_group_tree *block_group_cache,
5572                  struct device_extent_tree *dev_extent_cache,
5573                  struct list_head *good, struct list_head *bad, int silent)
5574 {
5575         struct cache_extent *chunk_item;
5576         struct chunk_record *chunk_rec;
5577         struct block_group_record *bg_rec;
5578         struct device_extent_record *dext_rec;
5579         int err;
5580         int ret = 0;
5581
5582         chunk_item = first_cache_extent(chunk_cache);
5583         while (chunk_item) {
5584                 chunk_rec = container_of(chunk_item, struct chunk_record,
5585                                          cache);
5586                 err = check_chunk_refs(chunk_rec, block_group_cache,
5587                                        dev_extent_cache, silent);
5588                 if (err) {
5589                         ret = err;
5590                         if (bad)
5591                                 list_add_tail(&chunk_rec->list, bad);
5592                 } else {
5593                         if (good)
5594                                 list_add_tail(&chunk_rec->list, good);
5595                 }
5596
5597                 chunk_item = next_cache_extent(chunk_item);
5598         }
5599
5600         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
5601                 if (!silent)
5602                         fprintf(stderr,
5603                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
5604                                 bg_rec->objectid,
5605                                 bg_rec->offset,
5606                                 bg_rec->flags);
5607                 if (!ret)
5608                         ret = 1;
5609         }
5610
5611         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
5612                             chunk_list) {
5613                 if (!silent)
5614                         fprintf(stderr,
5615                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
5616                                 dext_rec->objectid,
5617                                 dext_rec->offset,
5618                                 dext_rec->length);
5619                 if (!ret)
5620                         ret = 1;
5621         }
5622         return ret;
5623 }
5624
5625
5626 static int check_device_used(struct device_record *dev_rec,
5627                              struct device_extent_tree *dext_cache)
5628 {
5629         struct cache_extent *cache;
5630         struct device_extent_record *dev_extent_rec;
5631         u64 total_byte = 0;
5632
5633         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
5634         while (cache) {
5635                 dev_extent_rec = container_of(cache,
5636                                               struct device_extent_record,
5637                                               cache);
5638                 if (dev_extent_rec->objectid != dev_rec->devid)
5639                         break;
5640
5641                 list_del(&dev_extent_rec->device_list);
5642                 total_byte += dev_extent_rec->length;
5643                 cache = next_cache_extent(cache);
5644         }
5645
5646         if (total_byte != dev_rec->byte_used) {
5647                 fprintf(stderr,
5648                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
5649                         total_byte, dev_rec->byte_used, dev_rec->objectid,
5650                         dev_rec->type, dev_rec->offset);
5651                 return -1;
5652         } else {
5653                 return 0;
5654         }
5655 }
5656
5657 /* check btrfs_dev_item -> btrfs_dev_extent */
5658 static int check_devices(struct rb_root *dev_cache,
5659                          struct device_extent_tree *dev_extent_cache)
5660 {
5661         struct rb_node *dev_node;
5662         struct device_record *dev_rec;
5663         struct device_extent_record *dext_rec;
5664         int err;
5665         int ret = 0;
5666
5667         dev_node = rb_first(dev_cache);
5668         while (dev_node) {
5669                 dev_rec = container_of(dev_node, struct device_record, node);
5670                 err = check_device_used(dev_rec, dev_extent_cache);
5671                 if (err)
5672                         ret = err;
5673
5674                 dev_node = rb_next(dev_node);
5675         }
5676         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
5677                             device_list) {
5678                 fprintf(stderr,
5679                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
5680                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
5681                 if (!ret)
5682                         ret = 1;
5683         }
5684         return ret;
5685 }
5686
5687 static int check_chunks_and_extents(struct btrfs_root *root)
5688 {
5689         struct rb_root dev_cache;
5690         struct cache_tree chunk_cache;
5691         struct block_group_tree block_group_cache;
5692         struct device_extent_tree dev_extent_cache;
5693         struct cache_tree extent_cache;
5694         struct cache_tree seen;
5695         struct cache_tree pending;
5696         struct cache_tree reada;
5697         struct cache_tree nodes;
5698         struct cache_tree corrupt_blocks;
5699         struct btrfs_path path;
5700         struct btrfs_key key;
5701         struct btrfs_key found_key;
5702         int ret, err = 0;
5703         u64 last = 0;
5704         struct block_info *bits;
5705         int bits_nr;
5706         struct extent_buffer *leaf;
5707         struct btrfs_trans_handle *trans = NULL;
5708         int slot;
5709         struct btrfs_root_item ri;
5710         struct list_head dropping_trees;
5711
5712         dev_cache = RB_ROOT;
5713         cache_tree_init(&chunk_cache);
5714         block_group_tree_init(&block_group_cache);
5715         device_extent_tree_init(&dev_extent_cache);
5716
5717         cache_tree_init(&extent_cache);
5718         cache_tree_init(&seen);
5719         cache_tree_init(&pending);
5720         cache_tree_init(&nodes);
5721         cache_tree_init(&reada);
5722         cache_tree_init(&corrupt_blocks);
5723         INIT_LIST_HEAD(&dropping_trees);
5724
5725         if (repair) {
5726                 trans = btrfs_start_transaction(root, 1);
5727                 if (IS_ERR(trans)) {
5728                         fprintf(stderr, "Error starting transaction\n");
5729                         return PTR_ERR(trans);
5730                 }
5731                 root->fs_info->fsck_extent_cache = &extent_cache;
5732                 root->fs_info->free_extent_hook = free_extent_hook;
5733                 root->fs_info->corrupt_blocks = &corrupt_blocks;
5734         }
5735
5736         bits_nr = 1024;
5737         bits = malloc(bits_nr * sizeof(struct block_info));
5738         if (!bits) {
5739                 perror("malloc");
5740                 exit(1);
5741         }
5742
5743 again:
5744         add_root_to_pending(root->fs_info->tree_root->node,
5745                             &extent_cache, &pending, &seen, &nodes,
5746                             &root->fs_info->tree_root->root_key);
5747
5748         add_root_to_pending(root->fs_info->chunk_root->node,
5749                             &extent_cache, &pending, &seen, &nodes,
5750                             &root->fs_info->chunk_root->root_key);
5751
5752         btrfs_init_path(&path);
5753         key.offset = 0;
5754         key.objectid = 0;
5755         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
5756         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
5757                                         &key, &path, 0, 0);
5758         BUG_ON(ret < 0);
5759         while(1) {
5760                 leaf = path.nodes[0];
5761                 slot = path.slots[0];
5762                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
5763                         ret = btrfs_next_leaf(root, &path);
5764                         if (ret != 0)
5765                                 break;
5766                         leaf = path.nodes[0];
5767                         slot = path.slots[0];
5768                 }
5769                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
5770                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
5771                         unsigned long offset;
5772                         struct extent_buffer *buf;
5773
5774                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5775                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
5776                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
5777                                 buf = read_tree_block(root->fs_info->tree_root,
5778                                                       btrfs_root_bytenr(&ri),
5779                                                       btrfs_level_size(root,
5780                                                       btrfs_root_level(&ri)),
5781                                                       0);
5782                                 if (!buf) {
5783                                         ret = -EIO;
5784                                         goto out;
5785                                 }
5786                                 add_root_to_pending(buf, &extent_cache,
5787                                                     &pending, &seen, &nodes,
5788                                                     &found_key);
5789                                 free_extent_buffer(buf);
5790                         } else {
5791                                 struct dropping_root_item_record *dri_rec;
5792                                 dri_rec = malloc(sizeof(*dri_rec));
5793                                 if (!dri_rec) {
5794                                         perror("malloc");
5795                                         exit(1);
5796                                 }
5797                                 memcpy(&dri_rec->ri, &ri, sizeof(ri));
5798                                 memcpy(&dri_rec->found_key, &found_key,
5799                                        sizeof(found_key));
5800                                 list_add_tail(&dri_rec->list, &dropping_trees);
5801                         }
5802                 }
5803                 path.slots[0]++;
5804         }
5805         btrfs_release_path(&path);
5806         while (1) {
5807                 ret = run_next_block(trans, root, bits, bits_nr, &last,
5808                                      &pending, &seen, &reada, &nodes,
5809                                      &extent_cache, &chunk_cache, &dev_cache,
5810                                      &block_group_cache, &dev_extent_cache,
5811                                      NULL);
5812                 if (ret != 0)
5813                         break;
5814         }
5815
5816         while (!list_empty(&dropping_trees)) {
5817                 struct dropping_root_item_record *rec;
5818                 struct extent_buffer *buf;
5819                 rec = list_entry(dropping_trees.next,
5820                                  struct dropping_root_item_record, list);
5821                 last = 0;
5822                 if (!bits) {
5823                         perror("realloc");
5824                         exit(1);
5825                 }
5826                 buf = read_tree_block(root->fs_info->tree_root,
5827                                       btrfs_root_bytenr(&rec->ri),
5828                                       btrfs_level_size(root,
5829                                       btrfs_root_level(&rec->ri)), 0);
5830                 if (!buf) {
5831                         ret = -EIO;
5832                         goto out;
5833                 }
5834                 add_root_to_pending(buf, &extent_cache, &pending,
5835                                     &seen, &nodes, &rec->found_key);
5836                 while (1) {
5837                         ret = run_next_block(trans, root, bits, bits_nr, &last,
5838                                              &pending, &seen, &reada,
5839                                              &nodes, &extent_cache,
5840                                              &chunk_cache, &dev_cache,
5841                                              &block_group_cache,
5842                                              &dev_extent_cache,
5843                                              &rec->ri);
5844                         if (ret != 0)
5845                                 break;
5846                 }
5847                 free_extent_buffer(buf);
5848                 list_del(&rec->list);
5849                 free(rec);
5850         }
5851
5852         if (ret >= 0)
5853                 ret = check_extent_refs(trans, root, &extent_cache);
5854         if (ret == -EAGAIN) {
5855                 ret = btrfs_commit_transaction(trans, root);
5856                 if (ret)
5857                         goto out;
5858
5859                 trans = btrfs_start_transaction(root, 1);
5860                 if (IS_ERR(trans)) {
5861                         ret = PTR_ERR(trans);
5862                         goto out;
5863                 }
5864
5865                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
5866                 free_extent_cache_tree(&seen);
5867                 free_extent_cache_tree(&pending);
5868                 free_extent_cache_tree(&reada);
5869                 free_extent_cache_tree(&nodes);
5870                 free_extent_record_cache(root->fs_info, &extent_cache);
5871                 goto again;
5872         }
5873
5874         err = check_chunks(&chunk_cache, &block_group_cache,
5875                            &dev_extent_cache, NULL, NULL, 0);
5876         if (err && !ret)
5877                 ret = err;
5878
5879         err = check_devices(&dev_cache, &dev_extent_cache);
5880         if (err && !ret)
5881                 ret = err;
5882
5883         if (trans) {
5884                 err = btrfs_commit_transaction(trans, root);
5885                 if (!ret)
5886                         ret = err;
5887         }
5888 out:
5889         if (repair) {
5890                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
5891                 root->fs_info->fsck_extent_cache = NULL;
5892                 root->fs_info->free_extent_hook = NULL;
5893                 root->fs_info->corrupt_blocks = NULL;
5894         }
5895         free(bits);
5896         free_chunk_cache_tree(&chunk_cache);
5897         free_device_cache_tree(&dev_cache);
5898         free_block_group_tree(&block_group_cache);
5899         free_device_extent_tree(&dev_extent_cache);
5900         return ret;
5901 }
5902
5903 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
5904                            struct btrfs_root *root, int overwrite)
5905 {
5906         struct extent_buffer *c;
5907         struct extent_buffer *old = root->node;
5908         int level;
5909         struct btrfs_disk_key disk_key = {0,0,0};
5910
5911         level = 0;
5912
5913         if (overwrite) {
5914                 c = old;
5915                 extent_buffer_get(c);
5916                 goto init;
5917         }
5918         c = btrfs_alloc_free_block(trans, root,
5919                                    btrfs_level_size(root, 0),
5920                                    root->root_key.objectid,
5921                                    &disk_key, level, 0, 0);
5922         if (IS_ERR(c)) {
5923                 c = old;
5924                 extent_buffer_get(c);
5925         }
5926 init:
5927         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
5928         btrfs_set_header_level(c, level);
5929         btrfs_set_header_bytenr(c, c->start);
5930         btrfs_set_header_generation(c, trans->transid);
5931         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
5932         btrfs_set_header_owner(c, root->root_key.objectid);
5933
5934         write_extent_buffer(c, root->fs_info->fsid,
5935                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
5936
5937         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
5938                             btrfs_header_chunk_tree_uuid(c),
5939                             BTRFS_UUID_SIZE);
5940
5941         btrfs_mark_buffer_dirty(c);
5942
5943         free_extent_buffer(old);
5944         root->node = c;
5945         add_root_to_dirty_list(root);
5946         return 0;
5947 }
5948
5949 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
5950                                 struct extent_buffer *eb, int tree_root)
5951 {
5952         struct extent_buffer *tmp;
5953         struct btrfs_root_item *ri;
5954         struct btrfs_key key;
5955         u64 bytenr;
5956         u32 leafsize;
5957         int level = btrfs_header_level(eb);
5958         int nritems;
5959         int ret;
5960         int i;
5961
5962         btrfs_pin_extent(fs_info, eb->start, eb->len);
5963
5964         leafsize = btrfs_super_leafsize(fs_info->super_copy);
5965         nritems = btrfs_header_nritems(eb);
5966         for (i = 0; i < nritems; i++) {
5967                 if (level == 0) {
5968                         btrfs_item_key_to_cpu(eb, &key, i);
5969                         if (key.type != BTRFS_ROOT_ITEM_KEY)
5970                                 continue;
5971                         /* Skip the extent root and reloc roots */
5972                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
5973                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
5974                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
5975                                 continue;
5976                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
5977                         bytenr = btrfs_disk_root_bytenr(eb, ri);
5978
5979                         /*
5980                          * If at any point we start needing the real root we
5981                          * will have to build a stump root for the root we are
5982                          * in, but for now this doesn't actually use the root so
5983                          * just pass in extent_root.
5984                          */
5985                         tmp = read_tree_block(fs_info->extent_root, bytenr,
5986                                               leafsize, 0);
5987                         if (!tmp) {
5988                                 fprintf(stderr, "Error reading root block\n");
5989                                 return -EIO;
5990                         }
5991                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
5992                         free_extent_buffer(tmp);
5993                         if (ret)
5994                                 return ret;
5995                 } else {
5996                         bytenr = btrfs_node_blockptr(eb, i);
5997
5998                         /* If we aren't the tree root don't read the block */
5999                         if (level == 1 && !tree_root) {
6000                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
6001                                 continue;
6002                         }
6003
6004                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6005                                               leafsize, 0);
6006                         if (!tmp) {
6007                                 fprintf(stderr, "Error reading tree block\n");
6008                                 return -EIO;
6009                         }
6010                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
6011                         free_extent_buffer(tmp);
6012                         if (ret)
6013                                 return ret;
6014                 }
6015         }
6016
6017         return 0;
6018 }
6019
6020 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
6021 {
6022         int ret;
6023
6024         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
6025         if (ret)
6026                 return ret;
6027
6028         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
6029 }
6030
6031 static int reset_block_groups(struct btrfs_fs_info *fs_info)
6032 {
6033         struct btrfs_block_group_cache *cache;
6034         struct btrfs_path *path;
6035         struct extent_buffer *leaf;
6036         struct btrfs_chunk *chunk;
6037         struct btrfs_key key;
6038         int ret;
6039         u64 start;
6040
6041         path = btrfs_alloc_path();
6042         if (!path)
6043                 return -ENOMEM;
6044
6045         key.objectid = 0;
6046         key.type = BTRFS_CHUNK_ITEM_KEY;
6047         key.offset = 0;
6048
6049         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
6050         if (ret < 0) {
6051                 btrfs_free_path(path);
6052                 return ret;
6053         }
6054
6055         /*
6056          * We do this in case the block groups were screwed up and had alloc
6057          * bits that aren't actually set on the chunks.  This happens with
6058          * restored images every time and could happen in real life I guess.
6059          */
6060         fs_info->avail_data_alloc_bits = 0;
6061         fs_info->avail_metadata_alloc_bits = 0;
6062         fs_info->avail_system_alloc_bits = 0;
6063
6064         /* First we need to create the in-memory block groups */
6065         while (1) {
6066                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6067                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
6068                         if (ret < 0) {
6069                                 btrfs_free_path(path);
6070                                 return ret;
6071                         }
6072                         if (ret) {
6073                                 ret = 0;
6074                                 break;
6075                         }
6076                 }
6077                 leaf = path->nodes[0];
6078                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6079                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
6080                         path->slots[0]++;
6081                         continue;
6082                 }
6083
6084                 chunk = btrfs_item_ptr(leaf, path->slots[0],
6085                                        struct btrfs_chunk);
6086                 btrfs_add_block_group(fs_info, 0,
6087                                       btrfs_chunk_type(leaf, chunk),
6088                                       key.objectid, key.offset,
6089                                       btrfs_chunk_length(leaf, chunk));
6090                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
6091                                  key.offset + btrfs_chunk_length(leaf, chunk),
6092                                  GFP_NOFS);
6093                 path->slots[0]++;
6094         }
6095         start = 0;
6096         while (1) {
6097                 cache = btrfs_lookup_first_block_group(fs_info, start);
6098                 if (!cache)
6099                         break;
6100                 cache->cached = 1;
6101                 start = cache->key.objectid + cache->key.offset;
6102         }
6103
6104         btrfs_free_path(path);
6105         return 0;
6106 }
6107
6108 static int reset_balance(struct btrfs_trans_handle *trans,
6109                          struct btrfs_fs_info *fs_info)
6110 {
6111         struct btrfs_root *root = fs_info->tree_root;
6112         struct btrfs_path *path;
6113         struct extent_buffer *leaf;
6114         struct btrfs_key key;
6115         int del_slot, del_nr = 0;
6116         int ret;
6117         int found = 0;
6118
6119         path = btrfs_alloc_path();
6120         if (!path)
6121                 return -ENOMEM;
6122
6123         key.objectid = BTRFS_BALANCE_OBJECTID;
6124         key.type = BTRFS_BALANCE_ITEM_KEY;
6125         key.offset = 0;
6126
6127         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6128         if (ret) {
6129                 if (ret > 0)
6130                         ret = 0;
6131                 goto out;
6132         }
6133
6134         ret = btrfs_del_item(trans, root, path);
6135         if (ret)
6136                 goto out;
6137         btrfs_release_path(path);
6138
6139         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
6140         key.type = BTRFS_ROOT_ITEM_KEY;
6141         key.offset = 0;
6142
6143         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6144         if (ret < 0)
6145                 goto out;
6146         while (1) {
6147                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6148                         if (!found)
6149                                 break;
6150
6151                         if (del_nr) {
6152                                 ret = btrfs_del_items(trans, root, path,
6153                                                       del_slot, del_nr);
6154                                 del_nr = 0;
6155                                 if (ret)
6156                                         goto out;
6157                         }
6158                         key.offset++;
6159                         btrfs_release_path(path);
6160
6161                         found = 0;
6162                         ret = btrfs_search_slot(trans, root, &key, path,
6163                                                 -1, 1);
6164                         if (ret < 0)
6165                                 goto out;
6166                         continue;
6167                 }
6168                 found = 1;
6169                 leaf = path->nodes[0];
6170                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6171                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
6172                         break;
6173                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6174                         path->slots[0]++;
6175                         continue;
6176                 }
6177                 if (!del_nr) {
6178                         del_slot = path->slots[0];
6179                         del_nr = 1;
6180                 } else {
6181                         del_nr++;
6182                 }
6183                 path->slots[0]++;
6184         }
6185
6186         if (del_nr) {
6187                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
6188                 if (ret)
6189                         goto out;
6190         }
6191         btrfs_release_path(path);
6192
6193         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
6194         key.type = BTRFS_ROOT_ITEM_KEY;
6195         key.offset = (u64)-1;
6196         root = btrfs_read_fs_root(fs_info, &key);
6197         if (IS_ERR(root)) {
6198                 fprintf(stderr, "Error reading data reloc tree\n");
6199                 return PTR_ERR(root);
6200         }
6201         root->track_dirty = 1;
6202         if (root->last_trans != trans->transid) {
6203                 root->last_trans = trans->transid;
6204                 root->commit_root = root->node;
6205                 extent_buffer_get(root->node);
6206         }
6207         ret = btrfs_fsck_reinit_root(trans, root, 0);
6208 out:
6209         btrfs_free_path(path);
6210         return ret;
6211 }
6212
6213 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
6214                               struct btrfs_fs_info *fs_info)
6215 {
6216         u64 start = 0;
6217         int ret;
6218
6219         /*
6220          * The only reason we don't do this is because right now we're just
6221          * walking the trees we find and pinning down their bytes, we don't look
6222          * at any of the leaves.  In order to do mixed groups we'd have to check
6223          * the leaves of any fs roots and pin down the bytes for any file
6224          * extents we find.  Not hard but why do it if we don't have to?
6225          */
6226         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
6227                 fprintf(stderr, "We don't support re-initing the extent tree "
6228                         "for mixed block groups yet, please notify a btrfs "
6229                         "developer you want to do this so they can add this "
6230                         "functionality.\n");
6231                 return -EINVAL;
6232         }
6233
6234         /*
6235          * first we need to walk all of the trees except the extent tree and pin
6236          * down the bytes that are in use so we don't overwrite any existing
6237          * metadata.
6238          */
6239         ret = pin_metadata_blocks(fs_info);
6240         if (ret) {
6241                 fprintf(stderr, "error pinning down used bytes\n");
6242                 return ret;
6243         }
6244
6245         /*
6246          * Need to drop all the block groups since we're going to recreate all
6247          * of them again.
6248          */
6249         btrfs_free_block_groups(fs_info);
6250         ret = reset_block_groups(fs_info);
6251         if (ret) {
6252                 fprintf(stderr, "error resetting the block groups\n");
6253                 return ret;
6254         }
6255
6256         ret = reset_balance(trans, fs_info);
6257         if (ret) {
6258                 fprintf(stderr, "error reseting the pending balance\n");
6259                 return ret;
6260         }
6261
6262         /* Ok we can allocate now, reinit the extent root */
6263         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
6264         if (ret) {
6265                 fprintf(stderr, "extent root initialization failed\n");
6266                 /*
6267                  * When the transaction code is updated we should end the
6268                  * transaction, but for now progs only knows about commit so
6269                  * just return an error.
6270                  */
6271                 return ret;
6272         }
6273
6274         /*
6275          * Now we have all the in-memory block groups setup so we can make
6276          * allocations properly, and the metadata we care about is safe since we
6277          * pinned all of it above.
6278          */
6279         while (1) {
6280                 struct btrfs_block_group_cache *cache;
6281
6282                 cache = btrfs_lookup_first_block_group(fs_info, start);
6283                 if (!cache)
6284                         break;
6285                 start = cache->key.objectid + cache->key.offset;
6286                 ret = btrfs_insert_item(trans, fs_info->extent_root,
6287                                         &cache->key, &cache->item,
6288                                         sizeof(cache->item));
6289                 if (ret) {
6290                         fprintf(stderr, "Error adding block group\n");
6291                         return ret;
6292                 }
6293                 btrfs_extent_post_op(trans, fs_info->extent_root);
6294         }
6295
6296         return 0;
6297 }
6298
6299 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
6300 {
6301         struct btrfs_path *path;
6302         struct btrfs_trans_handle *trans;
6303         struct btrfs_key key;
6304         int ret;
6305
6306         printf("Recowing metadata block %llu\n", eb->start);
6307         key.objectid = btrfs_header_owner(eb);
6308         key.type = BTRFS_ROOT_ITEM_KEY;
6309         key.offset = (u64)-1;
6310
6311         root = btrfs_read_fs_root(root->fs_info, &key);
6312         if (IS_ERR(root)) {
6313                 fprintf(stderr, "Couldn't find owner root %llu\n",
6314                         key.objectid);
6315                 return PTR_ERR(root);
6316         }
6317
6318         path = btrfs_alloc_path();
6319         if (!path)
6320                 return -ENOMEM;
6321
6322         trans = btrfs_start_transaction(root, 1);
6323         if (IS_ERR(trans)) {
6324                 btrfs_free_path(path);
6325                 return PTR_ERR(trans);
6326         }
6327
6328         path->lowest_level = btrfs_header_level(eb);
6329         if (path->lowest_level)
6330                 btrfs_node_key_to_cpu(eb, &key, 0);
6331         else
6332                 btrfs_item_key_to_cpu(eb, &key, 0);
6333
6334         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6335         btrfs_commit_transaction(trans, root);
6336         btrfs_free_path(path);
6337         return ret;
6338 }
6339
6340 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
6341 {
6342         struct btrfs_path *path;
6343         struct btrfs_trans_handle *trans;
6344         struct btrfs_key key;
6345         int ret;
6346
6347         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
6348                bad->key.type, bad->key.offset);
6349         key.objectid = bad->root_id;
6350         key.type = BTRFS_ROOT_ITEM_KEY;
6351         key.offset = (u64)-1;
6352
6353         root = btrfs_read_fs_root(root->fs_info, &key);
6354         if (IS_ERR(root)) {
6355                 fprintf(stderr, "Couldn't find owner root %llu\n",
6356                         key.objectid);
6357                 return PTR_ERR(root);
6358         }
6359
6360         path = btrfs_alloc_path();
6361         if (!path)
6362                 return -ENOMEM;
6363
6364         trans = btrfs_start_transaction(root, 1);
6365         if (IS_ERR(trans)) {
6366                 btrfs_free_path(path);
6367                 return PTR_ERR(trans);
6368         }
6369
6370         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
6371         if (ret) {
6372                 if (ret > 0)
6373                         ret = 0;
6374                 goto out;
6375         }
6376         ret = btrfs_del_item(trans, root, path);
6377 out:
6378         btrfs_commit_transaction(trans, root);
6379         btrfs_free_path(path);
6380         return ret;
6381 }
6382
6383 static struct option long_options[] = {
6384         { "super", 1, NULL, 's' },
6385         { "repair", 0, NULL, 0 },
6386         { "init-csum-tree", 0, NULL, 0 },
6387         { "init-extent-tree", 0, NULL, 0 },
6388         { "backup", 0, NULL, 0 },
6389         { NULL, 0, NULL, 0}
6390 };
6391
6392 const char * const cmd_check_usage[] = {
6393         "btrfs check [options] <device>",
6394         "Check an unmounted btrfs filesystem.",
6395         "",
6396         "-s|--super <superblock>     use this superblock copy",
6397         "-b|--backup                 use the backup root copy",
6398         "--repair                    try to repair the filesystem",
6399         "--init-csum-tree            create a new CRC tree",
6400         "--init-extent-tree          create a new extent tree",
6401         NULL
6402 };
6403
6404 int cmd_check(int argc, char **argv)
6405 {
6406         struct cache_tree root_cache;
6407         struct btrfs_root *root;
6408         struct btrfs_fs_info *info;
6409         u64 bytenr = 0;
6410         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
6411         int ret;
6412         u64 num;
6413         int option_index = 0;
6414         int init_csum_tree = 0;
6415         int init_extent_tree = 0;
6416         enum btrfs_open_ctree_flags ctree_flags =
6417                 OPEN_CTREE_PARTIAL | OPEN_CTREE_EXCLUSIVE;
6418
6419         while(1) {
6420                 int c;
6421                 c = getopt_long(argc, argv, "as:b", long_options,
6422                                 &option_index);
6423                 if (c < 0)
6424                         break;
6425                 switch(c) {
6426                         case 'a': /* ignored */ break;
6427                         case 'b':
6428                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
6429                                 break;
6430                         case 's':
6431                                 num = arg_strtou64(optarg);
6432                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
6433                                         fprintf(stderr,
6434                                                 "ERROR: super mirror should be less than: %d\n",
6435                                                 BTRFS_SUPER_MIRROR_MAX);
6436                                         exit(1);
6437                                 }
6438                                 bytenr = btrfs_sb_offset(((int)num));
6439                                 printf("using SB copy %llu, bytenr %llu\n", num,
6440                                        (unsigned long long)bytenr);
6441                                 break;
6442                         case '?':
6443                         case 'h':
6444                                 usage(cmd_check_usage);
6445                 }
6446                 if (option_index == 1) {
6447                         printf("enabling repair mode\n");
6448                         repair = 1;
6449                         ctree_flags |= OPEN_CTREE_WRITES;
6450                 } else if (option_index == 2) {
6451                         printf("Creating a new CRC tree\n");
6452                         init_csum_tree = 1;
6453                         repair = 1;
6454                         ctree_flags |= OPEN_CTREE_WRITES;
6455                 } else if (option_index == 3) {
6456                         init_extent_tree = 1;
6457                         ctree_flags |= (OPEN_CTREE_WRITES |
6458                                         OPEN_CTREE_NO_BLOCK_GROUPS);
6459                         repair = 1;
6460                 }
6461
6462         }
6463         argc = argc - optind;
6464
6465         if (argc != 1)
6466                 usage(cmd_check_usage);
6467
6468         radix_tree_init();
6469         cache_tree_init(&root_cache);
6470
6471         if((ret = check_mounted(argv[optind])) < 0) {
6472                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
6473                 goto err_out;
6474         } else if(ret) {
6475                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
6476                 ret = -EBUSY;
6477                 goto err_out;
6478         }
6479
6480         info = open_ctree_fs_info(argv[optind], bytenr, 0, ctree_flags);
6481         if (!info) {
6482                 fprintf(stderr, "Couldn't open file system\n");
6483                 ret = -EIO;
6484                 goto err_out;
6485         }
6486
6487         root = info->fs_root;
6488         uuid_unparse(info->super_copy->fsid, uuidbuf);
6489         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
6490
6491         if (!extent_buffer_uptodate(info->tree_root->node) ||
6492             !extent_buffer_uptodate(info->dev_root->node) ||
6493             !extent_buffer_uptodate(info->chunk_root->node)) {
6494                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6495                 ret = -EIO;
6496                 goto close_out;
6497         }
6498
6499         if (init_extent_tree || init_csum_tree) {
6500                 struct btrfs_trans_handle *trans;
6501
6502                 trans = btrfs_start_transaction(info->extent_root, 0);
6503                 if (IS_ERR(trans)) {
6504                         fprintf(stderr, "Error starting transaction\n");
6505                         ret = PTR_ERR(trans);
6506                         goto close_out;
6507                 }
6508
6509                 if (init_extent_tree) {
6510                         printf("Creating a new extent tree\n");
6511                         ret = reinit_extent_tree(trans, info);
6512                         if (ret)
6513                                 goto close_out;
6514                 }
6515
6516                 if (init_csum_tree) {
6517                         fprintf(stderr, "Reinit crc root\n");
6518                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
6519                         if (ret) {
6520                                 fprintf(stderr, "crc root initialization failed\n");
6521                                 ret = -EIO;
6522                                 goto close_out;
6523                         }
6524                 }
6525                 /*
6526                  * Ok now we commit and run the normal fsck, which will add
6527                  * extent entries for all of the items it finds.
6528                  */
6529                 ret = btrfs_commit_transaction(trans, info->extent_root);
6530                 if (ret)
6531                         goto close_out;
6532         }
6533         if (!extent_buffer_uptodate(info->extent_root->node)) {
6534                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6535                 ret = -EIO;
6536                 goto close_out;
6537         }
6538
6539         fprintf(stderr, "checking extents\n");
6540         ret = check_chunks_and_extents(root);
6541         if (ret)
6542                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
6543
6544         fprintf(stderr, "checking free space cache\n");
6545         ret = check_space_cache(root);
6546         if (ret)
6547                 goto out;
6548
6549         /*
6550          * We used to have to have these hole extents in between our real
6551          * extents so if we don't have this flag set we need to make sure there
6552          * are no gaps in the file extents for inodes, otherwise we can just
6553          * ignore it when this happens.
6554          */
6555         no_holes = btrfs_fs_incompat(root->fs_info,
6556                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
6557         fprintf(stderr, "checking fs roots\n");
6558         ret = check_fs_roots(root, &root_cache);
6559         if (ret)
6560                 goto out;
6561
6562         fprintf(stderr, "checking csums\n");
6563         ret = check_csums(root);
6564         if (ret)
6565                 goto out;
6566
6567         fprintf(stderr, "checking root refs\n");
6568         ret = check_root_refs(root, &root_cache);
6569         if (ret)
6570                 goto out;
6571
6572         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
6573                 struct extent_buffer *eb;
6574
6575                 eb = list_first_entry(&root->fs_info->recow_ebs,
6576                                       struct extent_buffer, recow);
6577                 ret = recow_extent_buffer(root, eb);
6578                 if (ret)
6579                         break;
6580         }
6581
6582         while (!list_empty(&delete_items)) {
6583                 struct bad_item *bad;
6584
6585                 bad = list_first_entry(&delete_items, struct bad_item, list);
6586                 list_del_init(&bad->list);
6587                 if (repair)
6588                         ret = delete_bad_item(root, bad);
6589                 free(bad);
6590         }
6591
6592         if (!list_empty(&root->fs_info->recow_ebs)) {
6593                 fprintf(stderr, "Transid errors in file system\n");
6594                 ret = 1;
6595         }
6596 out:
6597         if (found_old_backref) { /*
6598                  * there was a disk format change when mixed
6599                  * backref was in testing tree. The old format
6600                  * existed about one week.
6601                  */
6602                 printf("\n * Found old mixed backref format. "
6603                        "The old format is not supported! *"
6604                        "\n * Please mount the FS in readonly mode, "
6605                        "backup data and re-format the FS. *\n\n");
6606                 ret = 1;
6607         }
6608         printf("found %llu bytes used err is %d\n",
6609                (unsigned long long)bytes_used, ret);
6610         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
6611         printf("total tree bytes: %llu\n",
6612                (unsigned long long)total_btree_bytes);
6613         printf("total fs tree bytes: %llu\n",
6614                (unsigned long long)total_fs_tree_bytes);
6615         printf("total extent tree bytes: %llu\n",
6616                (unsigned long long)total_extent_tree_bytes);
6617         printf("btree space waste bytes: %llu\n",
6618                (unsigned long long)btree_space_waste);
6619         printf("file data blocks allocated: %llu\n referenced %llu\n",
6620                 (unsigned long long)data_bytes_allocated,
6621                 (unsigned long long)data_bytes_referenced);
6622         printf("%s\n", BTRFS_BUILD_VERSION);
6623
6624         free_root_recs_tree(&root_cache);
6625 close_out:
6626         close_ctree(root);
6627 err_out:
6628         return ret;
6629 }