Btrfs-progs: delete bogus dir indexes
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "ctree.h"
31 #include "volumes.h"
32 #include "repair.h"
33 #include "disk-io.h"
34 #include "print-tree.h"
35 #include "transaction.h"
36 #include "version.h"
37 #include "utils.h"
38 #include "commands.h"
39 #include "free-space-cache.h"
40 #include "btrfsck.h"
41 #include "qgroup-verify.h"
42 #include "rbtree-utils.h"
43
44 static u64 bytes_used = 0;
45 static u64 total_csum_bytes = 0;
46 static u64 total_btree_bytes = 0;
47 static u64 total_fs_tree_bytes = 0;
48 static u64 total_extent_tree_bytes = 0;
49 static u64 btree_space_waste = 0;
50 static u64 data_bytes_allocated = 0;
51 static u64 data_bytes_referenced = 0;
52 static int found_old_backref = 0;
53 static LIST_HEAD(duplicate_extents);
54 static LIST_HEAD(delete_items);
55 static int repair = 0;
56 static int no_holes = 0;
57 static int init_extent_tree = 0;
58 static int check_data_csum = 0;
59
60 struct extent_backref {
61         struct list_head list;
62         unsigned int is_data:1;
63         unsigned int found_extent_tree:1;
64         unsigned int full_backref:1;
65         unsigned int found_ref:1;
66         unsigned int broken:1;
67 };
68
69 struct data_backref {
70         struct extent_backref node;
71         union {
72                 u64 parent;
73                 u64 root;
74         };
75         u64 owner;
76         u64 offset;
77         u64 disk_bytenr;
78         u64 bytes;
79         u64 ram_bytes;
80         u32 num_refs;
81         u32 found_ref;
82 };
83
84 struct tree_backref {
85         struct extent_backref node;
86         union {
87                 u64 parent;
88                 u64 root;
89         };
90 };
91
92 struct extent_record {
93         struct list_head backrefs;
94         struct list_head dups;
95         struct list_head list;
96         struct cache_extent cache;
97         struct btrfs_disk_key parent_key;
98         u64 start;
99         u64 max_size;
100         u64 nr;
101         u64 refs;
102         u64 extent_item_refs;
103         u64 generation;
104         u64 parent_generation;
105         u64 info_objectid;
106         u32 num_duplicates;
107         u8 info_level;
108         unsigned int found_rec:1;
109         unsigned int content_checked:1;
110         unsigned int owner_ref_checked:1;
111         unsigned int is_root:1;
112         unsigned int metadata:1;
113 };
114
115 struct inode_backref {
116         struct list_head list;
117         unsigned int found_dir_item:1;
118         unsigned int found_dir_index:1;
119         unsigned int found_inode_ref:1;
120         unsigned int filetype:8;
121         int errors;
122         unsigned int ref_type;
123         u64 dir;
124         u64 index;
125         u16 namelen;
126         char name[0];
127 };
128
129 struct dropping_root_item_record {
130         struct list_head list;
131         struct btrfs_root_item ri;
132         struct btrfs_key found_key;
133 };
134
135 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
136 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
137 #define REF_ERR_NO_INODE_REF            (1 << 2)
138 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
139 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
140 #define REF_ERR_DUP_INODE_REF           (1 << 5)
141 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
142 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
143 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
144 #define REF_ERR_NO_ROOT_REF             (1 << 9)
145 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
146 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
147 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
148
149 struct inode_record {
150         struct list_head backrefs;
151         unsigned int checked:1;
152         unsigned int merging:1;
153         unsigned int found_inode_item:1;
154         unsigned int found_dir_item:1;
155         unsigned int found_file_extent:1;
156         unsigned int found_csum_item:1;
157         unsigned int some_csum_missing:1;
158         unsigned int nodatasum:1;
159         int errors;
160
161         u64 ino;
162         u32 nlink;
163         u32 imode;
164         u64 isize;
165         u64 nbytes;
166
167         u32 found_link;
168         u64 found_size;
169         u64 extent_start;
170         u64 extent_end;
171         u64 first_extent_gap;
172
173         u32 refs;
174 };
175
176 #define I_ERR_NO_INODE_ITEM             (1 << 0)
177 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
178 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
179 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
180 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
181 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
182 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
183 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
184 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
185 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
186 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
187 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
188 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
189 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
190
191 struct root_backref {
192         struct list_head list;
193         unsigned int found_dir_item:1;
194         unsigned int found_dir_index:1;
195         unsigned int found_back_ref:1;
196         unsigned int found_forward_ref:1;
197         unsigned int reachable:1;
198         int errors;
199         u64 ref_root;
200         u64 dir;
201         u64 index;
202         u16 namelen;
203         char name[0];
204 };
205
206 struct root_record {
207         struct list_head backrefs;
208         struct cache_extent cache;
209         unsigned int found_root_item:1;
210         u64 objectid;
211         u32 found_ref;
212 };
213
214 struct ptr_node {
215         struct cache_extent cache;
216         void *data;
217 };
218
219 struct shared_node {
220         struct cache_extent cache;
221         struct cache_tree root_cache;
222         struct cache_tree inode_cache;
223         struct inode_record *current;
224         u32 refs;
225 };
226
227 struct block_info {
228         u64 start;
229         u32 size;
230 };
231
232 struct walk_control {
233         struct cache_tree shared;
234         struct shared_node *nodes[BTRFS_MAX_LEVEL];
235         int active_node;
236         int root_level;
237 };
238
239 struct bad_item {
240         struct btrfs_key key;
241         u64 root_id;
242         struct list_head list;
243 };
244
245 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
246
247 static void record_root_in_trans(struct btrfs_trans_handle *trans,
248                                  struct btrfs_root *root)
249 {
250         if (root->last_trans != trans->transid) {
251                 root->track_dirty = 1;
252                 root->last_trans = trans->transid;
253                 root->commit_root = root->node;
254                 extent_buffer_get(root->node);
255         }
256 }
257
258 static u8 imode_to_type(u32 imode)
259 {
260 #define S_SHIFT 12
261         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
262                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
263                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
264                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
265                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
266                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
267                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
268                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
269         };
270
271         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
272 #undef S_SHIFT
273 }
274
275 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
276 {
277         struct device_record *rec1;
278         struct device_record *rec2;
279
280         rec1 = rb_entry(node1, struct device_record, node);
281         rec2 = rb_entry(node2, struct device_record, node);
282         if (rec1->devid > rec2->devid)
283                 return -1;
284         else if (rec1->devid < rec2->devid)
285                 return 1;
286         else
287                 return 0;
288 }
289
290 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
291 {
292         struct inode_record *rec;
293         struct inode_backref *backref;
294         struct inode_backref *orig;
295         size_t size;
296
297         rec = malloc(sizeof(*rec));
298         memcpy(rec, orig_rec, sizeof(*rec));
299         rec->refs = 1;
300         INIT_LIST_HEAD(&rec->backrefs);
301
302         list_for_each_entry(orig, &orig_rec->backrefs, list) {
303                 size = sizeof(*orig) + orig->namelen + 1;
304                 backref = malloc(size);
305                 memcpy(backref, orig, size);
306                 list_add_tail(&backref->list, &rec->backrefs);
307         }
308         return rec;
309 }
310
311 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
312 {
313         u64 root_objectid = root->root_key.objectid;
314         int errors = rec->errors;
315
316         if (!errors)
317                 return;
318         /* reloc root errors, we print its corresponding fs root objectid*/
319         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
320                 root_objectid = root->root_key.offset;
321                 fprintf(stderr, "reloc");
322         }
323         fprintf(stderr, "root %llu inode %llu errors %x",
324                 (unsigned long long) root_objectid,
325                 (unsigned long long) rec->ino, rec->errors);
326
327         if (errors & I_ERR_NO_INODE_ITEM)
328                 fprintf(stderr, ", no inode item");
329         if (errors & I_ERR_NO_ORPHAN_ITEM)
330                 fprintf(stderr, ", no orphan item");
331         if (errors & I_ERR_DUP_INODE_ITEM)
332                 fprintf(stderr, ", dup inode item");
333         if (errors & I_ERR_DUP_DIR_INDEX)
334                 fprintf(stderr, ", dup dir index");
335         if (errors & I_ERR_ODD_DIR_ITEM)
336                 fprintf(stderr, ", odd dir item");
337         if (errors & I_ERR_ODD_FILE_EXTENT)
338                 fprintf(stderr, ", odd file extent");
339         if (errors & I_ERR_BAD_FILE_EXTENT)
340                 fprintf(stderr, ", bad file extent");
341         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
342                 fprintf(stderr, ", file extent overlap");
343         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
344                 fprintf(stderr, ", file extent discount");
345         if (errors & I_ERR_DIR_ISIZE_WRONG)
346                 fprintf(stderr, ", dir isize wrong");
347         if (errors & I_ERR_FILE_NBYTES_WRONG)
348                 fprintf(stderr, ", nbytes wrong");
349         if (errors & I_ERR_ODD_CSUM_ITEM)
350                 fprintf(stderr, ", odd csum item");
351         if (errors & I_ERR_SOME_CSUM_MISSING)
352                 fprintf(stderr, ", some csum missing");
353         if (errors & I_ERR_LINK_COUNT_WRONG)
354                 fprintf(stderr, ", link count wrong");
355         fprintf(stderr, "\n");
356 }
357
358 static void print_ref_error(int errors)
359 {
360         if (errors & REF_ERR_NO_DIR_ITEM)
361                 fprintf(stderr, ", no dir item");
362         if (errors & REF_ERR_NO_DIR_INDEX)
363                 fprintf(stderr, ", no dir index");
364         if (errors & REF_ERR_NO_INODE_REF)
365                 fprintf(stderr, ", no inode ref");
366         if (errors & REF_ERR_DUP_DIR_ITEM)
367                 fprintf(stderr, ", dup dir item");
368         if (errors & REF_ERR_DUP_DIR_INDEX)
369                 fprintf(stderr, ", dup dir index");
370         if (errors & REF_ERR_DUP_INODE_REF)
371                 fprintf(stderr, ", dup inode ref");
372         if (errors & REF_ERR_INDEX_UNMATCH)
373                 fprintf(stderr, ", index unmatch");
374         if (errors & REF_ERR_FILETYPE_UNMATCH)
375                 fprintf(stderr, ", filetype unmatch");
376         if (errors & REF_ERR_NAME_TOO_LONG)
377                 fprintf(stderr, ", name too long");
378         if (errors & REF_ERR_NO_ROOT_REF)
379                 fprintf(stderr, ", no root ref");
380         if (errors & REF_ERR_NO_ROOT_BACKREF)
381                 fprintf(stderr, ", no root backref");
382         if (errors & REF_ERR_DUP_ROOT_REF)
383                 fprintf(stderr, ", dup root ref");
384         if (errors & REF_ERR_DUP_ROOT_BACKREF)
385                 fprintf(stderr, ", dup root backref");
386         fprintf(stderr, "\n");
387 }
388
389 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
390                                           u64 ino, int mod)
391 {
392         struct ptr_node *node;
393         struct cache_extent *cache;
394         struct inode_record *rec = NULL;
395         int ret;
396
397         cache = lookup_cache_extent(inode_cache, ino, 1);
398         if (cache) {
399                 node = container_of(cache, struct ptr_node, cache);
400                 rec = node->data;
401                 if (mod && rec->refs > 1) {
402                         node->data = clone_inode_rec(rec);
403                         rec->refs--;
404                         rec = node->data;
405                 }
406         } else if (mod) {
407                 rec = calloc(1, sizeof(*rec));
408                 rec->ino = ino;
409                 rec->extent_start = (u64)-1;
410                 rec->first_extent_gap = (u64)-1;
411                 rec->refs = 1;
412                 INIT_LIST_HEAD(&rec->backrefs);
413
414                 node = malloc(sizeof(*node));
415                 node->cache.start = ino;
416                 node->cache.size = 1;
417                 node->data = rec;
418
419                 if (ino == BTRFS_FREE_INO_OBJECTID)
420                         rec->found_link = 1;
421
422                 ret = insert_cache_extent(inode_cache, &node->cache);
423                 BUG_ON(ret);
424         }
425         return rec;
426 }
427
428 static void free_inode_rec(struct inode_record *rec)
429 {
430         struct inode_backref *backref;
431
432         if (--rec->refs > 0)
433                 return;
434
435         while (!list_empty(&rec->backrefs)) {
436                 backref = list_entry(rec->backrefs.next,
437                                      struct inode_backref, list);
438                 list_del(&backref->list);
439                 free(backref);
440         }
441         free(rec);
442 }
443
444 static int can_free_inode_rec(struct inode_record *rec)
445 {
446         if (!rec->errors && rec->checked && rec->found_inode_item &&
447             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
448                 return 1;
449         return 0;
450 }
451
452 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
453                                  struct inode_record *rec)
454 {
455         struct cache_extent *cache;
456         struct inode_backref *tmp, *backref;
457         struct ptr_node *node;
458         unsigned char filetype;
459
460         if (!rec->found_inode_item)
461                 return;
462
463         filetype = imode_to_type(rec->imode);
464         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
465                 if (backref->found_dir_item && backref->found_dir_index) {
466                         if (backref->filetype != filetype)
467                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
468                         if (!backref->errors && backref->found_inode_ref) {
469                                 list_del(&backref->list);
470                                 free(backref);
471                         }
472                 }
473         }
474
475         if (!rec->checked || rec->merging)
476                 return;
477
478         if (S_ISDIR(rec->imode)) {
479                 if (rec->found_size != rec->isize)
480                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
481                 if (rec->found_file_extent)
482                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
483         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
484                 if (rec->found_dir_item)
485                         rec->errors |= I_ERR_ODD_DIR_ITEM;
486                 if (rec->found_size != rec->nbytes)
487                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
488                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
489                         rec->first_extent_gap = 0;
490                 if (rec->nlink > 0 && !no_holes &&
491                     (rec->extent_end < rec->isize ||
492                      rec->first_extent_gap < rec->isize))
493                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
494         }
495
496         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
497                 if (rec->found_csum_item && rec->nodatasum)
498                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
499                 if (rec->some_csum_missing && !rec->nodatasum)
500                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
501         }
502
503         BUG_ON(rec->refs != 1);
504         if (can_free_inode_rec(rec)) {
505                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
506                 node = container_of(cache, struct ptr_node, cache);
507                 BUG_ON(node->data != rec);
508                 remove_cache_extent(inode_cache, &node->cache);
509                 free(node);
510                 free_inode_rec(rec);
511         }
512 }
513
514 static int check_orphan_item(struct btrfs_root *root, u64 ino)
515 {
516         struct btrfs_path path;
517         struct btrfs_key key;
518         int ret;
519
520         key.objectid = BTRFS_ORPHAN_OBJECTID;
521         key.type = BTRFS_ORPHAN_ITEM_KEY;
522         key.offset = ino;
523
524         btrfs_init_path(&path);
525         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
526         btrfs_release_path(&path);
527         if (ret > 0)
528                 ret = -ENOENT;
529         return ret;
530 }
531
532 static int process_inode_item(struct extent_buffer *eb,
533                               int slot, struct btrfs_key *key,
534                               struct shared_node *active_node)
535 {
536         struct inode_record *rec;
537         struct btrfs_inode_item *item;
538
539         rec = active_node->current;
540         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
541         if (rec->found_inode_item) {
542                 rec->errors |= I_ERR_DUP_INODE_ITEM;
543                 return 1;
544         }
545         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
546         rec->nlink = btrfs_inode_nlink(eb, item);
547         rec->isize = btrfs_inode_size(eb, item);
548         rec->nbytes = btrfs_inode_nbytes(eb, item);
549         rec->imode = btrfs_inode_mode(eb, item);
550         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
551                 rec->nodatasum = 1;
552         rec->found_inode_item = 1;
553         if (rec->nlink == 0)
554                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
555         maybe_free_inode_rec(&active_node->inode_cache, rec);
556         return 0;
557 }
558
559 static struct inode_backref *get_inode_backref(struct inode_record *rec,
560                                                 const char *name,
561                                                 int namelen, u64 dir)
562 {
563         struct inode_backref *backref;
564
565         list_for_each_entry(backref, &rec->backrefs, list) {
566                 if (backref->dir != dir || backref->namelen != namelen)
567                         continue;
568                 if (memcmp(name, backref->name, namelen))
569                         continue;
570                 return backref;
571         }
572
573         backref = malloc(sizeof(*backref) + namelen + 1);
574         memset(backref, 0, sizeof(*backref));
575         backref->dir = dir;
576         backref->namelen = namelen;
577         memcpy(backref->name, name, namelen);
578         backref->name[namelen] = '\0';
579         list_add_tail(&backref->list, &rec->backrefs);
580         return backref;
581 }
582
583 static int add_inode_backref(struct cache_tree *inode_cache,
584                              u64 ino, u64 dir, u64 index,
585                              const char *name, int namelen,
586                              int filetype, int itemtype, int errors)
587 {
588         struct inode_record *rec;
589         struct inode_backref *backref;
590
591         rec = get_inode_rec(inode_cache, ino, 1);
592         backref = get_inode_backref(rec, name, namelen, dir);
593         if (errors)
594                 backref->errors |= errors;
595         if (itemtype == BTRFS_DIR_INDEX_KEY) {
596                 if (backref->found_dir_index)
597                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
598                 if (backref->found_inode_ref && backref->index != index)
599                         backref->errors |= REF_ERR_INDEX_UNMATCH;
600                 if (backref->found_dir_item && backref->filetype != filetype)
601                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
602
603                 backref->index = index;
604                 backref->filetype = filetype;
605                 backref->found_dir_index = 1;
606         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
607                 rec->found_link++;
608                 if (backref->found_dir_item)
609                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
610                 if (backref->found_dir_index && backref->filetype != filetype)
611                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
612
613                 backref->filetype = filetype;
614                 backref->found_dir_item = 1;
615         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
616                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
617                 if (backref->found_inode_ref)
618                         backref->errors |= REF_ERR_DUP_INODE_REF;
619                 if (backref->found_dir_index && backref->index != index)
620                         backref->errors |= REF_ERR_INDEX_UNMATCH;
621
622                 backref->ref_type = itemtype;
623                 backref->index = index;
624                 backref->found_inode_ref = 1;
625         } else {
626                 BUG_ON(1);
627         }
628
629         maybe_free_inode_rec(inode_cache, rec);
630         return 0;
631 }
632
633 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
634                             struct cache_tree *dst_cache)
635 {
636         struct inode_backref *backref;
637         u32 dir_count = 0;
638
639         dst->merging = 1;
640         list_for_each_entry(backref, &src->backrefs, list) {
641                 if (backref->found_dir_index) {
642                         add_inode_backref(dst_cache, dst->ino, backref->dir,
643                                         backref->index, backref->name,
644                                         backref->namelen, backref->filetype,
645                                         BTRFS_DIR_INDEX_KEY, backref->errors);
646                 }
647                 if (backref->found_dir_item) {
648                         dir_count++;
649                         add_inode_backref(dst_cache, dst->ino,
650                                         backref->dir, 0, backref->name,
651                                         backref->namelen, backref->filetype,
652                                         BTRFS_DIR_ITEM_KEY, backref->errors);
653                 }
654                 if (backref->found_inode_ref) {
655                         add_inode_backref(dst_cache, dst->ino,
656                                         backref->dir, backref->index,
657                                         backref->name, backref->namelen, 0,
658                                         backref->ref_type, backref->errors);
659                 }
660         }
661
662         if (src->found_dir_item)
663                 dst->found_dir_item = 1;
664         if (src->found_file_extent)
665                 dst->found_file_extent = 1;
666         if (src->found_csum_item)
667                 dst->found_csum_item = 1;
668         if (src->some_csum_missing)
669                 dst->some_csum_missing = 1;
670         if (dst->first_extent_gap > src->first_extent_gap)
671                 dst->first_extent_gap = src->first_extent_gap;
672
673         BUG_ON(src->found_link < dir_count);
674         dst->found_link += src->found_link - dir_count;
675         dst->found_size += src->found_size;
676         if (src->extent_start != (u64)-1) {
677                 if (dst->extent_start == (u64)-1) {
678                         dst->extent_start = src->extent_start;
679                         dst->extent_end = src->extent_end;
680                 } else {
681                         if (dst->extent_end > src->extent_start)
682                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
683                         else if (dst->extent_end < src->extent_start &&
684                                  dst->extent_end < dst->first_extent_gap)
685                                 dst->first_extent_gap = dst->extent_end;
686                         if (dst->extent_end < src->extent_end)
687                                 dst->extent_end = src->extent_end;
688                 }
689         }
690
691         dst->errors |= src->errors;
692         if (src->found_inode_item) {
693                 if (!dst->found_inode_item) {
694                         dst->nlink = src->nlink;
695                         dst->isize = src->isize;
696                         dst->nbytes = src->nbytes;
697                         dst->imode = src->imode;
698                         dst->nodatasum = src->nodatasum;
699                         dst->found_inode_item = 1;
700                 } else {
701                         dst->errors |= I_ERR_DUP_INODE_ITEM;
702                 }
703         }
704         dst->merging = 0;
705
706         return 0;
707 }
708
709 static int splice_shared_node(struct shared_node *src_node,
710                               struct shared_node *dst_node)
711 {
712         struct cache_extent *cache;
713         struct ptr_node *node, *ins;
714         struct cache_tree *src, *dst;
715         struct inode_record *rec, *conflict;
716         u64 current_ino = 0;
717         int splice = 0;
718         int ret;
719
720         if (--src_node->refs == 0)
721                 splice = 1;
722         if (src_node->current)
723                 current_ino = src_node->current->ino;
724
725         src = &src_node->root_cache;
726         dst = &dst_node->root_cache;
727 again:
728         cache = search_cache_extent(src, 0);
729         while (cache) {
730                 node = container_of(cache, struct ptr_node, cache);
731                 rec = node->data;
732                 cache = next_cache_extent(cache);
733
734                 if (splice) {
735                         remove_cache_extent(src, &node->cache);
736                         ins = node;
737                 } else {
738                         ins = malloc(sizeof(*ins));
739                         ins->cache.start = node->cache.start;
740                         ins->cache.size = node->cache.size;
741                         ins->data = rec;
742                         rec->refs++;
743                 }
744                 ret = insert_cache_extent(dst, &ins->cache);
745                 if (ret == -EEXIST) {
746                         conflict = get_inode_rec(dst, rec->ino, 1);
747                         merge_inode_recs(rec, conflict, dst);
748                         if (rec->checked) {
749                                 conflict->checked = 1;
750                                 if (dst_node->current == conflict)
751                                         dst_node->current = NULL;
752                         }
753                         maybe_free_inode_rec(dst, conflict);
754                         free_inode_rec(rec);
755                         free(ins);
756                 } else {
757                         BUG_ON(ret);
758                 }
759         }
760
761         if (src == &src_node->root_cache) {
762                 src = &src_node->inode_cache;
763                 dst = &dst_node->inode_cache;
764                 goto again;
765         }
766
767         if (current_ino > 0 && (!dst_node->current ||
768             current_ino > dst_node->current->ino)) {
769                 if (dst_node->current) {
770                         dst_node->current->checked = 1;
771                         maybe_free_inode_rec(dst, dst_node->current);
772                 }
773                 dst_node->current = get_inode_rec(dst, current_ino, 1);
774         }
775         return 0;
776 }
777
778 static void free_inode_ptr(struct cache_extent *cache)
779 {
780         struct ptr_node *node;
781         struct inode_record *rec;
782
783         node = container_of(cache, struct ptr_node, cache);
784         rec = node->data;
785         free_inode_rec(rec);
786         free(node);
787 }
788
789 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
790
791 static struct shared_node *find_shared_node(struct cache_tree *shared,
792                                             u64 bytenr)
793 {
794         struct cache_extent *cache;
795         struct shared_node *node;
796
797         cache = lookup_cache_extent(shared, bytenr, 1);
798         if (cache) {
799                 node = container_of(cache, struct shared_node, cache);
800                 return node;
801         }
802         return NULL;
803 }
804
805 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
806 {
807         int ret;
808         struct shared_node *node;
809
810         node = calloc(1, sizeof(*node));
811         node->cache.start = bytenr;
812         node->cache.size = 1;
813         cache_tree_init(&node->root_cache);
814         cache_tree_init(&node->inode_cache);
815         node->refs = refs;
816
817         ret = insert_cache_extent(shared, &node->cache);
818         BUG_ON(ret);
819         return 0;
820 }
821
822 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
823                              struct walk_control *wc, int level)
824 {
825         struct shared_node *node;
826         struct shared_node *dest;
827
828         if (level == wc->active_node)
829                 return 0;
830
831         BUG_ON(wc->active_node <= level);
832         node = find_shared_node(&wc->shared, bytenr);
833         if (!node) {
834                 add_shared_node(&wc->shared, bytenr, refs);
835                 node = find_shared_node(&wc->shared, bytenr);
836                 wc->nodes[level] = node;
837                 wc->active_node = level;
838                 return 0;
839         }
840
841         if (wc->root_level == wc->active_node &&
842             btrfs_root_refs(&root->root_item) == 0) {
843                 if (--node->refs == 0) {
844                         free_inode_recs_tree(&node->root_cache);
845                         free_inode_recs_tree(&node->inode_cache);
846                         remove_cache_extent(&wc->shared, &node->cache);
847                         free(node);
848                 }
849                 return 1;
850         }
851
852         dest = wc->nodes[wc->active_node];
853         splice_shared_node(node, dest);
854         if (node->refs == 0) {
855                 remove_cache_extent(&wc->shared, &node->cache);
856                 free(node);
857         }
858         return 1;
859 }
860
861 static int leave_shared_node(struct btrfs_root *root,
862                              struct walk_control *wc, int level)
863 {
864         struct shared_node *node;
865         struct shared_node *dest;
866         int i;
867
868         if (level == wc->root_level)
869                 return 0;
870
871         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
872                 if (wc->nodes[i])
873                         break;
874         }
875         BUG_ON(i >= BTRFS_MAX_LEVEL);
876
877         node = wc->nodes[wc->active_node];
878         wc->nodes[wc->active_node] = NULL;
879         wc->active_node = i;
880
881         dest = wc->nodes[wc->active_node];
882         if (wc->active_node < wc->root_level ||
883             btrfs_root_refs(&root->root_item) > 0) {
884                 BUG_ON(node->refs <= 1);
885                 splice_shared_node(node, dest);
886         } else {
887                 BUG_ON(node->refs < 2);
888                 node->refs--;
889         }
890         return 0;
891 }
892
893 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
894                          u64 child_root_id)
895 {
896         struct btrfs_path path;
897         struct btrfs_key key;
898         struct extent_buffer *leaf;
899         int has_parent = 0;
900         int ret;
901
902         btrfs_init_path(&path);
903
904         key.objectid = parent_root_id;
905         key.type = BTRFS_ROOT_REF_KEY;
906         key.offset = child_root_id;
907         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
908                                 0, 0);
909         if (ret < 0)
910                 return ret;
911         btrfs_release_path(&path);
912         if (!ret)
913                 return 1;
914
915         key.objectid = child_root_id;
916         key.type = BTRFS_ROOT_BACKREF_KEY;
917         key.offset = 0;
918         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
919                                 0, 0);
920         if (ret < 0)
921                 goto out;
922
923         while (1) {
924                 leaf = path.nodes[0];
925                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
926                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
927                         if (ret)
928                                 break;
929                         leaf = path.nodes[0];
930                 }
931
932                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
933                 if (key.objectid != child_root_id ||
934                     key.type != BTRFS_ROOT_BACKREF_KEY)
935                         break;
936
937                 has_parent = 1;
938
939                 if (key.offset == parent_root_id) {
940                         btrfs_release_path(&path);
941                         return 1;
942                 }
943
944                 path.slots[0]++;
945         }
946 out:
947         btrfs_release_path(&path);
948         if (ret < 0)
949                 return ret;
950         return has_parent? 0 : -1;
951 }
952
953 static int process_dir_item(struct btrfs_root *root,
954                             struct extent_buffer *eb,
955                             int slot, struct btrfs_key *key,
956                             struct shared_node *active_node)
957 {
958         u32 total;
959         u32 cur = 0;
960         u32 len;
961         u32 name_len;
962         u32 data_len;
963         int error;
964         int nritems = 0;
965         int filetype;
966         struct btrfs_dir_item *di;
967         struct inode_record *rec;
968         struct cache_tree *root_cache;
969         struct cache_tree *inode_cache;
970         struct btrfs_key location;
971         char namebuf[BTRFS_NAME_LEN];
972
973         root_cache = &active_node->root_cache;
974         inode_cache = &active_node->inode_cache;
975         rec = active_node->current;
976         rec->found_dir_item = 1;
977
978         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
979         total = btrfs_item_size_nr(eb, slot);
980         while (cur < total) {
981                 nritems++;
982                 btrfs_dir_item_key_to_cpu(eb, di, &location);
983                 name_len = btrfs_dir_name_len(eb, di);
984                 data_len = btrfs_dir_data_len(eb, di);
985                 filetype = btrfs_dir_type(eb, di);
986
987                 rec->found_size += name_len;
988                 if (name_len <= BTRFS_NAME_LEN) {
989                         len = name_len;
990                         error = 0;
991                 } else {
992                         len = BTRFS_NAME_LEN;
993                         error = REF_ERR_NAME_TOO_LONG;
994                 }
995                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
996
997                 if (location.type == BTRFS_INODE_ITEM_KEY) {
998                         add_inode_backref(inode_cache, location.objectid,
999                                           key->objectid, key->offset, namebuf,
1000                                           len, filetype, key->type, error);
1001                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1002                         add_inode_backref(root_cache, location.objectid,
1003                                           key->objectid, key->offset,
1004                                           namebuf, len, filetype,
1005                                           key->type, error);
1006                 } else {
1007                         fprintf(stderr, "warning line %d\n", __LINE__);
1008                 }
1009
1010                 len = sizeof(*di) + name_len + data_len;
1011                 di = (struct btrfs_dir_item *)((char *)di + len);
1012                 cur += len;
1013         }
1014         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1015                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1016
1017         return 0;
1018 }
1019
1020 static int process_inode_ref(struct extent_buffer *eb,
1021                              int slot, struct btrfs_key *key,
1022                              struct shared_node *active_node)
1023 {
1024         u32 total;
1025         u32 cur = 0;
1026         u32 len;
1027         u32 name_len;
1028         u64 index;
1029         int error;
1030         struct cache_tree *inode_cache;
1031         struct btrfs_inode_ref *ref;
1032         char namebuf[BTRFS_NAME_LEN];
1033
1034         inode_cache = &active_node->inode_cache;
1035
1036         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1037         total = btrfs_item_size_nr(eb, slot);
1038         while (cur < total) {
1039                 name_len = btrfs_inode_ref_name_len(eb, ref);
1040                 index = btrfs_inode_ref_index(eb, ref);
1041                 if (name_len <= BTRFS_NAME_LEN) {
1042                         len = name_len;
1043                         error = 0;
1044                 } else {
1045                         len = BTRFS_NAME_LEN;
1046                         error = REF_ERR_NAME_TOO_LONG;
1047                 }
1048                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1049                 add_inode_backref(inode_cache, key->objectid, key->offset,
1050                                   index, namebuf, len, 0, key->type, error);
1051
1052                 len = sizeof(*ref) + name_len;
1053                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1054                 cur += len;
1055         }
1056         return 0;
1057 }
1058
1059 static int process_inode_extref(struct extent_buffer *eb,
1060                                 int slot, struct btrfs_key *key,
1061                                 struct shared_node *active_node)
1062 {
1063         u32 total;
1064         u32 cur = 0;
1065         u32 len;
1066         u32 name_len;
1067         u64 index;
1068         u64 parent;
1069         int error;
1070         struct cache_tree *inode_cache;
1071         struct btrfs_inode_extref *extref;
1072         char namebuf[BTRFS_NAME_LEN];
1073
1074         inode_cache = &active_node->inode_cache;
1075
1076         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1077         total = btrfs_item_size_nr(eb, slot);
1078         while (cur < total) {
1079                 name_len = btrfs_inode_extref_name_len(eb, extref);
1080                 index = btrfs_inode_extref_index(eb, extref);
1081                 parent = btrfs_inode_extref_parent(eb, extref);
1082                 if (name_len <= BTRFS_NAME_LEN) {
1083                         len = name_len;
1084                         error = 0;
1085                 } else {
1086                         len = BTRFS_NAME_LEN;
1087                         error = REF_ERR_NAME_TOO_LONG;
1088                 }
1089                 read_extent_buffer(eb, namebuf,
1090                                    (unsigned long)(extref + 1), len);
1091                 add_inode_backref(inode_cache, key->objectid, parent,
1092                                   index, namebuf, len, 0, key->type, error);
1093
1094                 len = sizeof(*extref) + name_len;
1095                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1096                 cur += len;
1097         }
1098         return 0;
1099
1100 }
1101
1102 static int count_csum_range(struct btrfs_root *root, u64 start,
1103                             u64 len, u64 *found)
1104 {
1105         struct btrfs_key key;
1106         struct btrfs_path path;
1107         struct extent_buffer *leaf;
1108         int ret;
1109         size_t size;
1110         *found = 0;
1111         u64 csum_end;
1112         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1113
1114         btrfs_init_path(&path);
1115
1116         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1117         key.offset = start;
1118         key.type = BTRFS_EXTENT_CSUM_KEY;
1119
1120         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1121                                 &key, &path, 0, 0);
1122         if (ret < 0)
1123                 goto out;
1124         if (ret > 0 && path.slots[0] > 0) {
1125                 leaf = path.nodes[0];
1126                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1127                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1128                     key.type == BTRFS_EXTENT_CSUM_KEY)
1129                         path.slots[0]--;
1130         }
1131
1132         while (len > 0) {
1133                 leaf = path.nodes[0];
1134                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1135                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1136                         if (ret > 0)
1137                                 break;
1138                         else if (ret < 0)
1139                                 goto out;
1140                         leaf = path.nodes[0];
1141                 }
1142
1143                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1144                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1145                     key.type != BTRFS_EXTENT_CSUM_KEY)
1146                         break;
1147
1148                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1149                 if (key.offset >= start + len)
1150                         break;
1151
1152                 if (key.offset > start)
1153                         start = key.offset;
1154
1155                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1156                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1157                 if (csum_end > start) {
1158                         size = min(csum_end - start, len);
1159                         len -= size;
1160                         start += size;
1161                         *found += size;
1162                 }
1163
1164                 path.slots[0]++;
1165         }
1166 out:
1167         if (ret < 0)
1168                 return ret;
1169         btrfs_release_path(&path);
1170         return 0;
1171 }
1172
1173 static int process_file_extent(struct btrfs_root *root,
1174                                 struct extent_buffer *eb,
1175                                 int slot, struct btrfs_key *key,
1176                                 struct shared_node *active_node)
1177 {
1178         struct inode_record *rec;
1179         struct btrfs_file_extent_item *fi;
1180         u64 num_bytes = 0;
1181         u64 disk_bytenr = 0;
1182         u64 extent_offset = 0;
1183         u64 mask = root->sectorsize - 1;
1184         int extent_type;
1185         int ret;
1186
1187         rec = active_node->current;
1188         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1189         rec->found_file_extent = 1;
1190
1191         if (rec->extent_start == (u64)-1) {
1192                 rec->extent_start = key->offset;
1193                 rec->extent_end = key->offset;
1194         }
1195
1196         if (rec->extent_end > key->offset)
1197                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1198         else if (rec->extent_end < key->offset &&
1199                  rec->extent_end < rec->first_extent_gap)
1200                 rec->first_extent_gap = rec->extent_end;
1201
1202         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1203         extent_type = btrfs_file_extent_type(eb, fi);
1204
1205         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1206                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1207                 if (num_bytes == 0)
1208                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1209                 rec->found_size += num_bytes;
1210                 num_bytes = (num_bytes + mask) & ~mask;
1211         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1212                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1213                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1214                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1215                 extent_offset = btrfs_file_extent_offset(eb, fi);
1216                 if (num_bytes == 0 || (num_bytes & mask))
1217                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1218                 if (num_bytes + extent_offset >
1219                     btrfs_file_extent_ram_bytes(eb, fi))
1220                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1221                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1222                     (btrfs_file_extent_compression(eb, fi) ||
1223                      btrfs_file_extent_encryption(eb, fi) ||
1224                      btrfs_file_extent_other_encoding(eb, fi)))
1225                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1226                 if (disk_bytenr > 0)
1227                         rec->found_size += num_bytes;
1228         } else {
1229                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1230         }
1231         rec->extent_end = key->offset + num_bytes;
1232
1233         if (disk_bytenr > 0) {
1234                 u64 found;
1235                 if (btrfs_file_extent_compression(eb, fi))
1236                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1237                 else
1238                         disk_bytenr += extent_offset;
1239
1240                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1241                 if (ret < 0)
1242                         return ret;
1243                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1244                         if (found > 0)
1245                                 rec->found_csum_item = 1;
1246                         if (found < num_bytes)
1247                                 rec->some_csum_missing = 1;
1248                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1249                         if (found > 0)
1250                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1251                 }
1252         }
1253         return 0;
1254 }
1255
1256 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1257                             struct walk_control *wc)
1258 {
1259         struct btrfs_key key;
1260         u32 nritems;
1261         int i;
1262         int ret = 0;
1263         struct cache_tree *inode_cache;
1264         struct shared_node *active_node;
1265
1266         if (wc->root_level == wc->active_node &&
1267             btrfs_root_refs(&root->root_item) == 0)
1268                 return 0;
1269
1270         active_node = wc->nodes[wc->active_node];
1271         inode_cache = &active_node->inode_cache;
1272         nritems = btrfs_header_nritems(eb);
1273         for (i = 0; i < nritems; i++) {
1274                 btrfs_item_key_to_cpu(eb, &key, i);
1275
1276                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1277                         continue;
1278                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1279                         continue;
1280
1281                 if (active_node->current == NULL ||
1282                     active_node->current->ino < key.objectid) {
1283                         if (active_node->current) {
1284                                 active_node->current->checked = 1;
1285                                 maybe_free_inode_rec(inode_cache,
1286                                                      active_node->current);
1287                         }
1288                         active_node->current = get_inode_rec(inode_cache,
1289                                                              key.objectid, 1);
1290                 }
1291                 switch (key.type) {
1292                 case BTRFS_DIR_ITEM_KEY:
1293                 case BTRFS_DIR_INDEX_KEY:
1294                         ret = process_dir_item(root, eb, i, &key, active_node);
1295                         break;
1296                 case BTRFS_INODE_REF_KEY:
1297                         ret = process_inode_ref(eb, i, &key, active_node);
1298                         break;
1299                 case BTRFS_INODE_EXTREF_KEY:
1300                         ret = process_inode_extref(eb, i, &key, active_node);
1301                         break;
1302                 case BTRFS_INODE_ITEM_KEY:
1303                         ret = process_inode_item(eb, i, &key, active_node);
1304                         break;
1305                 case BTRFS_EXTENT_DATA_KEY:
1306                         ret = process_file_extent(root, eb, i, &key,
1307                                                   active_node);
1308                         break;
1309                 default:
1310                         break;
1311                 };
1312         }
1313         return ret;
1314 }
1315
1316 static void reada_walk_down(struct btrfs_root *root,
1317                             struct extent_buffer *node, int slot)
1318 {
1319         u64 bytenr;
1320         u64 ptr_gen;
1321         u32 nritems;
1322         u32 blocksize;
1323         int i;
1324         int level;
1325
1326         level = btrfs_header_level(node);
1327         if (level != 1)
1328                 return;
1329
1330         nritems = btrfs_header_nritems(node);
1331         blocksize = btrfs_level_size(root, level - 1);
1332         for (i = slot; i < nritems; i++) {
1333                 bytenr = btrfs_node_blockptr(node, i);
1334                 ptr_gen = btrfs_node_ptr_generation(node, i);
1335                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1336         }
1337 }
1338
1339 /*
1340  * Check the child node/leaf by the following condition:
1341  * 1. the first item key of the node/leaf should be the same with the one
1342  *    in parent.
1343  * 2. block in parent node should match the child node/leaf.
1344  * 3. generation of parent node and child's header should be consistent.
1345  *
1346  * Or the child node/leaf pointed by the key in parent is not valid.
1347  *
1348  * We hope to check leaf owner too, but since subvol may share leaves,
1349  * which makes leaf owner check not so strong, key check should be
1350  * sufficient enough for that case.
1351  */
1352 static int check_child_node(struct btrfs_root *root,
1353                             struct extent_buffer *parent, int slot,
1354                             struct extent_buffer *child)
1355 {
1356         struct btrfs_key parent_key;
1357         struct btrfs_key child_key;
1358         int ret = 0;
1359
1360         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1361         if (btrfs_header_level(child) == 0)
1362                 btrfs_item_key_to_cpu(child, &child_key, 0);
1363         else
1364                 btrfs_node_key_to_cpu(child, &child_key, 0);
1365
1366         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1367                 ret = -EINVAL;
1368                 fprintf(stderr,
1369                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1370                         parent_key.objectid, parent_key.type, parent_key.offset,
1371                         child_key.objectid, child_key.type, child_key.offset);
1372         }
1373         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1374                 ret = -EINVAL;
1375                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1376                         btrfs_node_blockptr(parent, slot),
1377                         btrfs_header_bytenr(child));
1378         }
1379         if (btrfs_node_ptr_generation(parent, slot) !=
1380             btrfs_header_generation(child)) {
1381                 ret = -EINVAL;
1382                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1383                         btrfs_header_generation(child),
1384                         btrfs_node_ptr_generation(parent, slot));
1385         }
1386         return ret;
1387 }
1388
1389 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1390                           struct walk_control *wc, int *level)
1391 {
1392         u64 bytenr;
1393         u64 ptr_gen;
1394         struct extent_buffer *next;
1395         struct extent_buffer *cur;
1396         u32 blocksize;
1397         int ret, err = 0;
1398         u64 refs;
1399
1400         WARN_ON(*level < 0);
1401         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1402         ret = btrfs_lookup_extent_info(NULL, root,
1403                                        path->nodes[*level]->start,
1404                                        *level, 1, &refs, NULL);
1405         if (ret < 0) {
1406                 err = ret;
1407                 goto out;
1408         }
1409
1410         if (refs > 1) {
1411                 ret = enter_shared_node(root, path->nodes[*level]->start,
1412                                         refs, wc, *level);
1413                 if (ret > 0) {
1414                         err = ret;
1415                         goto out;
1416                 }
1417         }
1418
1419         while (*level >= 0) {
1420                 WARN_ON(*level < 0);
1421                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1422                 cur = path->nodes[*level];
1423
1424                 if (btrfs_header_level(cur) != *level)
1425                         WARN_ON(1);
1426
1427                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1428                         break;
1429                 if (*level == 0) {
1430                         ret = process_one_leaf(root, cur, wc);
1431                         if (ret < 0)
1432                                 err = ret;
1433                         break;
1434                 }
1435                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1436                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1437                 blocksize = btrfs_level_size(root, *level - 1);
1438                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1439                                                1, &refs, NULL);
1440                 if (ret < 0)
1441                         refs = 0;
1442
1443                 if (refs > 1) {
1444                         ret = enter_shared_node(root, bytenr, refs,
1445                                                 wc, *level - 1);
1446                         if (ret > 0) {
1447                                 path->slots[*level]++;
1448                                 continue;
1449                         }
1450                 }
1451
1452                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1453                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1454                         free_extent_buffer(next);
1455                         reada_walk_down(root, cur, path->slots[*level]);
1456                         next = read_tree_block(root, bytenr, blocksize,
1457                                                ptr_gen);
1458                         if (!next) {
1459                                 err = -EIO;
1460                                 goto out;
1461                         }
1462                 }
1463
1464                 ret = check_child_node(root, cur, path->slots[*level], next);
1465                 if (ret) {
1466                         err = ret;
1467                         goto out;
1468                 }
1469                 *level = *level - 1;
1470                 free_extent_buffer(path->nodes[*level]);
1471                 path->nodes[*level] = next;
1472                 path->slots[*level] = 0;
1473         }
1474 out:
1475         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1476         return err;
1477 }
1478
1479 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1480                         struct walk_control *wc, int *level)
1481 {
1482         int i;
1483         struct extent_buffer *leaf;
1484
1485         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1486                 leaf = path->nodes[i];
1487                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1488                         path->slots[i]++;
1489                         *level = i;
1490                         return 0;
1491                 } else {
1492                         free_extent_buffer(path->nodes[*level]);
1493                         path->nodes[*level] = NULL;
1494                         BUG_ON(*level > wc->active_node);
1495                         if (*level == wc->active_node)
1496                                 leave_shared_node(root, wc, *level);
1497                         *level = i + 1;
1498                 }
1499         }
1500         return 1;
1501 }
1502
1503 static int check_root_dir(struct inode_record *rec)
1504 {
1505         struct inode_backref *backref;
1506         int ret = -1;
1507
1508         if (!rec->found_inode_item || rec->errors)
1509                 goto out;
1510         if (rec->nlink != 1 || rec->found_link != 0)
1511                 goto out;
1512         if (list_empty(&rec->backrefs))
1513                 goto out;
1514         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1515         if (!backref->found_inode_ref)
1516                 goto out;
1517         if (backref->index != 0 || backref->namelen != 2 ||
1518             memcmp(backref->name, "..", 2))
1519                 goto out;
1520         if (backref->found_dir_index || backref->found_dir_item)
1521                 goto out;
1522         ret = 0;
1523 out:
1524         return ret;
1525 }
1526
1527 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1528                               struct btrfs_root *root, struct btrfs_path *path,
1529                               struct inode_record *rec)
1530 {
1531         struct btrfs_inode_item *ei;
1532         struct btrfs_key key;
1533         int ret;
1534
1535         key.objectid = rec->ino;
1536         key.type = BTRFS_INODE_ITEM_KEY;
1537         key.offset = (u64)-1;
1538
1539         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1540         if (ret < 0)
1541                 goto out;
1542         if (ret) {
1543                 if (!path->slots[0]) {
1544                         ret = -ENOENT;
1545                         goto out;
1546                 }
1547                 path->slots[0]--;
1548                 ret = 0;
1549         }
1550         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1551         if (key.objectid != rec->ino) {
1552                 ret = -ENOENT;
1553                 goto out;
1554         }
1555
1556         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1557                             struct btrfs_inode_item);
1558         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1559         btrfs_mark_buffer_dirty(path->nodes[0]);
1560         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1561         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1562                root->root_key.objectid);
1563 out:
1564         btrfs_release_path(path);
1565         return ret;
1566 }
1567
1568 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1569                                     struct btrfs_root *root,
1570                                     struct btrfs_path *path,
1571                                     struct inode_record *rec)
1572 {
1573         struct btrfs_key key;
1574         int ret;
1575
1576         key.objectid = BTRFS_ORPHAN_OBJECTID;
1577         key.type = BTRFS_ORPHAN_ITEM_KEY;
1578         key.offset = rec->ino;
1579
1580         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1581         btrfs_release_path(path);
1582         if (!ret)
1583                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1584         return ret;
1585 }
1586
1587 static int add_missing_dir_index(struct btrfs_root *root,
1588                                  struct cache_tree *inode_cache,
1589                                  struct inode_record *rec,
1590                                  struct inode_backref *backref)
1591 {
1592         struct btrfs_path *path;
1593         struct btrfs_trans_handle *trans;
1594         struct btrfs_dir_item *dir_item;
1595         struct extent_buffer *leaf;
1596         struct btrfs_key key;
1597         struct btrfs_disk_key disk_key;
1598         struct inode_record *dir_rec;
1599         unsigned long name_ptr;
1600         u32 data_size = sizeof(*dir_item) + backref->namelen;
1601         int ret;
1602
1603         path = btrfs_alloc_path();
1604         if (!path)
1605                 return -ENOMEM;
1606
1607         trans = btrfs_start_transaction(root, 1);
1608         if (IS_ERR(trans)) {
1609                 btrfs_free_path(path);
1610                 return PTR_ERR(trans);
1611         }
1612
1613         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1614                 (unsigned long long)rec->ino);
1615         key.objectid = backref->dir;
1616         key.type = BTRFS_DIR_INDEX_KEY;
1617         key.offset = backref->index;
1618
1619         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
1620         BUG_ON(ret);
1621
1622         leaf = path->nodes[0];
1623         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
1624
1625         disk_key.objectid = cpu_to_le64(rec->ino);
1626         disk_key.type = BTRFS_INODE_ITEM_KEY;
1627         disk_key.offset = 0;
1628
1629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1631         btrfs_set_dir_data_len(leaf, dir_item, 0);
1632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1633         name_ptr = (unsigned long)(dir_item + 1);
1634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1635         btrfs_mark_buffer_dirty(leaf);
1636         btrfs_free_path(path);
1637         btrfs_commit_transaction(trans, root);
1638
1639         backref->found_dir_index = 1;
1640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1641         if (!dir_rec)
1642                 return 0;
1643         dir_rec->found_size += backref->namelen;
1644         if (dir_rec->found_size == dir_rec->isize &&
1645             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1646                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1647         if (dir_rec->found_size != dir_rec->isize)
1648                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1649
1650         return 0;
1651 }
1652
1653 static int delete_dir_index(struct btrfs_root *root,
1654                             struct cache_tree *inode_cache,
1655                             struct inode_record *rec,
1656                             struct inode_backref *backref)
1657 {
1658         struct btrfs_trans_handle *trans;
1659         struct btrfs_dir_item *di;
1660         struct btrfs_path *path;
1661         int ret = 0;
1662
1663         path = btrfs_alloc_path();
1664         if (!path)
1665                 return -ENOMEM;
1666
1667         trans = btrfs_start_transaction(root, 1);
1668         if (IS_ERR(trans)) {
1669                 btrfs_free_path(path);
1670                 return PTR_ERR(trans);
1671         }
1672
1673
1674         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1675                 (unsigned long long)backref->dir,
1676                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1677                 (unsigned long long)root->objectid);
1678
1679         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
1680                                     backref->name, backref->namelen,
1681                                     backref->index, -1);
1682         if (IS_ERR(di)) {
1683                 ret = PTR_ERR(di);
1684                 btrfs_free_path(path);
1685                 btrfs_commit_transaction(trans, root);
1686                 if (ret == -ENOENT)
1687                         return 0;
1688                 return ret;
1689         }
1690
1691         if (!di)
1692                 ret = btrfs_del_item(trans, root, path);
1693         else
1694                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
1695         BUG_ON(ret);
1696         btrfs_free_path(path);
1697         btrfs_commit_transaction(trans, root);
1698         return ret;
1699 }
1700
1701 static int repair_inode_backrefs(struct btrfs_root *root,
1702                                  struct inode_record *rec,
1703                                  struct cache_tree *inode_cache,
1704                                  int delete)
1705 {
1706         struct inode_backref *tmp, *backref;
1707         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1708         int ret = 0;
1709         int repaired = 0;
1710
1711         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1712                 /* Index 0 for root dir's are special, don't mess with it */
1713                 if (rec->ino == root_dirid && backref->index == 0)
1714                         continue;
1715
1716                 if (delete && backref->found_dir_index &&
1717                     !backref->found_inode_ref) {
1718                         ret = delete_dir_index(root, inode_cache, rec, backref);
1719                         if (ret)
1720                                 break;
1721                         repaired++;
1722                         list_del(&backref->list);
1723                         free(backref);
1724                 }
1725
1726                 if (!delete && !backref->found_dir_index &&
1727                     backref->found_dir_item && backref->found_inode_ref) {
1728                         ret = add_missing_dir_index(root, inode_cache, rec,
1729                                                     backref);
1730                         if (ret)
1731                                 break;
1732                         repaired++;
1733                         if (backref->found_dir_item &&
1734                             backref->found_dir_index &&
1735                             backref->found_dir_index) {
1736                                 if (!backref->errors &&
1737                                     backref->found_inode_ref) {
1738                                         list_del(&backref->list);
1739                                         free(backref);
1740                                 }
1741                         }
1742                 }
1743
1744         }
1745         return ret ? ret : repaired;
1746 }
1747
1748 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
1749 {
1750         struct btrfs_trans_handle *trans;
1751         struct btrfs_path *path;
1752         int ret = 0;
1753
1754         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM)))
1755                 return rec->errors;
1756
1757         path = btrfs_alloc_path();
1758         if (!path)
1759                 return -ENOMEM;
1760
1761         trans = btrfs_start_transaction(root, 1);
1762         if (IS_ERR(trans)) {
1763                 btrfs_free_path(path);
1764                 return PTR_ERR(trans);
1765         }
1766
1767         if (rec->errors & I_ERR_DIR_ISIZE_WRONG)
1768                 ret = repair_inode_isize(trans, root, path, rec);
1769         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
1770                 ret = repair_inode_orphan_item(trans, root, path, rec);
1771         btrfs_commit_transaction(trans, root);
1772         btrfs_free_path(path);
1773         return ret;
1774 }
1775
1776 static int check_inode_recs(struct btrfs_root *root,
1777                             struct cache_tree *inode_cache)
1778 {
1779         struct cache_extent *cache;
1780         struct ptr_node *node;
1781         struct inode_record *rec;
1782         struct inode_backref *backref;
1783         int stage = 0;
1784         int ret;
1785         int err = 0;
1786         u64 error = 0;
1787         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1788
1789         if (btrfs_root_refs(&root->root_item) == 0) {
1790                 if (!cache_tree_empty(inode_cache))
1791                         fprintf(stderr, "warning line %d\n", __LINE__);
1792                 return 0;
1793         }
1794
1795         /*
1796          * We need to repair backrefs first because we could change some of the
1797          * errors in the inode recs.
1798          *
1799          * We also need to go through and delete invalid backrefs first and then
1800          * add the correct ones second.  We do this because we may get EEXIST
1801          * when adding back the correct index because we hadn't yet deleted the
1802          * invalid index.
1803          *
1804          * For example, if we were missing a dir index then the directories
1805          * isize would be wrong, so if we fixed the isize to what we thought it
1806          * would be and then fixed the backref we'd still have a invalid fs, so
1807          * we need to add back the dir index and then check to see if the isize
1808          * is still wrong.
1809          */
1810         while (stage < 3) {
1811                 stage++;
1812                 if (stage == 3 && !err)
1813                         break;
1814
1815                 cache = search_cache_extent(inode_cache, 0);
1816                 while (repair && cache) {
1817                         node = container_of(cache, struct ptr_node, cache);
1818                         rec = node->data;
1819                         cache = next_cache_extent(cache);
1820
1821                         /* Need to free everything up and rescan */
1822                         if (stage == 3) {
1823                                 remove_cache_extent(inode_cache, &node->cache);
1824                                 free(node);
1825                                 free_inode_rec(rec);
1826                                 continue;
1827                         }
1828
1829                         if (list_empty(&rec->backrefs))
1830                                 continue;
1831
1832                         ret = repair_inode_backrefs(root, rec, inode_cache,
1833                                                     stage == 1);
1834                         if (ret < 0) {
1835                                 err = ret;
1836                                 stage = 2;
1837                                 break;
1838                         } if (ret > 0) {
1839                                 err = -EAGAIN;
1840                         }
1841                 }
1842         }
1843         if (err)
1844                 return err;
1845
1846         rec = get_inode_rec(inode_cache, root_dirid, 0);
1847         if (rec) {
1848                 ret = check_root_dir(rec);
1849                 if (ret) {
1850                         fprintf(stderr, "root %llu root dir %llu error\n",
1851                                 (unsigned long long)root->root_key.objectid,
1852                                 (unsigned long long)root_dirid);
1853                         error++;
1854                 }
1855         } else {
1856                 fprintf(stderr, "root %llu root dir %llu not found\n",
1857                         (unsigned long long)root->root_key.objectid,
1858                         (unsigned long long)root_dirid);
1859         }
1860
1861         while (1) {
1862                 cache = search_cache_extent(inode_cache, 0);
1863                 if (!cache)
1864                         break;
1865                 node = container_of(cache, struct ptr_node, cache);
1866                 rec = node->data;
1867                 remove_cache_extent(inode_cache, &node->cache);
1868                 free(node);
1869                 if (rec->ino == root_dirid ||
1870                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1871                         free_inode_rec(rec);
1872                         continue;
1873                 }
1874
1875                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1876                         ret = check_orphan_item(root, rec->ino);
1877                         if (ret == 0)
1878                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1879                         if (can_free_inode_rec(rec)) {
1880                                 free_inode_rec(rec);
1881                                 continue;
1882                         }
1883                 }
1884
1885                 if (repair) {
1886                         ret = try_repair_inode(root, rec);
1887                         if (ret == 0 && can_free_inode_rec(rec)) {
1888                                 free_inode_rec(rec);
1889                                 continue;
1890                         }
1891                         ret = 0;
1892                 }
1893
1894                 error++;
1895                 if (!rec->found_inode_item)
1896                         rec->errors |= I_ERR_NO_INODE_ITEM;
1897                 if (rec->found_link != rec->nlink)
1898                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1899                 print_inode_error(root, rec);
1900                 list_for_each_entry(backref, &rec->backrefs, list) {
1901                         if (!backref->found_dir_item)
1902                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1903                         if (!backref->found_dir_index)
1904                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1905                         if (!backref->found_inode_ref)
1906                                 backref->errors |= REF_ERR_NO_INODE_REF;
1907                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1908                                 " namelen %u name %s filetype %d errors %x",
1909                                 (unsigned long long)backref->dir,
1910                                 (unsigned long long)backref->index,
1911                                 backref->namelen, backref->name,
1912                                 backref->filetype, backref->errors);
1913                         print_ref_error(backref->errors);
1914                 }
1915                 free_inode_rec(rec);
1916         }
1917         return (error > 0) ? -1 : 0;
1918 }
1919
1920 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1921                                         u64 objectid)
1922 {
1923         struct cache_extent *cache;
1924         struct root_record *rec = NULL;
1925         int ret;
1926
1927         cache = lookup_cache_extent(root_cache, objectid, 1);
1928         if (cache) {
1929                 rec = container_of(cache, struct root_record, cache);
1930         } else {
1931                 rec = calloc(1, sizeof(*rec));
1932                 rec->objectid = objectid;
1933                 INIT_LIST_HEAD(&rec->backrefs);
1934                 rec->cache.start = objectid;
1935                 rec->cache.size = 1;
1936
1937                 ret = insert_cache_extent(root_cache, &rec->cache);
1938                 BUG_ON(ret);
1939         }
1940         return rec;
1941 }
1942
1943 static struct root_backref *get_root_backref(struct root_record *rec,
1944                                              u64 ref_root, u64 dir, u64 index,
1945                                              const char *name, int namelen)
1946 {
1947         struct root_backref *backref;
1948
1949         list_for_each_entry(backref, &rec->backrefs, list) {
1950                 if (backref->ref_root != ref_root || backref->dir != dir ||
1951                     backref->namelen != namelen)
1952                         continue;
1953                 if (memcmp(name, backref->name, namelen))
1954                         continue;
1955                 return backref;
1956         }
1957
1958         backref = malloc(sizeof(*backref) + namelen + 1);
1959         memset(backref, 0, sizeof(*backref));
1960         backref->ref_root = ref_root;
1961         backref->dir = dir;
1962         backref->index = index;
1963         backref->namelen = namelen;
1964         memcpy(backref->name, name, namelen);
1965         backref->name[namelen] = '\0';
1966         list_add_tail(&backref->list, &rec->backrefs);
1967         return backref;
1968 }
1969
1970 static void free_root_record(struct cache_extent *cache)
1971 {
1972         struct root_record *rec;
1973         struct root_backref *backref;
1974
1975         rec = container_of(cache, struct root_record, cache);
1976         while (!list_empty(&rec->backrefs)) {
1977                 backref = list_entry(rec->backrefs.next,
1978                                      struct root_backref, list);
1979                 list_del(&backref->list);
1980                 free(backref);
1981         }
1982
1983         kfree(rec);
1984 }
1985
1986 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
1987
1988 static int add_root_backref(struct cache_tree *root_cache,
1989                             u64 root_id, u64 ref_root, u64 dir, u64 index,
1990                             const char *name, int namelen,
1991                             int item_type, int errors)
1992 {
1993         struct root_record *rec;
1994         struct root_backref *backref;
1995
1996         rec = get_root_rec(root_cache, root_id);
1997         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
1998
1999         backref->errors |= errors;
2000
2001         if (item_type != BTRFS_DIR_ITEM_KEY) {
2002                 if (backref->found_dir_index || backref->found_back_ref ||
2003                     backref->found_forward_ref) {
2004                         if (backref->index != index)
2005                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
2006                 } else {
2007                         backref->index = index;
2008                 }
2009         }
2010
2011         if (item_type == BTRFS_DIR_ITEM_KEY) {
2012                 if (backref->found_forward_ref)
2013                         rec->found_ref++;
2014                 backref->found_dir_item = 1;
2015         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2016                 backref->found_dir_index = 1;
2017         } else if (item_type == BTRFS_ROOT_REF_KEY) {
2018                 if (backref->found_forward_ref)
2019                         backref->errors |= REF_ERR_DUP_ROOT_REF;
2020                 else if (backref->found_dir_item)
2021                         rec->found_ref++;
2022                 backref->found_forward_ref = 1;
2023         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2024                 if (backref->found_back_ref)
2025                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2026                 backref->found_back_ref = 1;
2027         } else {
2028                 BUG_ON(1);
2029         }
2030
2031         if (backref->found_forward_ref && backref->found_dir_item)
2032                 backref->reachable = 1;
2033         return 0;
2034 }
2035
2036 static int merge_root_recs(struct btrfs_root *root,
2037                            struct cache_tree *src_cache,
2038                            struct cache_tree *dst_cache)
2039 {
2040         struct cache_extent *cache;
2041         struct ptr_node *node;
2042         struct inode_record *rec;
2043         struct inode_backref *backref;
2044         int ret = 0;
2045
2046         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2047                 free_inode_recs_tree(src_cache);
2048                 return 0;
2049         }
2050
2051         while (1) {
2052                 cache = search_cache_extent(src_cache, 0);
2053                 if (!cache)
2054                         break;
2055                 node = container_of(cache, struct ptr_node, cache);
2056                 rec = node->data;
2057                 remove_cache_extent(src_cache, &node->cache);
2058                 free(node);
2059
2060                 ret = is_child_root(root, root->objectid, rec->ino);
2061                 if (ret < 0)
2062                         break;
2063                 else if (ret == 0)
2064                         goto skip;
2065
2066                 list_for_each_entry(backref, &rec->backrefs, list) {
2067                         BUG_ON(backref->found_inode_ref);
2068                         if (backref->found_dir_item)
2069                                 add_root_backref(dst_cache, rec->ino,
2070                                         root->root_key.objectid, backref->dir,
2071                                         backref->index, backref->name,
2072                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
2073                                         backref->errors);
2074                         if (backref->found_dir_index)
2075                                 add_root_backref(dst_cache, rec->ino,
2076                                         root->root_key.objectid, backref->dir,
2077                                         backref->index, backref->name,
2078                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
2079                                         backref->errors);
2080                 }
2081 skip:
2082                 free_inode_rec(rec);
2083         }
2084         if (ret < 0)
2085                 return ret;
2086         return 0;
2087 }
2088
2089 static int check_root_refs(struct btrfs_root *root,
2090                            struct cache_tree *root_cache)
2091 {
2092         struct root_record *rec;
2093         struct root_record *ref_root;
2094         struct root_backref *backref;
2095         struct cache_extent *cache;
2096         int loop = 1;
2097         int ret;
2098         int error;
2099         int errors = 0;
2100
2101         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2102         rec->found_ref = 1;
2103
2104         /* fixme: this can not detect circular references */
2105         while (loop) {
2106                 loop = 0;
2107                 cache = search_cache_extent(root_cache, 0);
2108                 while (1) {
2109                         if (!cache)
2110                                 break;
2111                         rec = container_of(cache, struct root_record, cache);
2112                         cache = next_cache_extent(cache);
2113
2114                         if (rec->found_ref == 0)
2115                                 continue;
2116
2117                         list_for_each_entry(backref, &rec->backrefs, list) {
2118                                 if (!backref->reachable)
2119                                         continue;
2120
2121                                 ref_root = get_root_rec(root_cache,
2122                                                         backref->ref_root);
2123                                 if (ref_root->found_ref > 0)
2124                                         continue;
2125
2126                                 backref->reachable = 0;
2127                                 rec->found_ref--;
2128                                 if (rec->found_ref == 0)
2129                                         loop = 1;
2130                         }
2131                 }
2132         }
2133
2134         cache = search_cache_extent(root_cache, 0);
2135         while (1) {
2136                 if (!cache)
2137                         break;
2138                 rec = container_of(cache, struct root_record, cache);
2139                 cache = next_cache_extent(cache);
2140
2141                 if (rec->found_ref == 0 &&
2142                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2143                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2144                         ret = check_orphan_item(root->fs_info->tree_root,
2145                                                 rec->objectid);
2146                         if (ret == 0)
2147                                 continue;
2148
2149                         /*
2150                          * If we don't have a root item then we likely just have
2151                          * a dir item in a snapshot for this root but no actual
2152                          * ref key or anything so it's meaningless.
2153                          */
2154                         if (!rec->found_root_item)
2155                                 continue;
2156                         errors++;
2157                         fprintf(stderr, "fs tree %llu not referenced\n",
2158                                 (unsigned long long)rec->objectid);
2159                 }
2160
2161                 error = 0;
2162                 if (rec->found_ref > 0 && !rec->found_root_item)
2163                         error = 1;
2164                 list_for_each_entry(backref, &rec->backrefs, list) {
2165                         if (!backref->found_dir_item)
2166                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2167                         if (!backref->found_dir_index)
2168                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2169                         if (!backref->found_back_ref)
2170                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
2171                         if (!backref->found_forward_ref)
2172                                 backref->errors |= REF_ERR_NO_ROOT_REF;
2173                         if (backref->reachable && backref->errors)
2174                                 error = 1;
2175                 }
2176                 if (!error)
2177                         continue;
2178
2179                 errors++;
2180                 fprintf(stderr, "fs tree %llu refs %u %s\n",
2181                         (unsigned long long)rec->objectid, rec->found_ref,
2182                          rec->found_root_item ? "" : "not found");
2183
2184                 list_for_each_entry(backref, &rec->backrefs, list) {
2185                         if (!backref->reachable)
2186                                 continue;
2187                         if (!backref->errors && rec->found_root_item)
2188                                 continue;
2189                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
2190                                 " index %llu namelen %u name %s errors %x\n",
2191                                 (unsigned long long)backref->ref_root,
2192                                 (unsigned long long)backref->dir,
2193                                 (unsigned long long)backref->index,
2194                                 backref->namelen, backref->name,
2195                                 backref->errors);
2196                         print_ref_error(backref->errors);
2197                 }
2198         }
2199         return errors > 0 ? 1 : 0;
2200 }
2201
2202 static int process_root_ref(struct extent_buffer *eb, int slot,
2203                             struct btrfs_key *key,
2204                             struct cache_tree *root_cache)
2205 {
2206         u64 dirid;
2207         u64 index;
2208         u32 len;
2209         u32 name_len;
2210         struct btrfs_root_ref *ref;
2211         char namebuf[BTRFS_NAME_LEN];
2212         int error;
2213
2214         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
2215
2216         dirid = btrfs_root_ref_dirid(eb, ref);
2217         index = btrfs_root_ref_sequence(eb, ref);
2218         name_len = btrfs_root_ref_name_len(eb, ref);
2219
2220         if (name_len <= BTRFS_NAME_LEN) {
2221                 len = name_len;
2222                 error = 0;
2223         } else {
2224                 len = BTRFS_NAME_LEN;
2225                 error = REF_ERR_NAME_TOO_LONG;
2226         }
2227         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
2228
2229         if (key->type == BTRFS_ROOT_REF_KEY) {
2230                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
2231                                  index, namebuf, len, key->type, error);
2232         } else {
2233                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
2234                                  index, namebuf, len, key->type, error);
2235         }
2236         return 0;
2237 }
2238
2239 static int check_fs_root(struct btrfs_root *root,
2240                          struct cache_tree *root_cache,
2241                          struct walk_control *wc)
2242 {
2243         int ret = 0;
2244         int err = 0;
2245         int wret;
2246         int level;
2247         struct btrfs_path path;
2248         struct shared_node root_node;
2249         struct root_record *rec;
2250         struct btrfs_root_item *root_item = &root->root_item;
2251
2252         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2253                 rec = get_root_rec(root_cache, root->root_key.objectid);
2254                 if (btrfs_root_refs(root_item) > 0)
2255                         rec->found_root_item = 1;
2256         }
2257
2258         btrfs_init_path(&path);
2259         memset(&root_node, 0, sizeof(root_node));
2260         cache_tree_init(&root_node.root_cache);
2261         cache_tree_init(&root_node.inode_cache);
2262
2263         level = btrfs_header_level(root->node);
2264         memset(wc->nodes, 0, sizeof(wc->nodes));
2265         wc->nodes[level] = &root_node;
2266         wc->active_node = level;
2267         wc->root_level = level;
2268
2269         if (btrfs_root_refs(root_item) > 0 ||
2270             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2271                 path.nodes[level] = root->node;
2272                 extent_buffer_get(root->node);
2273                 path.slots[level] = 0;
2274         } else {
2275                 struct btrfs_key key;
2276                 struct btrfs_disk_key found_key;
2277
2278                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2279                 level = root_item->drop_level;
2280                 path.lowest_level = level;
2281                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2282                 if (wret < 0)
2283                         goto skip_walking;
2284                 btrfs_node_key(path.nodes[level], &found_key,
2285                                 path.slots[level]);
2286                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2287                                         sizeof(found_key)));
2288         }
2289
2290         while (1) {
2291                 wret = walk_down_tree(root, &path, wc, &level);
2292                 if (wret < 0)
2293                         ret = wret;
2294                 if (wret != 0)
2295                         break;
2296
2297                 wret = walk_up_tree(root, &path, wc, &level);
2298                 if (wret < 0)
2299                         ret = wret;
2300                 if (wret != 0)
2301                         break;
2302         }
2303 skip_walking:
2304         btrfs_release_path(&path);
2305
2306         err = merge_root_recs(root, &root_node.root_cache, root_cache);
2307         if (err < 0)
2308                 ret = err;
2309
2310         if (root_node.current) {
2311                 root_node.current->checked = 1;
2312                 maybe_free_inode_rec(&root_node.inode_cache,
2313                                 root_node.current);
2314         }
2315
2316         err = check_inode_recs(root, &root_node.inode_cache);
2317         if (!ret)
2318                 ret = err;
2319         return ret;
2320 }
2321
2322 static int fs_root_objectid(u64 objectid)
2323 {
2324         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
2325             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2326                 return 1;
2327         return is_fstree(objectid);
2328 }
2329
2330 static int check_fs_roots(struct btrfs_root *root,
2331                           struct cache_tree *root_cache)
2332 {
2333         struct btrfs_path path;
2334         struct btrfs_key key;
2335         struct walk_control wc;
2336         struct extent_buffer *leaf, *tree_node;
2337         struct btrfs_root *tmp_root;
2338         struct btrfs_root *tree_root = root->fs_info->tree_root;
2339         int ret;
2340         int err = 0;
2341
2342         /*
2343          * Just in case we made any changes to the extent tree that weren't
2344          * reflected into the free space cache yet.
2345          */
2346         if (repair)
2347                 reset_cached_block_groups(root->fs_info);
2348         memset(&wc, 0, sizeof(wc));
2349         cache_tree_init(&wc.shared);
2350         btrfs_init_path(&path);
2351
2352 again:
2353         key.offset = 0;
2354         key.objectid = 0;
2355         key.type = BTRFS_ROOT_ITEM_KEY;
2356         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
2357         if (ret < 0) {
2358                 err = 1;
2359                 goto out;
2360         }
2361         tree_node = tree_root->node;
2362         while (1) {
2363                 if (tree_node != tree_root->node) {
2364                         free_root_recs_tree(root_cache);
2365                         btrfs_release_path(&path);
2366                         goto again;
2367                 }
2368                 leaf = path.nodes[0];
2369                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2370                         ret = btrfs_next_leaf(tree_root, &path);
2371                         if (ret) {
2372                                 if (ret < 0)
2373                                         err = 1;
2374                                 break;
2375                         }
2376                         leaf = path.nodes[0];
2377                 }
2378                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2379                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
2380                     fs_root_objectid(key.objectid)) {
2381                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2382                                 tmp_root = btrfs_read_fs_root_no_cache(
2383                                                 root->fs_info, &key);
2384                         } else {
2385                                 key.offset = (u64)-1;
2386                                 tmp_root = btrfs_read_fs_root(
2387                                                 root->fs_info, &key);
2388                         }
2389                         if (IS_ERR(tmp_root)) {
2390                                 err = 1;
2391                                 goto next;
2392                         }
2393                         ret = check_fs_root(tmp_root, root_cache, &wc);
2394                         if (ret == -EAGAIN) {
2395                                 free_root_recs_tree(root_cache);
2396                                 btrfs_release_path(&path);
2397                                 goto again;
2398                         }
2399                         if (ret)
2400                                 err = 1;
2401                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
2402                                 btrfs_free_fs_root(tmp_root);
2403                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
2404                            key.type == BTRFS_ROOT_BACKREF_KEY) {
2405                         process_root_ref(leaf, path.slots[0], &key,
2406                                          root_cache);
2407                 }
2408 next:
2409                 path.slots[0]++;
2410         }
2411 out:
2412         btrfs_release_path(&path);
2413         if (err)
2414                 free_extent_cache_tree(&wc.shared);
2415         if (!cache_tree_empty(&wc.shared))
2416                 fprintf(stderr, "warning line %d\n", __LINE__);
2417
2418         return err;
2419 }
2420
2421 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
2422 {
2423         struct list_head *cur = rec->backrefs.next;
2424         struct extent_backref *back;
2425         struct tree_backref *tback;
2426         struct data_backref *dback;
2427         u64 found = 0;
2428         int err = 0;
2429
2430         while(cur != &rec->backrefs) {
2431                 back = list_entry(cur, struct extent_backref, list);
2432                 cur = cur->next;
2433                 if (!back->found_extent_tree) {
2434                         err = 1;
2435                         if (!print_errs)
2436                                 goto out;
2437                         if (back->is_data) {
2438                                 dback = (struct data_backref *)back;
2439                                 fprintf(stderr, "Backref %llu %s %llu"
2440                                         " owner %llu offset %llu num_refs %lu"
2441                                         " not found in extent tree\n",
2442                                         (unsigned long long)rec->start,
2443                                         back->full_backref ?
2444                                         "parent" : "root",
2445                                         back->full_backref ?
2446                                         (unsigned long long)dback->parent:
2447                                         (unsigned long long)dback->root,
2448                                         (unsigned long long)dback->owner,
2449                                         (unsigned long long)dback->offset,
2450                                         (unsigned long)dback->num_refs);
2451                         } else {
2452                                 tback = (struct tree_backref *)back;
2453                                 fprintf(stderr, "Backref %llu parent %llu"
2454                                         " root %llu not found in extent tree\n",
2455                                         (unsigned long long)rec->start,
2456                                         (unsigned long long)tback->parent,
2457                                         (unsigned long long)tback->root);
2458                         }
2459                 }
2460                 if (!back->is_data && !back->found_ref) {
2461                         err = 1;
2462                         if (!print_errs)
2463                                 goto out;
2464                         tback = (struct tree_backref *)back;
2465                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
2466                                 (unsigned long long)rec->start,
2467                                 back->full_backref ? "parent" : "root",
2468                                 back->full_backref ?
2469                                 (unsigned long long)tback->parent :
2470                                 (unsigned long long)tback->root, back);
2471                 }
2472                 if (back->is_data) {
2473                         dback = (struct data_backref *)back;
2474                         if (dback->found_ref != dback->num_refs) {
2475                                 err = 1;
2476                                 if (!print_errs)
2477                                         goto out;
2478                                 fprintf(stderr, "Incorrect local backref count"
2479                                         " on %llu %s %llu owner %llu"
2480                                         " offset %llu found %u wanted %u back %p\n",
2481                                         (unsigned long long)rec->start,
2482                                         back->full_backref ?
2483                                         "parent" : "root",
2484                                         back->full_backref ?
2485                                         (unsigned long long)dback->parent:
2486                                         (unsigned long long)dback->root,
2487                                         (unsigned long long)dback->owner,
2488                                         (unsigned long long)dback->offset,
2489                                         dback->found_ref, dback->num_refs, back);
2490                         }
2491                         if (dback->disk_bytenr != rec->start) {
2492                                 err = 1;
2493                                 if (!print_errs)
2494                                         goto out;
2495                                 fprintf(stderr, "Backref disk bytenr does not"
2496                                         " match extent record, bytenr=%llu, "
2497                                         "ref bytenr=%llu\n",
2498                                         (unsigned long long)rec->start,
2499                                         (unsigned long long)dback->disk_bytenr);
2500                         }
2501
2502                         if (dback->bytes != rec->nr) {
2503                                 err = 1;
2504                                 if (!print_errs)
2505                                         goto out;
2506                                 fprintf(stderr, "Backref bytes do not match "
2507                                         "extent backref, bytenr=%llu, ref "
2508                                         "bytes=%llu, backref bytes=%llu\n",
2509                                         (unsigned long long)rec->start,
2510                                         (unsigned long long)rec->nr,
2511                                         (unsigned long long)dback->bytes);
2512                         }
2513                 }
2514                 if (!back->is_data) {
2515                         found += 1;
2516                 } else {
2517                         dback = (struct data_backref *)back;
2518                         found += dback->found_ref;
2519                 }
2520         }
2521         if (found != rec->refs) {
2522                 err = 1;
2523                 if (!print_errs)
2524                         goto out;
2525                 fprintf(stderr, "Incorrect global backref count "
2526                         "on %llu found %llu wanted %llu\n",
2527                         (unsigned long long)rec->start,
2528                         (unsigned long long)found,
2529                         (unsigned long long)rec->refs);
2530         }
2531 out:
2532         return err;
2533 }
2534
2535 static int free_all_extent_backrefs(struct extent_record *rec)
2536 {
2537         struct extent_backref *back;
2538         struct list_head *cur;
2539         while (!list_empty(&rec->backrefs)) {
2540                 cur = rec->backrefs.next;
2541                 back = list_entry(cur, struct extent_backref, list);
2542                 list_del(cur);
2543                 free(back);
2544         }
2545         return 0;
2546 }
2547
2548 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
2549                                      struct cache_tree *extent_cache)
2550 {
2551         struct cache_extent *cache;
2552         struct extent_record *rec;
2553
2554         while (1) {
2555                 cache = first_cache_extent(extent_cache);
2556                 if (!cache)
2557                         break;
2558                 rec = container_of(cache, struct extent_record, cache);
2559                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2560                 remove_cache_extent(extent_cache, cache);
2561                 free_all_extent_backrefs(rec);
2562                 free(rec);
2563         }
2564 }
2565
2566 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2567                                  struct extent_record *rec)
2568 {
2569         if (rec->content_checked && rec->owner_ref_checked &&
2570             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2571             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2572                 remove_cache_extent(extent_cache, &rec->cache);
2573                 free_all_extent_backrefs(rec);
2574                 list_del_init(&rec->list);
2575                 free(rec);
2576         }
2577         return 0;
2578 }
2579
2580 static int check_owner_ref(struct btrfs_root *root,
2581                             struct extent_record *rec,
2582                             struct extent_buffer *buf)
2583 {
2584         struct extent_backref *node;
2585         struct tree_backref *back;
2586         struct btrfs_root *ref_root;
2587         struct btrfs_key key;
2588         struct btrfs_path path;
2589         struct extent_buffer *parent;
2590         int level;
2591         int found = 0;
2592         int ret;
2593
2594         list_for_each_entry(node, &rec->backrefs, list) {
2595                 if (node->is_data)
2596                         continue;
2597                 if (!node->found_ref)
2598                         continue;
2599                 if (node->full_backref)
2600                         continue;
2601                 back = (struct tree_backref *)node;
2602                 if (btrfs_header_owner(buf) == back->root)
2603                         return 0;
2604         }
2605         BUG_ON(rec->is_root);
2606
2607         /* try to find the block by search corresponding fs tree */
2608         key.objectid = btrfs_header_owner(buf);
2609         key.type = BTRFS_ROOT_ITEM_KEY;
2610         key.offset = (u64)-1;
2611
2612         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2613         if (IS_ERR(ref_root))
2614                 return 1;
2615
2616         level = btrfs_header_level(buf);
2617         if (level == 0)
2618                 btrfs_item_key_to_cpu(buf, &key, 0);
2619         else
2620                 btrfs_node_key_to_cpu(buf, &key, 0);
2621
2622         btrfs_init_path(&path);
2623         path.lowest_level = level + 1;
2624         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2625         if (ret < 0)
2626                 return 0;
2627
2628         parent = path.nodes[level + 1];
2629         if (parent && buf->start == btrfs_node_blockptr(parent,
2630                                                         path.slots[level + 1]))
2631                 found = 1;
2632
2633         btrfs_release_path(&path);
2634         return found ? 0 : 1;
2635 }
2636
2637 static int is_extent_tree_record(struct extent_record *rec)
2638 {
2639         struct list_head *cur = rec->backrefs.next;
2640         struct extent_backref *node;
2641         struct tree_backref *back;
2642         int is_extent = 0;
2643
2644         while(cur != &rec->backrefs) {
2645                 node = list_entry(cur, struct extent_backref, list);
2646                 cur = cur->next;
2647                 if (node->is_data)
2648                         return 0;
2649                 back = (struct tree_backref *)node;
2650                 if (node->full_backref)
2651                         return 0;
2652                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2653                         is_extent = 1;
2654         }
2655         return is_extent;
2656 }
2657
2658
2659 static int record_bad_block_io(struct btrfs_fs_info *info,
2660                                struct cache_tree *extent_cache,
2661                                u64 start, u64 len)
2662 {
2663         struct extent_record *rec;
2664         struct cache_extent *cache;
2665         struct btrfs_key key;
2666
2667         cache = lookup_cache_extent(extent_cache, start, len);
2668         if (!cache)
2669                 return 0;
2670
2671         rec = container_of(cache, struct extent_record, cache);
2672         if (!is_extent_tree_record(rec))
2673                 return 0;
2674
2675         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2676         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2677 }
2678
2679 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
2680                        struct extent_buffer *buf, int slot)
2681 {
2682         if (btrfs_header_level(buf)) {
2683                 struct btrfs_key_ptr ptr1, ptr2;
2684
2685                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
2686                                    sizeof(struct btrfs_key_ptr));
2687                 read_extent_buffer(buf, &ptr2,
2688                                    btrfs_node_key_ptr_offset(slot + 1),
2689                                    sizeof(struct btrfs_key_ptr));
2690                 write_extent_buffer(buf, &ptr1,
2691                                     btrfs_node_key_ptr_offset(slot + 1),
2692                                     sizeof(struct btrfs_key_ptr));
2693                 write_extent_buffer(buf, &ptr2,
2694                                     btrfs_node_key_ptr_offset(slot),
2695                                     sizeof(struct btrfs_key_ptr));
2696                 if (slot == 0) {
2697                         struct btrfs_disk_key key;
2698                         btrfs_node_key(buf, &key, 0);
2699                         btrfs_fixup_low_keys(root, path, &key,
2700                                              btrfs_header_level(buf) + 1);
2701                 }
2702         } else {
2703                 struct btrfs_item *item1, *item2;
2704                 struct btrfs_key k1, k2;
2705                 char *item1_data, *item2_data;
2706                 u32 item1_offset, item2_offset, item1_size, item2_size;
2707
2708                 item1 = btrfs_item_nr(slot);
2709                 item2 = btrfs_item_nr(slot + 1);
2710                 btrfs_item_key_to_cpu(buf, &k1, slot);
2711                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
2712                 item1_offset = btrfs_item_offset(buf, item1);
2713                 item2_offset = btrfs_item_offset(buf, item2);
2714                 item1_size = btrfs_item_size(buf, item1);
2715                 item2_size = btrfs_item_size(buf, item2);
2716
2717                 item1_data = malloc(item1_size);
2718                 if (!item1_data)
2719                         return -ENOMEM;
2720                 item2_data = malloc(item2_size);
2721                 if (!item2_data) {
2722                         free(item1_data);
2723                         return -ENOMEM;
2724                 }
2725
2726                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
2727                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
2728
2729                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
2730                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
2731                 free(item1_data);
2732                 free(item2_data);
2733
2734                 btrfs_set_item_offset(buf, item1, item2_offset);
2735                 btrfs_set_item_offset(buf, item2, item1_offset);
2736                 btrfs_set_item_size(buf, item1, item2_size);
2737                 btrfs_set_item_size(buf, item2, item1_size);
2738
2739                 path->slots[0] = slot;
2740                 btrfs_set_item_key_unsafe(root, path, &k2);
2741                 path->slots[0] = slot + 1;
2742                 btrfs_set_item_key_unsafe(root, path, &k1);
2743         }
2744         return 0;
2745 }
2746
2747 /*
2748  * Attempt to fix basic block failures.  Currently we only handle bad key
2749  * orders, we will cycle through the keys and swap them if necessary.
2750  */
2751 static int try_to_fix_bad_block(struct btrfs_trans_handle *trans,
2752                                 struct btrfs_root *root,
2753                                 struct extent_buffer *buf,
2754                                 struct btrfs_disk_key *parent_key,
2755                                 enum btrfs_tree_block_status status)
2756 {
2757         struct btrfs_path *path;
2758         struct btrfs_key k1, k2;
2759         int i;
2760         int level;
2761         int ret;
2762
2763         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
2764                 return -EIO;
2765
2766         k1.objectid = btrfs_header_owner(buf);
2767         k1.type = BTRFS_ROOT_ITEM_KEY;
2768         k1.offset = (u64)-1;
2769
2770         root = btrfs_read_fs_root(root->fs_info, &k1);
2771         if (IS_ERR(root))
2772                 return -EIO;
2773
2774         record_root_in_trans(trans, root);
2775
2776         path = btrfs_alloc_path();
2777         if (!path)
2778                 return -EIO;
2779
2780         level = btrfs_header_level(buf);
2781         path->lowest_level = level;
2782         path->skip_check_block = 1;
2783         if (level)
2784                 btrfs_node_key_to_cpu(buf, &k1, 0);
2785         else
2786                 btrfs_item_key_to_cpu(buf, &k1, 0);
2787
2788         ret = btrfs_search_slot(trans, root, &k1, path, 0, 1);
2789         if (ret) {
2790                 btrfs_free_path(path);
2791                 return -EIO;
2792         }
2793
2794         buf = path->nodes[level];
2795         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
2796                 if (level) {
2797                         btrfs_node_key_to_cpu(buf, &k1, i);
2798                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
2799                 } else {
2800                         btrfs_item_key_to_cpu(buf, &k1, i);
2801                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
2802                 }
2803                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
2804                         continue;
2805                 ret = swap_values(root, path, buf, i);
2806                 if (ret)
2807                         break;
2808                 btrfs_mark_buffer_dirty(buf);
2809                 i = 0;
2810         }
2811
2812         btrfs_free_path(path);
2813         return ret;
2814 }
2815
2816 static int check_block(struct btrfs_trans_handle *trans,
2817                        struct btrfs_root *root,
2818                        struct cache_tree *extent_cache,
2819                        struct extent_buffer *buf, u64 flags)
2820 {
2821         struct extent_record *rec;
2822         struct cache_extent *cache;
2823         struct btrfs_key key;
2824         enum btrfs_tree_block_status status;
2825         int ret = 0;
2826         int level;
2827
2828         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
2829         if (!cache)
2830                 return 1;
2831         rec = container_of(cache, struct extent_record, cache);
2832         rec->generation = btrfs_header_generation(buf);
2833
2834         level = btrfs_header_level(buf);
2835         if (btrfs_header_nritems(buf) > 0) {
2836
2837                 if (level == 0)
2838                         btrfs_item_key_to_cpu(buf, &key, 0);
2839                 else
2840                         btrfs_node_key_to_cpu(buf, &key, 0);
2841
2842                 rec->info_objectid = key.objectid;
2843         }
2844         rec->info_level = level;
2845
2846         if (btrfs_is_leaf(buf))
2847                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
2848         else
2849                 status = btrfs_check_node(root, &rec->parent_key, buf);
2850
2851         if (status != BTRFS_TREE_BLOCK_CLEAN) {
2852                 if (repair)
2853                         status = try_to_fix_bad_block(trans, root, buf,
2854                                                       &rec->parent_key,
2855                                                       status);
2856                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2857                         ret = -EIO;
2858                         fprintf(stderr, "bad block %llu\n",
2859                                 (unsigned long long)buf->start);
2860                 } else {
2861                         /*
2862                          * Signal to callers we need to start the scan over
2863                          * again since we'll have cow'ed blocks.
2864                          */
2865                         ret = -EAGAIN;
2866                 }
2867         } else {
2868                 rec->content_checked = 1;
2869                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2870                         rec->owner_ref_checked = 1;
2871                 else {
2872                         ret = check_owner_ref(root, rec, buf);
2873                         if (!ret)
2874                                 rec->owner_ref_checked = 1;
2875                 }
2876         }
2877         if (!ret)
2878                 maybe_free_extent_rec(extent_cache, rec);
2879         return ret;
2880 }
2881
2882 static struct tree_backref *find_tree_backref(struct extent_record *rec,
2883                                                 u64 parent, u64 root)
2884 {
2885         struct list_head *cur = rec->backrefs.next;
2886         struct extent_backref *node;
2887         struct tree_backref *back;
2888
2889         while(cur != &rec->backrefs) {
2890                 node = list_entry(cur, struct extent_backref, list);
2891                 cur = cur->next;
2892                 if (node->is_data)
2893                         continue;
2894                 back = (struct tree_backref *)node;
2895                 if (parent > 0) {
2896                         if (!node->full_backref)
2897                                 continue;
2898                         if (parent == back->parent)
2899                                 return back;
2900                 } else {
2901                         if (node->full_backref)
2902                                 continue;
2903                         if (back->root == root)
2904                                 return back;
2905                 }
2906         }
2907         return NULL;
2908 }
2909
2910 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
2911                                                 u64 parent, u64 root)
2912 {
2913         struct tree_backref *ref = malloc(sizeof(*ref));
2914         memset(&ref->node, 0, sizeof(ref->node));
2915         if (parent > 0) {
2916                 ref->parent = parent;
2917                 ref->node.full_backref = 1;
2918         } else {
2919                 ref->root = root;
2920                 ref->node.full_backref = 0;
2921         }
2922         list_add_tail(&ref->node.list, &rec->backrefs);
2923
2924         return ref;
2925 }
2926
2927 static struct data_backref *find_data_backref(struct extent_record *rec,
2928                                                 u64 parent, u64 root,
2929                                                 u64 owner, u64 offset,
2930                                                 int found_ref,
2931                                                 u64 disk_bytenr, u64 bytes)
2932 {
2933         struct list_head *cur = rec->backrefs.next;
2934         struct extent_backref *node;
2935         struct data_backref *back;
2936
2937         while(cur != &rec->backrefs) {
2938                 node = list_entry(cur, struct extent_backref, list);
2939                 cur = cur->next;
2940                 if (!node->is_data)
2941                         continue;
2942                 back = (struct data_backref *)node;
2943                 if (parent > 0) {
2944                         if (!node->full_backref)
2945                                 continue;
2946                         if (parent == back->parent)
2947                                 return back;
2948                 } else {
2949                         if (node->full_backref)
2950                                 continue;
2951                         if (back->root == root && back->owner == owner &&
2952                             back->offset == offset) {
2953                                 if (found_ref && node->found_ref &&
2954                                     (back->bytes != bytes ||
2955                                     back->disk_bytenr != disk_bytenr))
2956                                         continue;
2957                                 return back;
2958                         }
2959                 }
2960         }
2961         return NULL;
2962 }
2963
2964 static struct data_backref *alloc_data_backref(struct extent_record *rec,
2965                                                 u64 parent, u64 root,
2966                                                 u64 owner, u64 offset,
2967                                                 u64 max_size)
2968 {
2969         struct data_backref *ref = malloc(sizeof(*ref));
2970         memset(&ref->node, 0, sizeof(ref->node));
2971         ref->node.is_data = 1;
2972
2973         if (parent > 0) {
2974                 ref->parent = parent;
2975                 ref->owner = 0;
2976                 ref->offset = 0;
2977                 ref->node.full_backref = 1;
2978         } else {
2979                 ref->root = root;
2980                 ref->owner = owner;
2981                 ref->offset = offset;
2982                 ref->node.full_backref = 0;
2983         }
2984         ref->bytes = max_size;
2985         ref->found_ref = 0;
2986         ref->num_refs = 0;
2987         list_add_tail(&ref->node.list, &rec->backrefs);
2988         if (max_size > rec->max_size)
2989                 rec->max_size = max_size;
2990         return ref;
2991 }
2992
2993 static int add_extent_rec(struct cache_tree *extent_cache,
2994                           struct btrfs_key *parent_key, u64 parent_gen,
2995                           u64 start, u64 nr, u64 extent_item_refs,
2996                           int is_root, int inc_ref, int set_checked,
2997                           int metadata, int extent_rec, u64 max_size)
2998 {
2999         struct extent_record *rec;
3000         struct cache_extent *cache;
3001         int ret = 0;
3002         int dup = 0;
3003
3004         cache = lookup_cache_extent(extent_cache, start, nr);
3005         if (cache) {
3006                 rec = container_of(cache, struct extent_record, cache);
3007                 if (inc_ref)
3008                         rec->refs++;
3009                 if (rec->nr == 1)
3010                         rec->nr = max(nr, max_size);
3011
3012                 /*
3013                  * We need to make sure to reset nr to whatever the extent
3014                  * record says was the real size, this way we can compare it to
3015                  * the backrefs.
3016                  */
3017                 if (extent_rec) {
3018                         if (start != rec->start || rec->found_rec) {
3019                                 struct extent_record *tmp;
3020
3021                                 dup = 1;
3022                                 if (list_empty(&rec->list))
3023                                         list_add_tail(&rec->list,
3024                                                       &duplicate_extents);
3025
3026                                 /*
3027                                  * We have to do this song and dance in case we
3028                                  * find an extent record that falls inside of
3029                                  * our current extent record but does not have
3030                                  * the same objectid.
3031                                  */
3032                                 tmp = malloc(sizeof(*tmp));
3033                                 if (!tmp)
3034                                         return -ENOMEM;
3035                                 tmp->start = start;
3036                                 tmp->max_size = max_size;
3037                                 tmp->nr = nr;
3038                                 tmp->found_rec = 1;
3039                                 tmp->metadata = metadata;
3040                                 tmp->extent_item_refs = extent_item_refs;
3041                                 INIT_LIST_HEAD(&tmp->list);
3042                                 list_add_tail(&tmp->list, &rec->dups);
3043                                 rec->num_duplicates++;
3044                         } else {
3045                                 rec->nr = nr;
3046                                 rec->found_rec = 1;
3047                         }
3048                 }
3049
3050                 if (extent_item_refs && !dup) {
3051                         if (rec->extent_item_refs) {
3052                                 fprintf(stderr, "block %llu rec "
3053                                         "extent_item_refs %llu, passed %llu\n",
3054                                         (unsigned long long)start,
3055                                         (unsigned long long)
3056                                                         rec->extent_item_refs,
3057                                         (unsigned long long)extent_item_refs);
3058                         }
3059                         rec->extent_item_refs = extent_item_refs;
3060                 }
3061                 if (is_root)
3062                         rec->is_root = 1;
3063                 if (set_checked) {
3064                         rec->content_checked = 1;
3065                         rec->owner_ref_checked = 1;
3066                 }
3067
3068                 if (parent_key)
3069                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
3070                 if (parent_gen)
3071                         rec->parent_generation = parent_gen;
3072
3073                 if (rec->max_size < max_size)
3074                         rec->max_size = max_size;
3075
3076                 maybe_free_extent_rec(extent_cache, rec);
3077                 return ret;
3078         }
3079         rec = malloc(sizeof(*rec));
3080         rec->start = start;
3081         rec->max_size = max_size;
3082         rec->nr = max(nr, max_size);
3083         rec->found_rec = !!extent_rec;
3084         rec->content_checked = 0;
3085         rec->owner_ref_checked = 0;
3086         rec->num_duplicates = 0;
3087         rec->metadata = metadata;
3088         INIT_LIST_HEAD(&rec->backrefs);
3089         INIT_LIST_HEAD(&rec->dups);
3090         INIT_LIST_HEAD(&rec->list);
3091
3092         if (is_root)
3093                 rec->is_root = 1;
3094         else
3095                 rec->is_root = 0;
3096
3097         if (inc_ref)
3098                 rec->refs = 1;
3099         else
3100                 rec->refs = 0;
3101
3102         if (extent_item_refs)
3103                 rec->extent_item_refs = extent_item_refs;
3104         else
3105                 rec->extent_item_refs = 0;
3106
3107         if (parent_key)
3108                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
3109         else
3110                 memset(&rec->parent_key, 0, sizeof(*parent_key));
3111
3112         if (parent_gen)
3113                 rec->parent_generation = parent_gen;
3114         else
3115                 rec->parent_generation = 0;
3116
3117         rec->cache.start = start;
3118         rec->cache.size = nr;
3119         ret = insert_cache_extent(extent_cache, &rec->cache);
3120         BUG_ON(ret);
3121         bytes_used += nr;
3122         if (set_checked) {
3123                 rec->content_checked = 1;
3124                 rec->owner_ref_checked = 1;
3125         }
3126         return ret;
3127 }
3128
3129 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
3130                             u64 parent, u64 root, int found_ref)
3131 {
3132         struct extent_record *rec;
3133         struct tree_backref *back;
3134         struct cache_extent *cache;
3135
3136         cache = lookup_cache_extent(extent_cache, bytenr, 1);
3137         if (!cache) {
3138                 add_extent_rec(extent_cache, NULL, 0, bytenr,
3139                                1, 0, 0, 0, 0, 1, 0, 0);
3140                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
3141                 if (!cache)
3142                         abort();
3143         }
3144
3145         rec = container_of(cache, struct extent_record, cache);
3146         if (rec->start != bytenr) {
3147                 abort();
3148         }
3149
3150         back = find_tree_backref(rec, parent, root);
3151         if (!back)
3152                 back = alloc_tree_backref(rec, parent, root);
3153
3154         if (found_ref) {
3155                 if (back->node.found_ref) {
3156                         fprintf(stderr, "Extent back ref already exists "
3157                                 "for %llu parent %llu root %llu \n",
3158                                 (unsigned long long)bytenr,
3159                                 (unsigned long long)parent,
3160                                 (unsigned long long)root);
3161                 }
3162                 back->node.found_ref = 1;
3163         } else {
3164                 if (back->node.found_extent_tree) {
3165                         fprintf(stderr, "Extent back ref already exists "
3166                                 "for %llu parent %llu root %llu \n",
3167                                 (unsigned long long)bytenr,
3168                                 (unsigned long long)parent,
3169                                 (unsigned long long)root);
3170                 }
3171                 back->node.found_extent_tree = 1;
3172         }
3173         maybe_free_extent_rec(extent_cache, rec);
3174         return 0;
3175 }
3176
3177 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
3178                             u64 parent, u64 root, u64 owner, u64 offset,
3179                             u32 num_refs, int found_ref, u64 max_size)
3180 {
3181         struct extent_record *rec;
3182         struct data_backref *back;
3183         struct cache_extent *cache;
3184
3185         cache = lookup_cache_extent(extent_cache, bytenr, 1);
3186         if (!cache) {
3187                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
3188                                0, 0, max_size);
3189                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
3190                 if (!cache)
3191                         abort();
3192         }
3193
3194         rec = container_of(cache, struct extent_record, cache);
3195         if (rec->max_size < max_size)
3196                 rec->max_size = max_size;
3197
3198         /*
3199          * If found_ref is set then max_size is the real size and must match the
3200          * existing refs.  So if we have already found a ref then we need to
3201          * make sure that this ref matches the existing one, otherwise we need
3202          * to add a new backref so we can notice that the backrefs don't match
3203          * and we need to figure out who is telling the truth.  This is to
3204          * account for that awful fsync bug I introduced where we'd end up with
3205          * a btrfs_file_extent_item that would have its length include multiple
3206          * prealloc extents or point inside of a prealloc extent.
3207          */
3208         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
3209                                  bytenr, max_size);
3210         if (!back)
3211                 back = alloc_data_backref(rec, parent, root, owner, offset,
3212                                           max_size);
3213
3214         if (found_ref) {
3215                 BUG_ON(num_refs != 1);
3216                 if (back->node.found_ref)
3217                         BUG_ON(back->bytes != max_size);
3218                 back->node.found_ref = 1;
3219                 back->found_ref += 1;
3220                 back->bytes = max_size;
3221                 back->disk_bytenr = bytenr;
3222                 rec->refs += 1;
3223                 rec->content_checked = 1;
3224                 rec->owner_ref_checked = 1;
3225         } else {
3226                 if (back->node.found_extent_tree) {
3227                         fprintf(stderr, "Extent back ref already exists "
3228                                 "for %llu parent %llu root %llu "
3229                                 "owner %llu offset %llu num_refs %lu\n",
3230                                 (unsigned long long)bytenr,
3231                                 (unsigned long long)parent,
3232                                 (unsigned long long)root,
3233                                 (unsigned long long)owner,
3234                                 (unsigned long long)offset,
3235                                 (unsigned long)num_refs);
3236                 }
3237                 back->num_refs = num_refs;
3238                 back->node.found_extent_tree = 1;
3239         }
3240         maybe_free_extent_rec(extent_cache, rec);
3241         return 0;
3242 }
3243
3244 static int add_pending(struct cache_tree *pending,
3245                        struct cache_tree *seen, u64 bytenr, u32 size)
3246 {
3247         int ret;
3248         ret = add_cache_extent(seen, bytenr, size);
3249         if (ret)
3250                 return ret;
3251         add_cache_extent(pending, bytenr, size);
3252         return 0;
3253 }
3254
3255 static int pick_next_pending(struct cache_tree *pending,
3256                         struct cache_tree *reada,
3257                         struct cache_tree *nodes,
3258                         u64 last, struct block_info *bits, int bits_nr,
3259                         int *reada_bits)
3260 {
3261         unsigned long node_start = last;
3262         struct cache_extent *cache;
3263         int ret;
3264
3265         cache = search_cache_extent(reada, 0);
3266         if (cache) {
3267                 bits[0].start = cache->start;
3268                 bits[0].size = cache->size;
3269                 *reada_bits = 1;
3270                 return 1;
3271         }
3272         *reada_bits = 0;
3273         if (node_start > 32768)
3274                 node_start -= 32768;
3275
3276         cache = search_cache_extent(nodes, node_start);
3277         if (!cache)
3278                 cache = search_cache_extent(nodes, 0);
3279
3280         if (!cache) {
3281                  cache = search_cache_extent(pending, 0);
3282                  if (!cache)
3283                          return 0;
3284                  ret = 0;
3285                  do {
3286                          bits[ret].start = cache->start;
3287                          bits[ret].size = cache->size;
3288                          cache = next_cache_extent(cache);
3289                          ret++;
3290                  } while (cache && ret < bits_nr);
3291                  return ret;
3292         }
3293
3294         ret = 0;
3295         do {
3296                 bits[ret].start = cache->start;
3297                 bits[ret].size = cache->size;
3298                 cache = next_cache_extent(cache);
3299                 ret++;
3300         } while (cache && ret < bits_nr);
3301
3302         if (bits_nr - ret > 8) {
3303                 u64 lookup = bits[0].start + bits[0].size;
3304                 struct cache_extent *next;
3305                 next = search_cache_extent(pending, lookup);
3306                 while(next) {
3307                         if (next->start - lookup > 32768)
3308                                 break;
3309                         bits[ret].start = next->start;
3310                         bits[ret].size = next->size;
3311                         lookup = next->start + next->size;
3312                         ret++;
3313                         if (ret == bits_nr)
3314                                 break;
3315                         next = next_cache_extent(next);
3316                         if (!next)
3317                                 break;
3318                 }
3319         }
3320         return ret;
3321 }
3322
3323 static void free_chunk_record(struct cache_extent *cache)
3324 {
3325         struct chunk_record *rec;
3326
3327         rec = container_of(cache, struct chunk_record, cache);
3328         list_del_init(&rec->list);
3329         list_del_init(&rec->dextents);
3330         free(rec);
3331 }
3332
3333 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
3334 {
3335         cache_tree_free_extents(chunk_cache, free_chunk_record);
3336 }
3337
3338 static void free_device_record(struct rb_node *node)
3339 {
3340         struct device_record *rec;
3341
3342         rec = container_of(node, struct device_record, node);
3343         free(rec);
3344 }
3345
3346 FREE_RB_BASED_TREE(device_cache, free_device_record);
3347
3348 int insert_block_group_record(struct block_group_tree *tree,
3349                               struct block_group_record *bg_rec)
3350 {
3351         int ret;
3352
3353         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
3354         if (ret)
3355                 return ret;
3356
3357         list_add_tail(&bg_rec->list, &tree->block_groups);
3358         return 0;
3359 }
3360
3361 static void free_block_group_record(struct cache_extent *cache)
3362 {
3363         struct block_group_record *rec;
3364
3365         rec = container_of(cache, struct block_group_record, cache);
3366         list_del_init(&rec->list);
3367         free(rec);
3368 }
3369
3370 void free_block_group_tree(struct block_group_tree *tree)
3371 {
3372         cache_tree_free_extents(&tree->tree, free_block_group_record);
3373 }
3374
3375 int insert_device_extent_record(struct device_extent_tree *tree,
3376                                 struct device_extent_record *de_rec)
3377 {
3378         int ret;
3379
3380         /*
3381          * Device extent is a bit different from the other extents, because
3382          * the extents which belong to the different devices may have the
3383          * same start and size, so we need use the special extent cache
3384          * search/insert functions.
3385          */
3386         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
3387         if (ret)
3388                 return ret;
3389
3390         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
3391         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
3392         return 0;
3393 }
3394
3395 static void free_device_extent_record(struct cache_extent *cache)
3396 {
3397         struct device_extent_record *rec;
3398
3399         rec = container_of(cache, struct device_extent_record, cache);
3400         if (!list_empty(&rec->chunk_list))
3401                 list_del_init(&rec->chunk_list);
3402         if (!list_empty(&rec->device_list))
3403                 list_del_init(&rec->device_list);
3404         free(rec);
3405 }
3406
3407 void free_device_extent_tree(struct device_extent_tree *tree)
3408 {
3409         cache_tree_free_extents(&tree->tree, free_device_extent_record);
3410 }
3411
3412 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3413 static int process_extent_ref_v0(struct cache_tree *extent_cache,
3414                                  struct extent_buffer *leaf, int slot)
3415 {
3416         struct btrfs_extent_ref_v0 *ref0;
3417         struct btrfs_key key;
3418
3419         btrfs_item_key_to_cpu(leaf, &key, slot);
3420         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
3421         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
3422                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
3423         } else {
3424                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
3425                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
3426         }
3427         return 0;
3428 }
3429 #endif
3430
3431 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
3432                                             struct btrfs_key *key,
3433                                             int slot)
3434 {
3435         struct btrfs_chunk *ptr;
3436         struct chunk_record *rec;
3437         int num_stripes, i;
3438
3439         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3440         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
3441
3442         rec = malloc(btrfs_chunk_record_size(num_stripes));
3443         if (!rec) {
3444                 fprintf(stderr, "memory allocation failed\n");
3445                 exit(-1);
3446         }
3447
3448         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
3449
3450         INIT_LIST_HEAD(&rec->list);
3451         INIT_LIST_HEAD(&rec->dextents);
3452         rec->bg_rec = NULL;
3453
3454         rec->cache.start = key->offset;
3455         rec->cache.size = btrfs_chunk_length(leaf, ptr);
3456
3457         rec->generation = btrfs_header_generation(leaf);
3458
3459         rec->objectid = key->objectid;
3460         rec->type = key->type;
3461         rec->offset = key->offset;
3462
3463         rec->length = rec->cache.size;
3464         rec->owner = btrfs_chunk_owner(leaf, ptr);
3465         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
3466         rec->type_flags = btrfs_chunk_type(leaf, ptr);
3467         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
3468         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
3469         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
3470         rec->num_stripes = num_stripes;
3471         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
3472
3473         for (i = 0; i < rec->num_stripes; ++i) {
3474                 rec->stripes[i].devid =
3475                         btrfs_stripe_devid_nr(leaf, ptr, i);
3476                 rec->stripes[i].offset =
3477                         btrfs_stripe_offset_nr(leaf, ptr, i);
3478                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
3479                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
3480                                 BTRFS_UUID_SIZE);
3481         }
3482
3483         return rec;
3484 }
3485
3486 static int process_chunk_item(struct cache_tree *chunk_cache,
3487                               struct btrfs_key *key, struct extent_buffer *eb,
3488                               int slot)
3489 {
3490         struct chunk_record *rec;
3491         int ret = 0;
3492
3493         rec = btrfs_new_chunk_record(eb, key, slot);
3494         ret = insert_cache_extent(chunk_cache, &rec->cache);
3495         if (ret) {
3496                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
3497                         rec->offset, rec->length);
3498                 free(rec);
3499         }
3500
3501         return ret;
3502 }
3503
3504 static int process_device_item(struct rb_root *dev_cache,
3505                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
3506 {
3507         struct btrfs_dev_item *ptr;
3508         struct device_record *rec;
3509         int ret = 0;
3510
3511         ptr = btrfs_item_ptr(eb,
3512                 slot, struct btrfs_dev_item);
3513
3514         rec = malloc(sizeof(*rec));
3515         if (!rec) {
3516                 fprintf(stderr, "memory allocation failed\n");
3517                 return -ENOMEM;
3518         }
3519
3520         rec->devid = key->offset;
3521         rec->generation = btrfs_header_generation(eb);
3522
3523         rec->objectid = key->objectid;
3524         rec->type = key->type;
3525         rec->offset = key->offset;
3526
3527         rec->devid = btrfs_device_id(eb, ptr);
3528         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
3529         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
3530
3531         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
3532         if (ret) {
3533                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
3534                 free(rec);
3535         }
3536
3537         return ret;
3538 }
3539
3540 struct block_group_record *
3541 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
3542                              int slot)
3543 {
3544         struct btrfs_block_group_item *ptr;
3545         struct block_group_record *rec;
3546
3547         rec = malloc(sizeof(*rec));
3548         if (!rec) {
3549                 fprintf(stderr, "memory allocation failed\n");
3550                 exit(-1);
3551         }
3552         memset(rec, 0, sizeof(*rec));
3553
3554         rec->cache.start = key->objectid;
3555         rec->cache.size = key->offset;
3556
3557         rec->generation = btrfs_header_generation(leaf);
3558
3559         rec->objectid = key->objectid;
3560         rec->type = key->type;
3561         rec->offset = key->offset;
3562
3563         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
3564         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
3565
3566         INIT_LIST_HEAD(&rec->list);
3567
3568         return rec;
3569 }
3570
3571 static int process_block_group_item(struct block_group_tree *block_group_cache,
3572                                     struct btrfs_key *key,
3573                                     struct extent_buffer *eb, int slot)
3574 {
3575         struct block_group_record *rec;
3576         int ret = 0;
3577
3578         rec = btrfs_new_block_group_record(eb, key, slot);
3579         ret = insert_block_group_record(block_group_cache, rec);
3580         if (ret) {
3581                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
3582                         rec->objectid, rec->offset);
3583                 free(rec);
3584         }
3585
3586         return ret;
3587 }
3588
3589 struct device_extent_record *
3590 btrfs_new_device_extent_record(struct extent_buffer *leaf,
3591                                struct btrfs_key *key, int slot)
3592 {
3593         struct device_extent_record *rec;
3594         struct btrfs_dev_extent *ptr;
3595
3596         rec = malloc(sizeof(*rec));
3597         if (!rec) {
3598                 fprintf(stderr, "memory allocation failed\n");
3599                 exit(-1);
3600         }
3601         memset(rec, 0, sizeof(*rec));
3602
3603         rec->cache.objectid = key->objectid;
3604         rec->cache.start = key->offset;
3605
3606         rec->generation = btrfs_header_generation(leaf);
3607
3608         rec->objectid = key->objectid;
3609         rec->type = key->type;
3610         rec->offset = key->offset;
3611
3612         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
3613         rec->chunk_objecteid =
3614                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
3615         rec->chunk_offset =
3616                 btrfs_dev_extent_chunk_offset(leaf, ptr);
3617         rec->length = btrfs_dev_extent_length(leaf, ptr);
3618         rec->cache.size = rec->length;
3619
3620         INIT_LIST_HEAD(&rec->chunk_list);
3621         INIT_LIST_HEAD(&rec->device_list);
3622
3623         return rec;
3624 }
3625
3626 static int
3627 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
3628                            struct btrfs_key *key, struct extent_buffer *eb,
3629                            int slot)
3630 {
3631         struct device_extent_record *rec;
3632         int ret;
3633
3634         rec = btrfs_new_device_extent_record(eb, key, slot);
3635         ret = insert_device_extent_record(dev_extent_cache, rec);
3636         if (ret) {
3637                 fprintf(stderr,
3638                         "Device extent[%llu, %llu, %llu] existed.\n",
3639                         rec->objectid, rec->offset, rec->length);
3640                 free(rec);
3641         }
3642
3643         return ret;
3644 }
3645
3646 static int process_extent_item(struct btrfs_root *root,
3647                                struct cache_tree *extent_cache,
3648                                struct extent_buffer *eb, int slot)
3649 {
3650         struct btrfs_extent_item *ei;
3651         struct btrfs_extent_inline_ref *iref;
3652         struct btrfs_extent_data_ref *dref;
3653         struct btrfs_shared_data_ref *sref;
3654         struct btrfs_key key;
3655         unsigned long end;
3656         unsigned long ptr;
3657         int type;
3658         u32 item_size = btrfs_item_size_nr(eb, slot);
3659         u64 refs = 0;
3660         u64 offset;
3661         u64 num_bytes;
3662         int metadata = 0;
3663
3664         btrfs_item_key_to_cpu(eb, &key, slot);
3665
3666         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3667                 metadata = 1;
3668                 num_bytes = root->leafsize;
3669         } else {
3670                 num_bytes = key.offset;
3671         }
3672
3673         if (item_size < sizeof(*ei)) {
3674 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3675                 struct btrfs_extent_item_v0 *ei0;
3676                 BUG_ON(item_size != sizeof(*ei0));
3677                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
3678                 refs = btrfs_extent_refs_v0(eb, ei0);
3679 #else
3680                 BUG();
3681 #endif
3682                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
3683                                       num_bytes, refs, 0, 0, 0, metadata, 1,
3684                                       num_bytes);
3685         }
3686
3687         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
3688         refs = btrfs_extent_refs(eb, ei);
3689
3690         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
3691                        refs, 0, 0, 0, metadata, 1, num_bytes);
3692
3693         ptr = (unsigned long)(ei + 1);
3694         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
3695             key.type == BTRFS_EXTENT_ITEM_KEY)
3696                 ptr += sizeof(struct btrfs_tree_block_info);
3697
3698         end = (unsigned long)ei + item_size;
3699         while (ptr < end) {
3700                 iref = (struct btrfs_extent_inline_ref *)ptr;
3701                 type = btrfs_extent_inline_ref_type(eb, iref);
3702                 offset = btrfs_extent_inline_ref_offset(eb, iref);
3703                 switch (type) {
3704                 case BTRFS_TREE_BLOCK_REF_KEY:
3705                         add_tree_backref(extent_cache, key.objectid,
3706                                          0, offset, 0);
3707                         break;
3708                 case BTRFS_SHARED_BLOCK_REF_KEY:
3709                         add_tree_backref(extent_cache, key.objectid,
3710                                          offset, 0, 0);
3711                         break;
3712                 case BTRFS_EXTENT_DATA_REF_KEY:
3713                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3714                         add_data_backref(extent_cache, key.objectid, 0,
3715                                         btrfs_extent_data_ref_root(eb, dref),
3716                                         btrfs_extent_data_ref_objectid(eb,
3717                                                                        dref),
3718                                         btrfs_extent_data_ref_offset(eb, dref),
3719                                         btrfs_extent_data_ref_count(eb, dref),
3720                                         0, num_bytes);
3721                         break;
3722                 case BTRFS_SHARED_DATA_REF_KEY:
3723                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
3724                         add_data_backref(extent_cache, key.objectid, offset,
3725                                         0, 0, 0,
3726                                         btrfs_shared_data_ref_count(eb, sref),
3727                                         0, num_bytes);
3728                         break;
3729                 default:
3730                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
3731                                 key.objectid, key.type, num_bytes);
3732                         goto out;
3733                 }
3734                 ptr += btrfs_extent_inline_ref_size(type);
3735         }
3736         WARN_ON(ptr > end);
3737 out:
3738         return 0;
3739 }
3740
3741 static int check_cache_range(struct btrfs_root *root,
3742                              struct btrfs_block_group_cache *cache,
3743                              u64 offset, u64 bytes)
3744 {
3745         struct btrfs_free_space *entry;
3746         u64 *logical;
3747         u64 bytenr;
3748         int stripe_len;
3749         int i, nr, ret;
3750
3751         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3752                 bytenr = btrfs_sb_offset(i);
3753                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
3754                                        cache->key.objectid, bytenr, 0,
3755                                        &logical, &nr, &stripe_len);
3756                 if (ret)
3757                         return ret;
3758
3759                 while (nr--) {
3760                         if (logical[nr] + stripe_len <= offset)
3761                                 continue;
3762                         if (offset + bytes <= logical[nr])
3763                                 continue;
3764                         if (logical[nr] == offset) {
3765                                 if (stripe_len >= bytes) {
3766                                         kfree(logical);
3767                                         return 0;
3768                                 }
3769                                 bytes -= stripe_len;
3770                                 offset += stripe_len;
3771                         } else if (logical[nr] < offset) {
3772                                 if (logical[nr] + stripe_len >=
3773                                     offset + bytes) {
3774                                         kfree(logical);
3775                                         return 0;
3776                                 }
3777                                 bytes = (offset + bytes) -
3778                                         (logical[nr] + stripe_len);
3779                                 offset = logical[nr] + stripe_len;
3780                         } else {
3781                                 /*
3782                                  * Could be tricky, the super may land in the
3783                                  * middle of the area we're checking.  First
3784                                  * check the easiest case, it's at the end.
3785                                  */
3786                                 if (logical[nr] + stripe_len >=
3787                                     bytes + offset) {
3788                                         bytes = logical[nr] - offset;
3789                                         continue;
3790                                 }
3791
3792                                 /* Check the left side */
3793                                 ret = check_cache_range(root, cache,
3794                                                         offset,
3795                                                         logical[nr] - offset);
3796                                 if (ret) {
3797                                         kfree(logical);
3798                                         return ret;
3799                                 }
3800
3801                                 /* Now we continue with the right side */
3802                                 bytes = (offset + bytes) -
3803                                         (logical[nr] + stripe_len);
3804                                 offset = logical[nr] + stripe_len;
3805                         }
3806                 }
3807
3808                 kfree(logical);
3809         }
3810
3811         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
3812         if (!entry) {
3813                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
3814                         offset, offset+bytes);
3815                 return -EINVAL;
3816         }
3817
3818         if (entry->offset != offset) {
3819                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
3820                         entry->offset);
3821                 return -EINVAL;
3822         }
3823
3824         if (entry->bytes != bytes) {
3825                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
3826                         bytes, entry->bytes, offset);
3827                 return -EINVAL;
3828         }
3829
3830         unlink_free_space(cache->free_space_ctl, entry);
3831         free(entry);
3832         return 0;
3833 }
3834
3835 static int verify_space_cache(struct btrfs_root *root,
3836                               struct btrfs_block_group_cache *cache)
3837 {
3838         struct btrfs_path *path;
3839         struct extent_buffer *leaf;
3840         struct btrfs_key key;
3841         u64 last;
3842         int ret = 0;
3843
3844         path = btrfs_alloc_path();
3845         if (!path)
3846                 return -ENOMEM;
3847
3848         root = root->fs_info->extent_root;
3849
3850         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
3851
3852         key.objectid = last;
3853         key.offset = 0;
3854         key.type = BTRFS_EXTENT_ITEM_KEY;
3855
3856         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3857         if (ret < 0)
3858                 goto out;
3859         ret = 0;
3860         while (1) {
3861                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3862                         ret = btrfs_next_leaf(root, path);
3863                         if (ret < 0)
3864                                 goto out;
3865                         if (ret > 0) {
3866                                 ret = 0;
3867                                 break;
3868                         }
3869                 }
3870                 leaf = path->nodes[0];
3871                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3872                 if (key.objectid >= cache->key.offset + cache->key.objectid)
3873                         break;
3874                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3875                     key.type != BTRFS_METADATA_ITEM_KEY) {
3876                         path->slots[0]++;
3877                         continue;
3878                 }
3879
3880                 if (last == key.objectid) {
3881                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
3882                                 last = key.objectid + key.offset;
3883                         else
3884                                 last = key.objectid + root->leafsize;
3885                         path->slots[0]++;
3886                         continue;
3887                 }
3888
3889                 ret = check_cache_range(root, cache, last,
3890                                         key.objectid - last);
3891                 if (ret)
3892                         break;
3893                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3894                         last = key.objectid + key.offset;
3895                 else
3896                         last = key.objectid + root->leafsize;
3897                 path->slots[0]++;
3898         }
3899
3900         if (last < cache->key.objectid + cache->key.offset)
3901                 ret = check_cache_range(root, cache, last,
3902                                         cache->key.objectid +
3903                                         cache->key.offset - last);
3904
3905 out:
3906         btrfs_free_path(path);
3907
3908         if (!ret &&
3909             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
3910                 fprintf(stderr, "There are still entries left in the space "
3911                         "cache\n");
3912                 ret = -EINVAL;
3913         }
3914
3915         return ret;
3916 }
3917
3918 static int check_space_cache(struct btrfs_root *root)
3919 {
3920         struct btrfs_block_group_cache *cache;
3921         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
3922         int ret;
3923         int error = 0;
3924
3925         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
3926             btrfs_super_generation(root->fs_info->super_copy) !=
3927             btrfs_super_cache_generation(root->fs_info->super_copy)) {
3928                 printf("cache and super generation don't match, space cache "
3929                        "will be invalidated\n");
3930                 return 0;
3931         }
3932
3933         while (1) {
3934                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
3935                 if (!cache)
3936                         break;
3937
3938                 start = cache->key.objectid + cache->key.offset;
3939                 if (!cache->free_space_ctl) {
3940                         if (btrfs_init_free_space_ctl(cache,
3941                                                       root->sectorsize)) {
3942                                 ret = -ENOMEM;
3943                                 break;
3944                         }
3945                 } else {
3946                         btrfs_remove_free_space_cache(cache);
3947                 }
3948
3949                 ret = load_free_space_cache(root->fs_info, cache);
3950                 if (!ret)
3951                         continue;
3952
3953                 ret = verify_space_cache(root, cache);
3954                 if (ret) {
3955                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
3956                                 cache->key.objectid);
3957                         error++;
3958                 }
3959         }
3960
3961         return error ? -EINVAL : 0;
3962 }
3963
3964 static int read_extent_data(struct btrfs_root *root, char *data,
3965                         u64 logical, u64 *len, int mirror)
3966 {
3967         u64 offset = 0;
3968         struct btrfs_multi_bio *multi = NULL;
3969         struct btrfs_fs_info *info = root->fs_info;
3970         struct btrfs_device *device;
3971         int ret = 0;
3972         u64 max_len = *len;
3973
3974         ret = btrfs_map_block(&info->mapping_tree, READ, logical, len,
3975                               &multi, mirror, NULL);
3976         if (ret) {
3977                 fprintf(stderr, "Couldn't map the block %llu\n",
3978                                 logical + offset);
3979                 goto err;
3980         }
3981         device = multi->stripes[0].dev;
3982
3983         if (device->fd == 0)
3984                 goto err;
3985         if (*len > max_len)
3986                 *len = max_len;
3987
3988         ret = pread64(device->fd, data, *len, multi->stripes[0].physical);
3989         if (ret != *len)
3990                 ret = -EIO;
3991         else
3992                 ret = 0;
3993 err:
3994         kfree(multi);
3995         return ret;
3996 }
3997
3998 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
3999                         u64 num_bytes, unsigned long leaf_offset,
4000                         struct extent_buffer *eb) {
4001
4002         u64 offset = 0;
4003         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
4004         char *data;
4005         unsigned long csum_offset;
4006         u32 csum;
4007         u32 csum_expected;
4008         u64 read_len;
4009         u64 data_checked = 0;
4010         u64 tmp;
4011         int ret = 0;
4012         int mirror;
4013         int num_copies;
4014
4015         if (num_bytes % root->sectorsize)
4016                 return -EINVAL;
4017
4018         data = malloc(num_bytes);
4019         if (!data)
4020                 return -ENOMEM;
4021
4022         while (offset < num_bytes) {
4023                 mirror = 0;
4024 again:
4025                 read_len = num_bytes - offset;
4026                 /* read as much space once a time */
4027                 ret = read_extent_data(root, data + offset,
4028                                 bytenr + offset, &read_len, mirror);
4029                 if (ret)
4030                         goto out;
4031                 data_checked = 0;
4032                 /* verify every 4k data's checksum */
4033                 while (data_checked < read_len) {
4034                         csum = ~(u32)0;
4035                         tmp = offset + data_checked;
4036
4037                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
4038                                                csum, root->sectorsize);
4039                         btrfs_csum_final(csum, (char *)&csum);
4040
4041                         csum_offset = leaf_offset +
4042                                  tmp / root->sectorsize * csum_size;
4043                         read_extent_buffer(eb, (char *)&csum_expected,
4044                                            csum_offset, csum_size);
4045                         /* try another mirror */
4046                         if (csum != csum_expected) {
4047                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
4048                                                 mirror, bytenr + tmp,
4049                                                 csum, csum_expected);
4050                                 num_copies = btrfs_num_copies(
4051                                                 &root->fs_info->mapping_tree,
4052                                                 bytenr, num_bytes);
4053                                 if (mirror < num_copies - 1) {
4054                                         mirror += 1;
4055                                         goto again;
4056                                 }
4057                         }
4058                         data_checked += root->sectorsize;
4059                 }
4060                 offset += read_len;
4061         }
4062 out:
4063         free(data);
4064         return ret;
4065 }
4066
4067 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
4068                                u64 num_bytes)
4069 {
4070         struct btrfs_path *path;
4071         struct extent_buffer *leaf;
4072         struct btrfs_key key;
4073         int ret;
4074
4075         path = btrfs_alloc_path();
4076         if (!path) {
4077                 fprintf(stderr, "Error allocing path\n");
4078                 return -ENOMEM;
4079         }
4080
4081         key.objectid = bytenr;
4082         key.type = BTRFS_EXTENT_ITEM_KEY;
4083         key.offset = (u64)-1;
4084
4085 again:
4086         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
4087                                 0, 0);
4088         if (ret < 0) {
4089                 fprintf(stderr, "Error looking up extent record %d\n", ret);
4090                 btrfs_free_path(path);
4091                 return ret;
4092         } else if (ret) {
4093                 if (path->slots[0] > 0) {
4094                         path->slots[0]--;
4095                 } else {
4096                         ret = btrfs_prev_leaf(root, path);
4097                         if (ret < 0) {
4098                                 goto out;
4099                         } else if (ret > 0) {
4100                                 ret = 0;
4101                                 goto out;
4102                         }
4103                 }
4104         }
4105
4106         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4107
4108         /*
4109          * Block group items come before extent items if they have the same
4110          * bytenr, so walk back one more just in case.  Dear future traveler,
4111          * first congrats on mastering time travel.  Now if it's not too much
4112          * trouble could you go back to 2006 and tell Chris to make the
4113          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
4114          * EXTENT_ITEM_KEY please?
4115          */
4116         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
4117                 if (path->slots[0] > 0) {
4118                         path->slots[0]--;
4119                 } else {
4120                         ret = btrfs_prev_leaf(root, path);
4121                         if (ret < 0) {
4122                                 goto out;
4123                         } else if (ret > 0) {
4124                                 ret = 0;
4125                                 goto out;
4126                         }
4127                 }
4128                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4129         }
4130
4131         while (num_bytes) {
4132                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4133                         ret = btrfs_next_leaf(root, path);
4134                         if (ret < 0) {
4135                                 fprintf(stderr, "Error going to next leaf "
4136                                         "%d\n", ret);
4137                                 btrfs_free_path(path);
4138                                 return ret;
4139                         } else if (ret) {
4140                                 break;
4141                         }
4142                 }
4143                 leaf = path->nodes[0];
4144                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4145                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
4146                         path->slots[0]++;
4147                         continue;
4148                 }
4149                 if (key.objectid + key.offset < bytenr) {
4150                         path->slots[0]++;
4151                         continue;
4152                 }
4153                 if (key.objectid > bytenr + num_bytes)
4154                         break;
4155
4156                 if (key.objectid == bytenr) {
4157                         if (key.offset >= num_bytes) {
4158                                 num_bytes = 0;
4159                                 break;
4160                         }
4161                         num_bytes -= key.offset;
4162                         bytenr += key.offset;
4163                 } else if (key.objectid < bytenr) {
4164                         if (key.objectid + key.offset >= bytenr + num_bytes) {
4165                                 num_bytes = 0;
4166                                 break;
4167                         }
4168                         num_bytes = (bytenr + num_bytes) -
4169                                 (key.objectid + key.offset);
4170                         bytenr = key.objectid + key.offset;
4171                 } else {
4172                         if (key.objectid + key.offset < bytenr + num_bytes) {
4173                                 u64 new_start = key.objectid + key.offset;
4174                                 u64 new_bytes = bytenr + num_bytes - new_start;
4175
4176                                 /*
4177                                  * Weird case, the extent is in the middle of
4178                                  * our range, we'll have to search one side
4179                                  * and then the other.  Not sure if this happens
4180                                  * in real life, but no harm in coding it up
4181                                  * anyway just in case.
4182                                  */
4183                                 btrfs_release_path(path);
4184                                 ret = check_extent_exists(root, new_start,
4185                                                           new_bytes);
4186                                 if (ret) {
4187                                         fprintf(stderr, "Right section didn't "
4188                                                 "have a record\n");
4189                                         break;
4190                                 }
4191                                 num_bytes = key.objectid - bytenr;
4192                                 goto again;
4193                         }
4194                         num_bytes = key.objectid - bytenr;
4195                 }
4196                 path->slots[0]++;
4197         }
4198         ret = 0;
4199
4200 out:
4201         if (num_bytes && !ret) {
4202                 fprintf(stderr, "There are no extents for csum range "
4203                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
4204                 ret = 1;
4205         }
4206
4207         btrfs_free_path(path);
4208         return ret;
4209 }
4210
4211 static int check_csums(struct btrfs_root *root)
4212 {
4213         struct btrfs_path *path;
4214         struct extent_buffer *leaf;
4215         struct btrfs_key key;
4216         u64 offset = 0, num_bytes = 0;
4217         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
4218         int errors = 0;
4219         int ret;
4220         u64 data_len;
4221         unsigned long leaf_offset;
4222
4223         root = root->fs_info->csum_root;
4224         if (!extent_buffer_uptodate(root->node)) {
4225                 fprintf(stderr, "No valid csum tree found\n");
4226                 return -ENOENT;
4227         }
4228
4229         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
4230         key.type = BTRFS_EXTENT_CSUM_KEY;
4231         key.offset = 0;
4232
4233         path = btrfs_alloc_path();
4234         if (!path)
4235                 return -ENOMEM;
4236
4237         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4238         if (ret < 0) {
4239                 fprintf(stderr, "Error searching csum tree %d\n", ret);
4240                 btrfs_free_path(path);
4241                 return ret;
4242         }
4243
4244         if (ret > 0 && path->slots[0])
4245                 path->slots[0]--;
4246         ret = 0;
4247
4248         while (1) {
4249                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4250                         ret = btrfs_next_leaf(root, path);
4251                         if (ret < 0) {
4252                                 fprintf(stderr, "Error going to next leaf "
4253                                         "%d\n", ret);
4254                                 break;
4255                         }
4256                         if (ret)
4257                                 break;
4258                 }
4259                 leaf = path->nodes[0];
4260
4261                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4262                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
4263                         path->slots[0]++;
4264                         continue;
4265                 }
4266
4267                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
4268                               csum_size) * root->sectorsize;
4269                 if (!check_data_csum)
4270                         goto skip_csum_check;
4271                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
4272                 ret = check_extent_csums(root, key.offset, data_len,
4273                                          leaf_offset, leaf);
4274                 if (ret)
4275                         break;
4276 skip_csum_check:
4277                 if (!num_bytes) {
4278                         offset = key.offset;
4279                 } else if (key.offset != offset + num_bytes) {
4280                         ret = check_extent_exists(root, offset, num_bytes);
4281                         if (ret) {
4282                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
4283                                         "there is no extent record\n",
4284                                         offset, offset+num_bytes);
4285                                 errors++;
4286                         }
4287                         offset = key.offset;
4288                         num_bytes = 0;
4289                 }
4290                 num_bytes += data_len;
4291                 path->slots[0]++;
4292         }
4293
4294         btrfs_free_path(path);
4295         return errors;
4296 }
4297
4298 static int is_dropped_key(struct btrfs_key *key,
4299                           struct btrfs_key *drop_key) {
4300         if (key->objectid < drop_key->objectid)
4301                 return 1;
4302         else if (key->objectid == drop_key->objectid) {
4303                 if (key->type < drop_key->type)
4304                         return 1;
4305                 else if (key->type == drop_key->type) {
4306                         if (key->offset < drop_key->offset)
4307                                 return 1;
4308                 }
4309         }
4310         return 0;
4311 }
4312
4313 static int run_next_block(struct btrfs_trans_handle *trans,
4314                           struct btrfs_root *root,
4315                           struct block_info *bits,
4316                           int bits_nr,
4317                           u64 *last,
4318                           struct cache_tree *pending,
4319                           struct cache_tree *seen,
4320                           struct cache_tree *reada,
4321                           struct cache_tree *nodes,
4322                           struct cache_tree *extent_cache,
4323                           struct cache_tree *chunk_cache,
4324                           struct rb_root *dev_cache,
4325                           struct block_group_tree *block_group_cache,
4326                           struct device_extent_tree *dev_extent_cache,
4327                           struct btrfs_root_item *ri)
4328 {
4329         struct extent_buffer *buf;
4330         u64 bytenr;
4331         u32 size;
4332         u64 parent;
4333         u64 owner;
4334         u64 flags;
4335         u64 ptr;
4336         u64 gen = 0;
4337         int ret = 0;
4338         int i;
4339         int nritems;
4340         struct btrfs_key key;
4341         struct cache_extent *cache;
4342         int reada_bits;
4343
4344         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
4345                                     bits_nr, &reada_bits);
4346         if (nritems == 0)
4347                 return 1;
4348
4349         if (!reada_bits) {
4350                 for(i = 0; i < nritems; i++) {
4351                         ret = add_cache_extent(reada, bits[i].start,
4352                                                bits[i].size);
4353                         if (ret == -EEXIST)
4354                                 continue;
4355
4356                         /* fixme, get the parent transid */
4357                         readahead_tree_block(root, bits[i].start,
4358                                              bits[i].size, 0);
4359                 }
4360         }
4361         *last = bits[0].start;
4362         bytenr = bits[0].start;
4363         size = bits[0].size;
4364
4365         cache = lookup_cache_extent(pending, bytenr, size);
4366         if (cache) {
4367                 remove_cache_extent(pending, cache);
4368                 free(cache);
4369         }
4370         cache = lookup_cache_extent(reada, bytenr, size);
4371         if (cache) {
4372                 remove_cache_extent(reada, cache);
4373                 free(cache);
4374         }
4375         cache = lookup_cache_extent(nodes, bytenr, size);
4376         if (cache) {
4377                 remove_cache_extent(nodes, cache);
4378                 free(cache);
4379         }
4380         cache = lookup_cache_extent(extent_cache, bytenr, size);
4381         if (cache) {
4382                 struct extent_record *rec;
4383
4384                 rec = container_of(cache, struct extent_record, cache);
4385                 gen = rec->parent_generation;
4386         }
4387
4388         /* fixme, get the real parent transid */
4389         buf = read_tree_block(root, bytenr, size, gen);
4390         if (!extent_buffer_uptodate(buf)) {
4391                 record_bad_block_io(root->fs_info,
4392                                     extent_cache, bytenr, size);
4393                 goto out;
4394         }
4395
4396         nritems = btrfs_header_nritems(buf);
4397
4398         /*
4399          * FIXME, this only works only if we don't have any full
4400          * backref mode.
4401          */
4402         if (!init_extent_tree) {
4403                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
4404                                        btrfs_header_level(buf), 1, NULL,
4405                                        &flags);
4406                 if (ret < 0)
4407                         flags = 0;
4408         } else {
4409                 flags = 0;
4410         }
4411
4412         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4413                 parent = bytenr;
4414                 owner = 0;
4415         } else {
4416                 parent = 0;
4417                 owner = btrfs_header_owner(buf);
4418         }
4419
4420         ret = check_block(trans, root, extent_cache, buf, flags);
4421         if (ret)
4422                 goto out;
4423
4424         if (btrfs_is_leaf(buf)) {
4425                 btree_space_waste += btrfs_leaf_free_space(root, buf);
4426                 for (i = 0; i < nritems; i++) {
4427                         struct btrfs_file_extent_item *fi;
4428                         btrfs_item_key_to_cpu(buf, &key, i);
4429                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
4430                                 process_extent_item(root, extent_cache, buf,
4431                                                     i);
4432                                 continue;
4433                         }
4434                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4435                                 process_extent_item(root, extent_cache, buf,
4436                                                     i);
4437                                 continue;
4438                         }
4439                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
4440                                 total_csum_bytes +=
4441                                         btrfs_item_size_nr(buf, i);
4442                                 continue;
4443                         }
4444                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
4445                                 process_chunk_item(chunk_cache, &key, buf, i);
4446                                 continue;
4447                         }
4448                         if (key.type == BTRFS_DEV_ITEM_KEY) {
4449                                 process_device_item(dev_cache, &key, buf, i);
4450                                 continue;
4451                         }
4452                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
4453                                 process_block_group_item(block_group_cache,
4454                                         &key, buf, i);
4455                                 continue;
4456                         }
4457                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
4458                                 process_device_extent_item(dev_extent_cache,
4459                                         &key, buf, i);
4460                                 continue;
4461
4462                         }
4463                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
4464 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4465                                 process_extent_ref_v0(extent_cache, buf, i);
4466 #else
4467                                 BUG();
4468 #endif
4469                                 continue;
4470                         }
4471
4472                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
4473                                 add_tree_backref(extent_cache, key.objectid, 0,
4474                                                  key.offset, 0);
4475                                 continue;
4476                         }
4477                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
4478                                 add_tree_backref(extent_cache, key.objectid,
4479                                                  key.offset, 0, 0);
4480                                 continue;
4481                         }
4482                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
4483                                 struct btrfs_extent_data_ref *ref;
4484                                 ref = btrfs_item_ptr(buf, i,
4485                                                 struct btrfs_extent_data_ref);
4486                                 add_data_backref(extent_cache,
4487                                         key.objectid, 0,
4488                                         btrfs_extent_data_ref_root(buf, ref),
4489                                         btrfs_extent_data_ref_objectid(buf,
4490                                                                        ref),
4491                                         btrfs_extent_data_ref_offset(buf, ref),
4492                                         btrfs_extent_data_ref_count(buf, ref),
4493                                         0, root->sectorsize);
4494                                 continue;
4495                         }
4496                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
4497                                 struct btrfs_shared_data_ref *ref;
4498                                 ref = btrfs_item_ptr(buf, i,
4499                                                 struct btrfs_shared_data_ref);
4500                                 add_data_backref(extent_cache,
4501                                         key.objectid, key.offset, 0, 0, 0,
4502                                         btrfs_shared_data_ref_count(buf, ref),
4503                                         0, root->sectorsize);
4504                                 continue;
4505                         }
4506                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
4507                                 struct bad_item *bad;
4508
4509                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
4510                                         continue;
4511                                 if (!owner)
4512                                         continue;
4513                                 bad = malloc(sizeof(struct bad_item));
4514                                 if (!bad)
4515                                         continue;
4516                                 INIT_LIST_HEAD(&bad->list);
4517                                 memcpy(&bad->key, &key,
4518                                        sizeof(struct btrfs_key));
4519                                 bad->root_id = owner;
4520                                 list_add_tail(&bad->list, &delete_items);
4521                                 continue;
4522                         }
4523                         if (key.type != BTRFS_EXTENT_DATA_KEY)
4524                                 continue;
4525                         fi = btrfs_item_ptr(buf, i,
4526                                             struct btrfs_file_extent_item);
4527                         if (btrfs_file_extent_type(buf, fi) ==
4528                             BTRFS_FILE_EXTENT_INLINE)
4529                                 continue;
4530                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
4531                                 continue;
4532
4533                         data_bytes_allocated +=
4534                                 btrfs_file_extent_disk_num_bytes(buf, fi);
4535                         if (data_bytes_allocated < root->sectorsize) {
4536                                 abort();
4537                         }
4538                         data_bytes_referenced +=
4539                                 btrfs_file_extent_num_bytes(buf, fi);
4540                         add_data_backref(extent_cache,
4541                                 btrfs_file_extent_disk_bytenr(buf, fi),
4542                                 parent, owner, key.objectid, key.offset -
4543                                 btrfs_file_extent_offset(buf, fi), 1, 1,
4544                                 btrfs_file_extent_disk_num_bytes(buf, fi));
4545                 }
4546         } else {
4547                 int level;
4548                 struct btrfs_key first_key;
4549
4550                 first_key.objectid = 0;
4551
4552                 if (nritems > 0)
4553                         btrfs_item_key_to_cpu(buf, &first_key, 0);
4554                 level = btrfs_header_level(buf);
4555                 for (i = 0; i < nritems; i++) {
4556                         ptr = btrfs_node_blockptr(buf, i);
4557                         size = btrfs_level_size(root, level - 1);
4558                         btrfs_node_key_to_cpu(buf, &key, i);
4559                         if (ri != NULL) {
4560                                 struct btrfs_key drop_key;
4561                                 btrfs_disk_key_to_cpu(&drop_key,
4562                                                       &ri->drop_progress);
4563                                 if ((level == ri->drop_level)
4564                                     && is_dropped_key(&key, &drop_key)) {
4565                                         continue;
4566                                 }
4567                         }
4568                         ret = add_extent_rec(extent_cache, &key,
4569                                              btrfs_node_ptr_generation(buf, i),
4570                                              ptr, size, 0, 0, 1, 0, 1, 0,
4571                                              size);
4572                         BUG_ON(ret);
4573
4574                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
4575
4576                         if (level > 1) {
4577                                 add_pending(nodes, seen, ptr, size);
4578                         } else {
4579                                 add_pending(pending, seen, ptr, size);
4580                         }
4581                 }
4582                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
4583                                       nritems) * sizeof(struct btrfs_key_ptr);
4584         }
4585         total_btree_bytes += buf->len;
4586         if (fs_root_objectid(btrfs_header_owner(buf)))
4587                 total_fs_tree_bytes += buf->len;
4588         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
4589                 total_extent_tree_bytes += buf->len;
4590         if (!found_old_backref &&
4591             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
4592             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
4593             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
4594                 found_old_backref = 1;
4595 out:
4596         free_extent_buffer(buf);
4597         return ret;
4598 }
4599
4600 static int add_root_to_pending(struct extent_buffer *buf,
4601                                struct cache_tree *extent_cache,
4602                                struct cache_tree *pending,
4603                                struct cache_tree *seen,
4604                                struct cache_tree *nodes,
4605                                struct btrfs_key *root_key)
4606 {
4607         if (btrfs_header_level(buf) > 0)
4608                 add_pending(nodes, seen, buf->start, buf->len);
4609         else
4610                 add_pending(pending, seen, buf->start, buf->len);
4611         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
4612                        0, 1, 1, 0, 1, 0, buf->len);
4613
4614         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
4615             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
4616                 add_tree_backref(extent_cache, buf->start, buf->start,
4617                                  0, 1);
4618         else
4619                 add_tree_backref(extent_cache, buf->start, 0,
4620                                  root_key->objectid, 1);
4621         return 0;
4622 }
4623
4624 /* as we fix the tree, we might be deleting blocks that
4625  * we're tracking for repair.  This hook makes sure we
4626  * remove any backrefs for blocks as we are fixing them.
4627  */
4628 static int free_extent_hook(struct btrfs_trans_handle *trans,
4629                             struct btrfs_root *root,
4630                             u64 bytenr, u64 num_bytes, u64 parent,
4631                             u64 root_objectid, u64 owner, u64 offset,
4632                             int refs_to_drop)
4633 {
4634         struct extent_record *rec;
4635         struct cache_extent *cache;
4636         int is_data;
4637         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
4638
4639         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
4640         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
4641         if (!cache)
4642                 return 0;
4643
4644         rec = container_of(cache, struct extent_record, cache);
4645         if (is_data) {
4646                 struct data_backref *back;
4647                 back = find_data_backref(rec, parent, root_objectid, owner,
4648                                          offset, 1, bytenr, num_bytes);
4649                 if (!back)
4650                         goto out;
4651                 if (back->node.found_ref) {
4652                         back->found_ref -= refs_to_drop;
4653                         if (rec->refs)
4654                                 rec->refs -= refs_to_drop;
4655                 }
4656                 if (back->node.found_extent_tree) {
4657                         back->num_refs -= refs_to_drop;
4658                         if (rec->extent_item_refs)
4659                                 rec->extent_item_refs -= refs_to_drop;
4660                 }
4661                 if (back->found_ref == 0)
4662                         back->node.found_ref = 0;
4663                 if (back->num_refs == 0)
4664                         back->node.found_extent_tree = 0;
4665
4666                 if (!back->node.found_extent_tree && back->node.found_ref) {
4667                         list_del(&back->node.list);
4668                         free(back);
4669                 }
4670         } else {
4671                 struct tree_backref *back;
4672                 back = find_tree_backref(rec, parent, root_objectid);
4673                 if (!back)
4674                         goto out;
4675                 if (back->node.found_ref) {
4676                         if (rec->refs)
4677                                 rec->refs--;
4678                         back->node.found_ref = 0;
4679                 }
4680                 if (back->node.found_extent_tree) {
4681                         if (rec->extent_item_refs)
4682                                 rec->extent_item_refs--;
4683                         back->node.found_extent_tree = 0;
4684                 }
4685                 if (!back->node.found_extent_tree && back->node.found_ref) {
4686                         list_del(&back->node.list);
4687                         free(back);
4688                 }
4689         }
4690         maybe_free_extent_rec(extent_cache, rec);
4691 out:
4692         return 0;
4693 }
4694
4695 static int delete_extent_records(struct btrfs_trans_handle *trans,
4696                                  struct btrfs_root *root,
4697                                  struct btrfs_path *path,
4698                                  u64 bytenr, u64 new_len)
4699 {
4700         struct btrfs_key key;
4701         struct btrfs_key found_key;
4702         struct extent_buffer *leaf;
4703         int ret;
4704         int slot;
4705
4706
4707         key.objectid = bytenr;
4708         key.type = (u8)-1;
4709         key.offset = (u64)-1;
4710
4711         while(1) {
4712                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
4713                                         &key, path, 0, 1);
4714                 if (ret < 0)
4715                         break;
4716
4717                 if (ret > 0) {
4718                         ret = 0;
4719                         if (path->slots[0] == 0)
4720                                 break;
4721                         path->slots[0]--;
4722                 }
4723                 ret = 0;
4724
4725                 leaf = path->nodes[0];
4726                 slot = path->slots[0];
4727
4728                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4729                 if (found_key.objectid != bytenr)
4730                         break;
4731
4732                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
4733                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
4734                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4735                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
4736                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
4737                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
4738                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
4739                         btrfs_release_path(path);
4740                         if (found_key.type == 0) {
4741                                 if (found_key.offset == 0)
4742                                         break;
4743                                 key.offset = found_key.offset - 1;
4744                                 key.type = found_key.type;
4745                         }
4746                         key.type = found_key.type - 1;
4747                         key.offset = (u64)-1;
4748                         continue;
4749                 }
4750
4751                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
4752                         found_key.objectid, found_key.type, found_key.offset);
4753
4754                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
4755                 if (ret)
4756                         break;
4757                 btrfs_release_path(path);
4758
4759                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4760                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
4761                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
4762                                 found_key.offset : root->leafsize;
4763
4764                         ret = btrfs_update_block_group(trans, root, bytenr,
4765                                                        bytes, 0, 0);
4766                         if (ret)
4767                                 break;
4768                 }
4769         }
4770
4771         btrfs_release_path(path);
4772         return ret;
4773 }
4774
4775 /*
4776  * for a single backref, this will allocate a new extent
4777  * and add the backref to it.
4778  */
4779 static int record_extent(struct btrfs_trans_handle *trans,
4780                          struct btrfs_fs_info *info,
4781                          struct btrfs_path *path,
4782                          struct extent_record *rec,
4783                          struct extent_backref *back,
4784                          int allocated, u64 flags)
4785 {
4786         int ret;
4787         struct btrfs_root *extent_root = info->extent_root;
4788         struct extent_buffer *leaf;
4789         struct btrfs_key ins_key;
4790         struct btrfs_extent_item *ei;
4791         struct tree_backref *tback;
4792         struct data_backref *dback;
4793         struct btrfs_tree_block_info *bi;
4794
4795         if (!back->is_data)
4796                 rec->max_size = max_t(u64, rec->max_size,
4797                                     info->extent_root->leafsize);
4798
4799         if (!allocated) {
4800                 u32 item_size = sizeof(*ei);
4801
4802                 if (!back->is_data)
4803                         item_size += sizeof(*bi);
4804
4805                 ins_key.objectid = rec->start;
4806                 ins_key.offset = rec->max_size;
4807                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
4808
4809                 ret = btrfs_insert_empty_item(trans, extent_root, path,
4810                                         &ins_key, item_size);
4811                 if (ret)
4812                         goto fail;
4813
4814                 leaf = path->nodes[0];
4815                 ei = btrfs_item_ptr(leaf, path->slots[0],
4816                                     struct btrfs_extent_item);
4817
4818                 btrfs_set_extent_refs(leaf, ei, 0);
4819                 btrfs_set_extent_generation(leaf, ei, rec->generation);
4820
4821                 if (back->is_data) {
4822                         btrfs_set_extent_flags(leaf, ei,
4823                                                BTRFS_EXTENT_FLAG_DATA);
4824                 } else {
4825                         struct btrfs_disk_key copy_key;;
4826
4827                         tback = (struct tree_backref *)back;
4828                         bi = (struct btrfs_tree_block_info *)(ei + 1);
4829                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
4830                                              sizeof(*bi));
4831
4832                         btrfs_set_disk_key_objectid(&copy_key,
4833                                                     rec->info_objectid);
4834                         btrfs_set_disk_key_type(&copy_key, 0);
4835                         btrfs_set_disk_key_offset(&copy_key, 0);
4836
4837                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
4838                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
4839
4840                         btrfs_set_extent_flags(leaf, ei,
4841                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
4842                 }
4843
4844                 btrfs_mark_buffer_dirty(leaf);
4845                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
4846                                                rec->max_size, 1, 0);
4847                 if (ret)
4848                         goto fail;
4849                 btrfs_release_path(path);
4850         }
4851
4852         if (back->is_data) {
4853                 u64 parent;
4854                 int i;
4855
4856                 dback = (struct data_backref *)back;
4857                 if (back->full_backref)
4858                         parent = dback->parent;
4859                 else
4860                         parent = 0;
4861
4862                 for (i = 0; i < dback->found_ref; i++) {
4863                         /* if parent != 0, we're doing a full backref
4864                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
4865                          * just makes the backref allocator create a data
4866                          * backref
4867                          */
4868                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
4869                                                    rec->start, rec->max_size,
4870                                                    parent,
4871                                                    dback->root,
4872                                                    parent ?
4873                                                    BTRFS_FIRST_FREE_OBJECTID :
4874                                                    dback->owner,
4875                                                    dback->offset);
4876                         if (ret)
4877                                 break;
4878                 }
4879                 fprintf(stderr, "adding new data backref"
4880                                 " on %llu %s %llu owner %llu"
4881                                 " offset %llu found %d\n",
4882                                 (unsigned long long)rec->start,
4883                                 back->full_backref ?
4884                                 "parent" : "root",
4885                                 back->full_backref ?
4886                                 (unsigned long long)parent :
4887                                 (unsigned long long)dback->root,
4888                                 (unsigned long long)dback->owner,
4889                                 (unsigned long long)dback->offset,
4890                                 dback->found_ref);
4891         } else {
4892                 u64 parent;
4893
4894                 tback = (struct tree_backref *)back;
4895                 if (back->full_backref)
4896                         parent = tback->parent;
4897                 else
4898                         parent = 0;
4899
4900                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
4901                                            rec->start, rec->max_size,
4902                                            parent, tback->root, 0, 0);
4903                 fprintf(stderr, "adding new tree backref on "
4904                         "start %llu len %llu parent %llu root %llu\n",
4905                         rec->start, rec->max_size, tback->parent, tback->root);
4906         }
4907         if (ret)
4908                 goto fail;
4909 fail:
4910         btrfs_release_path(path);
4911         return ret;
4912 }
4913
4914 struct extent_entry {
4915         u64 bytenr;
4916         u64 bytes;
4917         int count;
4918         int broken;
4919         struct list_head list;
4920 };
4921
4922 static struct extent_entry *find_entry(struct list_head *entries,
4923                                        u64 bytenr, u64 bytes)
4924 {
4925         struct extent_entry *entry = NULL;
4926
4927         list_for_each_entry(entry, entries, list) {
4928                 if (entry->bytenr == bytenr && entry->bytes == bytes)
4929                         return entry;
4930         }
4931
4932         return NULL;
4933 }
4934
4935 static struct extent_entry *find_most_right_entry(struct list_head *entries)
4936 {
4937         struct extent_entry *entry, *best = NULL, *prev = NULL;
4938
4939         list_for_each_entry(entry, entries, list) {
4940                 if (!prev) {
4941                         prev = entry;
4942                         continue;
4943                 }
4944
4945                 /*
4946                  * If there are as many broken entries as entries then we know
4947                  * not to trust this particular entry.
4948                  */
4949                 if (entry->broken == entry->count)
4950                         continue;
4951
4952                 /*
4953                  * If our current entry == best then we can't be sure our best
4954                  * is really the best, so we need to keep searching.
4955                  */
4956                 if (best && best->count == entry->count) {
4957                         prev = entry;
4958                         best = NULL;
4959                         continue;
4960                 }
4961
4962                 /* Prev == entry, not good enough, have to keep searching */
4963                 if (!prev->broken && prev->count == entry->count)
4964                         continue;
4965
4966                 if (!best)
4967                         best = (prev->count > entry->count) ? prev : entry;
4968                 else if (best->count < entry->count)
4969                         best = entry;
4970                 prev = entry;
4971         }
4972
4973         return best;
4974 }
4975
4976 static int repair_ref(struct btrfs_trans_handle *trans,
4977                       struct btrfs_fs_info *info, struct btrfs_path *path,
4978                       struct data_backref *dback, struct extent_entry *entry)
4979 {
4980         struct btrfs_root *root;
4981         struct btrfs_file_extent_item *fi;
4982         struct extent_buffer *leaf;
4983         struct btrfs_key key;
4984         u64 bytenr, bytes;
4985         int ret;
4986
4987         key.objectid = dback->root;
4988         key.type = BTRFS_ROOT_ITEM_KEY;
4989         key.offset = (u64)-1;
4990         root = btrfs_read_fs_root(info, &key);
4991         if (IS_ERR(root)) {
4992                 fprintf(stderr, "Couldn't find root for our ref\n");
4993                 return -EINVAL;
4994         }
4995
4996         /*
4997          * The backref points to the original offset of the extent if it was
4998          * split, so we need to search down to the offset we have and then walk
4999          * forward until we find the backref we're looking for.
5000          */
5001         key.objectid = dback->owner;
5002         key.type = BTRFS_EXTENT_DATA_KEY;
5003         key.offset = dback->offset;
5004         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5005         if (ret < 0) {
5006                 fprintf(stderr, "Error looking up ref %d\n", ret);
5007                 return ret;
5008         }
5009
5010         while (1) {
5011                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5012                         ret = btrfs_next_leaf(root, path);
5013                         if (ret) {
5014                                 fprintf(stderr, "Couldn't find our ref, next\n");
5015                                 return -EINVAL;
5016                         }
5017                 }
5018                 leaf = path->nodes[0];
5019                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5020                 if (key.objectid != dback->owner ||
5021                     key.type != BTRFS_EXTENT_DATA_KEY) {
5022                         fprintf(stderr, "Couldn't find our ref, search\n");
5023                         return -EINVAL;
5024                 }
5025                 fi = btrfs_item_ptr(leaf, path->slots[0],
5026                                     struct btrfs_file_extent_item);
5027                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
5028                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
5029
5030                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
5031                         break;
5032                 path->slots[0]++;
5033         }
5034
5035         btrfs_release_path(path);
5036
5037         /*
5038          * Have to make sure that this root gets updated when we commit the
5039          * transaction
5040          */
5041         record_root_in_trans(trans, root);
5042
5043         /*
5044          * Ok we have the key of the file extent we want to fix, now we can cow
5045          * down to the thing and fix it.
5046          */
5047         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5048         if (ret < 0) {
5049                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
5050                         key.objectid, key.type, key.offset, ret);
5051                 return ret;
5052         }
5053         if (ret > 0) {
5054                 fprintf(stderr, "Well that's odd, we just found this key "
5055                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
5056                         key.offset);
5057                 return -EINVAL;
5058         }
5059         leaf = path->nodes[0];
5060         fi = btrfs_item_ptr(leaf, path->slots[0],
5061                             struct btrfs_file_extent_item);
5062
5063         if (btrfs_file_extent_compression(leaf, fi) &&
5064             dback->disk_bytenr != entry->bytenr) {
5065                 fprintf(stderr, "Ref doesn't match the record start and is "
5066                         "compressed, please take a btrfs-image of this file "
5067                         "system and send it to a btrfs developer so they can "
5068                         "complete this functionality for bytenr %Lu\n",
5069                         dback->disk_bytenr);
5070                 return -EINVAL;
5071         }
5072
5073         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
5074                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
5075         } else if (dback->disk_bytenr > entry->bytenr) {
5076                 u64 off_diff, offset;
5077
5078                 off_diff = dback->disk_bytenr - entry->bytenr;
5079                 offset = btrfs_file_extent_offset(leaf, fi);
5080                 if (dback->disk_bytenr + offset +
5081                     btrfs_file_extent_num_bytes(leaf, fi) >
5082                     entry->bytenr + entry->bytes) {
5083                         fprintf(stderr, "Ref is past the entry end, please "
5084                                 "take a btrfs-image of this file system and "
5085                                 "send it to a btrfs developer, ref %Lu\n",
5086                                 dback->disk_bytenr);
5087                         return -EINVAL;
5088                 }
5089                 offset += off_diff;
5090                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
5091                 btrfs_set_file_extent_offset(leaf, fi, offset);
5092         } else if (dback->disk_bytenr < entry->bytenr) {
5093                 u64 offset;
5094
5095                 offset = btrfs_file_extent_offset(leaf, fi);
5096                 if (dback->disk_bytenr + offset < entry->bytenr) {
5097                         fprintf(stderr, "Ref is before the entry start, please"
5098                                 " take a btrfs-image of this file system and "
5099                                 "send it to a btrfs developer, ref %Lu\n",
5100                                 dback->disk_bytenr);
5101                         return -EINVAL;
5102                 }
5103
5104                 offset += dback->disk_bytenr;
5105                 offset -= entry->bytenr;
5106                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
5107                 btrfs_set_file_extent_offset(leaf, fi, offset);
5108         }
5109
5110         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
5111
5112         /*
5113          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
5114          * only do this if we aren't using compression, otherwise it's a
5115          * trickier case.
5116          */
5117         if (!btrfs_file_extent_compression(leaf, fi))
5118                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
5119         else
5120                 printf("ram bytes may be wrong?\n");
5121         btrfs_mark_buffer_dirty(leaf);
5122         btrfs_release_path(path);
5123         return 0;
5124 }
5125
5126 static int verify_backrefs(struct btrfs_trans_handle *trans,
5127                            struct btrfs_fs_info *info, struct btrfs_path *path,
5128                            struct extent_record *rec)
5129 {
5130         struct extent_backref *back;
5131         struct data_backref *dback;
5132         struct extent_entry *entry, *best = NULL;
5133         LIST_HEAD(entries);
5134         int nr_entries = 0;
5135         int broken_entries = 0;
5136         int ret = 0;
5137         short mismatch = 0;
5138
5139         /*
5140          * Metadata is easy and the backrefs should always agree on bytenr and
5141          * size, if not we've got bigger issues.
5142          */
5143         if (rec->metadata)
5144                 return 0;
5145
5146         list_for_each_entry(back, &rec->backrefs, list) {
5147                 if (back->full_backref || !back->is_data)
5148                         continue;
5149
5150                 dback = (struct data_backref *)back;
5151
5152                 /*
5153                  * We only pay attention to backrefs that we found a real
5154                  * backref for.
5155                  */
5156                 if (dback->found_ref == 0)
5157                         continue;
5158
5159                 /*
5160                  * For now we only catch when the bytes don't match, not the
5161                  * bytenr.  We can easily do this at the same time, but I want
5162                  * to have a fs image to test on before we just add repair
5163                  * functionality willy-nilly so we know we won't screw up the
5164                  * repair.
5165                  */
5166
5167                 entry = find_entry(&entries, dback->disk_bytenr,
5168                                    dback->bytes);
5169                 if (!entry) {
5170                         entry = malloc(sizeof(struct extent_entry));
5171                         if (!entry) {
5172                                 ret = -ENOMEM;
5173                                 goto out;
5174                         }
5175                         memset(entry, 0, sizeof(*entry));
5176                         entry->bytenr = dback->disk_bytenr;
5177                         entry->bytes = dback->bytes;
5178                         list_add_tail(&entry->list, &entries);
5179                         nr_entries++;
5180                 }
5181
5182                 /*
5183                  * If we only have on entry we may think the entries agree when
5184                  * in reality they don't so we have to do some extra checking.
5185                  */
5186                 if (dback->disk_bytenr != rec->start ||
5187                     dback->bytes != rec->nr || back->broken)
5188                         mismatch = 1;
5189
5190                 if (back->broken) {
5191                         entry->broken++;
5192                         broken_entries++;
5193                 }
5194
5195                 entry->count++;
5196         }
5197
5198         /* Yay all the backrefs agree, carry on good sir */
5199         if (nr_entries <= 1 && !mismatch)
5200                 goto out;
5201
5202         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
5203                 "%Lu\n", rec->start);
5204
5205         /*
5206          * First we want to see if the backrefs can agree amongst themselves who
5207          * is right, so figure out which one of the entries has the highest
5208          * count.
5209          */
5210         best = find_most_right_entry(&entries);
5211
5212         /*
5213          * Ok so we may have an even split between what the backrefs think, so
5214          * this is where we use the extent ref to see what it thinks.
5215          */
5216         if (!best) {
5217                 entry = find_entry(&entries, rec->start, rec->nr);
5218                 if (!entry && (!broken_entries || !rec->found_rec)) {
5219                         fprintf(stderr, "Backrefs don't agree with each other "
5220                                 "and extent record doesn't agree with anybody,"
5221                                 " so we can't fix bytenr %Lu bytes %Lu\n",
5222                                 rec->start, rec->nr);
5223                         ret = -EINVAL;
5224                         goto out;
5225                 } else if (!entry) {
5226                         /*
5227                          * Ok our backrefs were broken, we'll assume this is the
5228                          * correct value and add an entry for this range.
5229                          */
5230                         entry = malloc(sizeof(struct extent_entry));
5231                         if (!entry) {
5232                                 ret = -ENOMEM;
5233                                 goto out;
5234                         }
5235                         memset(entry, 0, sizeof(*entry));
5236                         entry->bytenr = rec->start;
5237                         entry->bytes = rec->nr;
5238                         list_add_tail(&entry->list, &entries);
5239                         nr_entries++;
5240                 }
5241                 entry->count++;
5242                 best = find_most_right_entry(&entries);
5243                 if (!best) {
5244                         fprintf(stderr, "Backrefs and extent record evenly "
5245                                 "split on who is right, this is going to "
5246                                 "require user input to fix bytenr %Lu bytes "
5247                                 "%Lu\n", rec->start, rec->nr);
5248                         ret = -EINVAL;
5249                         goto out;
5250                 }
5251         }
5252
5253         /*
5254          * I don't think this can happen currently as we'll abort() if we catch
5255          * this case higher up, but in case somebody removes that we still can't
5256          * deal with it properly here yet, so just bail out of that's the case.
5257          */
5258         if (best->bytenr != rec->start) {
5259                 fprintf(stderr, "Extent start and backref starts don't match, "
5260                         "please use btrfs-image on this file system and send "
5261                         "it to a btrfs developer so they can make fsck fix "
5262                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
5263                         rec->start, rec->nr);
5264                 ret = -EINVAL;
5265                 goto out;
5266         }
5267
5268         /*
5269          * Ok great we all agreed on an extent record, let's go find the real
5270          * references and fix up the ones that don't match.
5271          */
5272         list_for_each_entry(back, &rec->backrefs, list) {
5273                 if (back->full_backref || !back->is_data)
5274                         continue;
5275
5276                 dback = (struct data_backref *)back;
5277
5278                 /*
5279                  * Still ignoring backrefs that don't have a real ref attached
5280                  * to them.
5281                  */
5282                 if (dback->found_ref == 0)
5283                         continue;
5284
5285                 if (dback->bytes == best->bytes &&
5286                     dback->disk_bytenr == best->bytenr)
5287                         continue;
5288
5289                 ret = repair_ref(trans, info, path, dback, best);
5290                 if (ret)
5291                         goto out;
5292         }
5293
5294         /*
5295          * Ok we messed with the actual refs, which means we need to drop our
5296          * entire cache and go back and rescan.  I know this is a huge pain and
5297          * adds a lot of extra work, but it's the only way to be safe.  Once all
5298          * the backrefs agree we may not need to do anything to the extent
5299          * record itself.
5300          */
5301         ret = -EAGAIN;
5302 out:
5303         while (!list_empty(&entries)) {
5304                 entry = list_entry(entries.next, struct extent_entry, list);
5305                 list_del_init(&entry->list);
5306                 free(entry);
5307         }
5308         return ret;
5309 }
5310
5311 static int process_duplicates(struct btrfs_root *root,
5312                               struct cache_tree *extent_cache,
5313                               struct extent_record *rec)
5314 {
5315         struct extent_record *good, *tmp;
5316         struct cache_extent *cache;
5317         int ret;
5318
5319         /*
5320          * If we found a extent record for this extent then return, or if we
5321          * have more than one duplicate we are likely going to need to delete
5322          * something.
5323          */
5324         if (rec->found_rec || rec->num_duplicates > 1)
5325                 return 0;
5326
5327         /* Shouldn't happen but just in case */
5328         BUG_ON(!rec->num_duplicates);
5329
5330         /*
5331          * So this happens if we end up with a backref that doesn't match the
5332          * actual extent entry.  So either the backref is bad or the extent
5333          * entry is bad.  Either way we want to have the extent_record actually
5334          * reflect what we found in the extent_tree, so we need to take the
5335          * duplicate out and use that as the extent_record since the only way we
5336          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
5337          */
5338         remove_cache_extent(extent_cache, &rec->cache);
5339
5340         good = list_entry(rec->dups.next, struct extent_record, list);
5341         list_del_init(&good->list);
5342         INIT_LIST_HEAD(&good->backrefs);
5343         INIT_LIST_HEAD(&good->dups);
5344         good->cache.start = good->start;
5345         good->cache.size = good->nr;
5346         good->content_checked = 0;
5347         good->owner_ref_checked = 0;
5348         good->num_duplicates = 0;
5349         good->refs = rec->refs;
5350         list_splice_init(&rec->backrefs, &good->backrefs);
5351         while (1) {
5352                 cache = lookup_cache_extent(extent_cache, good->start,
5353                                             good->nr);
5354                 if (!cache)
5355                         break;
5356                 tmp = container_of(cache, struct extent_record, cache);
5357
5358                 /*
5359                  * If we find another overlapping extent and it's found_rec is
5360                  * set then it's a duplicate and we need to try and delete
5361                  * something.
5362                  */
5363                 if (tmp->found_rec || tmp->num_duplicates > 0) {
5364                         if (list_empty(&good->list))
5365                                 list_add_tail(&good->list,
5366                                               &duplicate_extents);
5367                         good->num_duplicates += tmp->num_duplicates + 1;
5368                         list_splice_init(&tmp->dups, &good->dups);
5369                         list_del_init(&tmp->list);
5370                         list_add_tail(&tmp->list, &good->dups);
5371                         remove_cache_extent(extent_cache, &tmp->cache);
5372                         continue;
5373                 }
5374
5375                 /*
5376                  * Ok we have another non extent item backed extent rec, so lets
5377                  * just add it to this extent and carry on like we did above.
5378                  */
5379                 good->refs += tmp->refs;
5380                 list_splice_init(&tmp->backrefs, &good->backrefs);
5381                 remove_cache_extent(extent_cache, &tmp->cache);
5382                 free(tmp);
5383         }
5384         ret = insert_cache_extent(extent_cache, &good->cache);
5385         BUG_ON(ret);
5386         free(rec);
5387         return good->num_duplicates ? 0 : 1;
5388 }
5389
5390 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
5391                                     struct btrfs_root *root,
5392                                     struct extent_record *rec)
5393 {
5394         LIST_HEAD(delete_list);
5395         struct btrfs_path *path;
5396         struct extent_record *tmp, *good, *n;
5397         int nr_del = 0;
5398         int ret = 0;
5399         struct btrfs_key key;
5400
5401         path = btrfs_alloc_path();
5402         if (!path) {
5403                 ret = -ENOMEM;
5404                 goto out;
5405         }
5406
5407         good = rec;
5408         /* Find the record that covers all of the duplicates. */
5409         list_for_each_entry(tmp, &rec->dups, list) {
5410                 if (good->start < tmp->start)
5411                         continue;
5412                 if (good->nr > tmp->nr)
5413                         continue;
5414
5415                 if (tmp->start + tmp->nr < good->start + good->nr) {
5416                         fprintf(stderr, "Ok we have overlapping extents that "
5417                                 "aren't completely covered by eachother, this "
5418                                 "is going to require more careful thought.  "
5419                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
5420                                 tmp->start, tmp->nr, good->start, good->nr);
5421                         abort();
5422                 }
5423                 good = tmp;
5424         }
5425
5426         if (good != rec)
5427                 list_add_tail(&rec->list, &delete_list);
5428
5429         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
5430                 if (tmp == good)
5431                         continue;
5432                 list_move_tail(&tmp->list, &delete_list);
5433         }
5434
5435         root = root->fs_info->extent_root;
5436         list_for_each_entry(tmp, &delete_list, list) {
5437                 if (tmp->found_rec == 0)
5438                         continue;
5439                 key.objectid = tmp->start;
5440                 key.type = BTRFS_EXTENT_ITEM_KEY;
5441                 key.offset = tmp->nr;
5442
5443                 /* Shouldn't happen but just in case */
5444                 if (tmp->metadata) {
5445                         fprintf(stderr, "Well this shouldn't happen, extent "
5446                                 "record overlaps but is metadata? "
5447                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
5448                         abort();
5449                 }
5450
5451                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5452                 if (ret) {
5453                         if (ret > 0)
5454                                 ret = -EINVAL;
5455                         goto out;
5456                 }
5457                 ret = btrfs_del_item(trans, root, path);
5458                 if (ret)
5459                         goto out;
5460                 btrfs_release_path(path);
5461                 nr_del++;
5462         }
5463
5464 out:
5465         while (!list_empty(&delete_list)) {
5466                 tmp = list_entry(delete_list.next, struct extent_record, list);
5467                 list_del_init(&tmp->list);
5468                 if (tmp == rec)
5469                         continue;
5470                 free(tmp);
5471         }
5472
5473         while (!list_empty(&rec->dups)) {
5474                 tmp = list_entry(rec->dups.next, struct extent_record, list);
5475                 list_del_init(&tmp->list);
5476                 free(tmp);
5477         }
5478
5479         btrfs_free_path(path);
5480
5481         if (!ret && !nr_del)
5482                 rec->num_duplicates = 0;
5483
5484         return ret ? ret : nr_del;
5485 }
5486
5487 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
5488                                   struct btrfs_fs_info *info,
5489                                   struct btrfs_path *path,
5490                                   struct cache_tree *extent_cache,
5491                                   struct extent_record *rec)
5492 {
5493         struct btrfs_root *root;
5494         struct extent_backref *back;
5495         struct data_backref *dback;
5496         struct cache_extent *cache;
5497         struct btrfs_file_extent_item *fi;
5498         struct btrfs_key key;
5499         u64 bytenr, bytes;
5500         int ret;
5501
5502         list_for_each_entry(back, &rec->backrefs, list) {
5503                 /* Don't care about full backrefs (poor unloved backrefs) */
5504                 if (back->full_backref || !back->is_data)
5505                         continue;
5506
5507                 dback = (struct data_backref *)back;
5508
5509                 /* We found this one, we don't need to do a lookup */
5510                 if (dback->found_ref)
5511                         continue;
5512
5513                 key.objectid = dback->root;
5514                 key.type = BTRFS_ROOT_ITEM_KEY;
5515                 key.offset = (u64)-1;
5516
5517                 root = btrfs_read_fs_root(info, &key);
5518
5519                 /* No root, definitely a bad ref, skip */
5520                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
5521                         continue;
5522                 /* Other err, exit */
5523                 if (IS_ERR(root))
5524                         return PTR_ERR(root);
5525
5526                 key.objectid = dback->owner;
5527                 key.type = BTRFS_EXTENT_DATA_KEY;
5528                 key.offset = dback->offset;
5529                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5530                 if (ret) {
5531                         btrfs_release_path(path);
5532                         if (ret < 0)
5533                                 return ret;
5534                         /* Didn't find it, we can carry on */
5535                         ret = 0;
5536                         continue;
5537                 }
5538
5539                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5540                                     struct btrfs_file_extent_item);
5541                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
5542                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
5543                 btrfs_release_path(path);
5544                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5545                 if (cache) {
5546                         struct extent_record *tmp;
5547                         tmp = container_of(cache, struct extent_record, cache);
5548
5549                         /*
5550                          * If we found an extent record for the bytenr for this
5551                          * particular backref then we can't add it to our
5552                          * current extent record.  We only want to add backrefs
5553                          * that don't have a corresponding extent item in the
5554                          * extent tree since they likely belong to this record
5555                          * and we need to fix it if it doesn't match bytenrs.
5556                          */
5557                         if  (tmp->found_rec)
5558                                 continue;
5559                 }
5560
5561                 dback->found_ref += 1;
5562                 dback->disk_bytenr = bytenr;
5563                 dback->bytes = bytes;
5564
5565                 /*
5566                  * Set this so the verify backref code knows not to trust the
5567                  * values in this backref.
5568                  */
5569                 back->broken = 1;
5570         }
5571
5572         return 0;
5573 }
5574
5575 /*
5576  * when an incorrect extent item is found, this will delete
5577  * all of the existing entries for it and recreate them
5578  * based on what the tree scan found.
5579  */
5580 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
5581                              struct btrfs_fs_info *info,
5582                              struct cache_tree *extent_cache,
5583                              struct extent_record *rec)
5584 {
5585         int ret;
5586         struct btrfs_path *path;
5587         struct list_head *cur = rec->backrefs.next;
5588         struct cache_extent *cache;
5589         struct extent_backref *back;
5590         int allocated = 0;
5591         u64 flags = 0;
5592
5593         /*
5594          * remember our flags for recreating the extent.
5595          * FIXME, if we have cleared extent tree, we can not
5596          * lookup extent info in extent tree.
5597          */
5598         if (!init_extent_tree) {
5599                 ret = btrfs_lookup_extent_info(NULL, info->extent_root,
5600                                         rec->start, rec->max_size,
5601                                         rec->metadata, NULL, &flags);
5602                 if (ret < 0)
5603                         flags = 0;
5604         } else {
5605                 flags = 0;
5606         }
5607
5608         path = btrfs_alloc_path();
5609         if (!path)
5610                 return -ENOMEM;
5611
5612         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
5613                 /*
5614                  * Sometimes the backrefs themselves are so broken they don't
5615                  * get attached to any meaningful rec, so first go back and
5616                  * check any of our backrefs that we couldn't find and throw
5617                  * them into the list if we find the backref so that
5618                  * verify_backrefs can figure out what to do.
5619                  */
5620                 ret = find_possible_backrefs(trans, info, path, extent_cache,
5621                                              rec);
5622                 if (ret < 0)
5623                         goto out;
5624         }
5625
5626         /* step one, make sure all of the backrefs agree */
5627         ret = verify_backrefs(trans, info, path, rec);
5628         if (ret < 0)
5629                 goto out;
5630
5631         /* step two, delete all the existing records */
5632         ret = delete_extent_records(trans, info->extent_root, path,
5633                                     rec->start, rec->max_size);
5634
5635         if (ret < 0)
5636                 goto out;
5637
5638         /* was this block corrupt?  If so, don't add references to it */
5639         cache = lookup_cache_extent(info->corrupt_blocks,
5640                                     rec->start, rec->max_size);
5641         if (cache) {
5642                 ret = 0;
5643                 goto out;
5644         }
5645
5646         /* step three, recreate all the refs we did find */
5647         while(cur != &rec->backrefs) {
5648                 back = list_entry(cur, struct extent_backref, list);
5649                 cur = cur->next;
5650
5651                 /*
5652                  * if we didn't find any references, don't create a
5653                  * new extent record
5654                  */
5655                 if (!back->found_ref)
5656                         continue;
5657
5658                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
5659                 allocated = 1;
5660
5661                 if (ret)
5662                         goto out;
5663         }
5664 out:
5665         btrfs_free_path(path);
5666         return ret;
5667 }
5668
5669 /* right now we only prune from the extent allocation tree */
5670 static int prune_one_block(struct btrfs_trans_handle *trans,
5671                            struct btrfs_fs_info *info,
5672                            struct btrfs_corrupt_block *corrupt)
5673 {
5674         int ret;
5675         struct btrfs_path path;
5676         struct extent_buffer *eb;
5677         u64 found;
5678         int slot;
5679         int nritems;
5680         int level = corrupt->level + 1;
5681
5682         btrfs_init_path(&path);
5683 again:
5684         /* we want to stop at the parent to our busted block */
5685         path.lowest_level = level;
5686
5687         ret = btrfs_search_slot(trans, info->extent_root,
5688                                 &corrupt->key, &path, -1, 1);
5689
5690         if (ret < 0)
5691                 goto out;
5692
5693         eb = path.nodes[level];
5694         if (!eb) {
5695                 ret = -ENOENT;
5696                 goto out;
5697         }
5698
5699         /*
5700          * hopefully the search gave us the block we want to prune,
5701          * lets try that first
5702          */
5703         slot = path.slots[level];
5704         found =  btrfs_node_blockptr(eb, slot);
5705         if (found == corrupt->cache.start)
5706                 goto del_ptr;
5707
5708         nritems = btrfs_header_nritems(eb);
5709
5710         /* the search failed, lets scan this node and hope we find it */
5711         for (slot = 0; slot < nritems; slot++) {
5712                 found =  btrfs_node_blockptr(eb, slot);
5713                 if (found == corrupt->cache.start)
5714                         goto del_ptr;
5715         }
5716         /*
5717          * we couldn't find the bad block.  TODO, search all the nodes for pointers
5718          * to this block
5719          */
5720         if (eb == info->extent_root->node) {
5721                 ret = -ENOENT;
5722                 goto out;
5723         } else {
5724                 level++;
5725                 btrfs_release_path(&path);
5726                 goto again;
5727         }
5728
5729 del_ptr:
5730         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
5731         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
5732
5733 out:
5734         btrfs_release_path(&path);
5735         return ret;
5736 }
5737
5738 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
5739                                 struct btrfs_fs_info *info)
5740 {
5741         struct cache_extent *cache;
5742         struct btrfs_corrupt_block *corrupt;
5743
5744         cache = search_cache_extent(info->corrupt_blocks, 0);
5745         while (1) {
5746                 if (!cache)
5747                         break;
5748                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5749                 prune_one_block(trans, info, corrupt);
5750                 cache = next_cache_extent(cache);
5751         }
5752         return 0;
5753 }
5754
5755 static void free_corrupt_block(struct cache_extent *cache)
5756 {
5757         struct btrfs_corrupt_block *corrupt;
5758
5759         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5760         free(corrupt);
5761 }
5762
5763 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
5764
5765 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
5766 {
5767         struct btrfs_block_group_cache *cache;
5768         u64 start, end;
5769         int ret;
5770
5771         while (1) {
5772                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
5773                                             &start, &end, EXTENT_DIRTY);
5774                 if (ret)
5775                         break;
5776                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
5777                                    GFP_NOFS);
5778         }
5779
5780         start = 0;
5781         while (1) {
5782                 cache = btrfs_lookup_first_block_group(fs_info, start);
5783                 if (!cache)
5784                         break;
5785                 if (cache->cached)
5786                         cache->cached = 0;
5787                 start = cache->key.objectid + cache->key.offset;
5788         }
5789 }
5790
5791 static int check_extent_refs(struct btrfs_trans_handle *trans,
5792                              struct btrfs_root *root,
5793                              struct cache_tree *extent_cache)
5794 {
5795         struct extent_record *rec;
5796         struct cache_extent *cache;
5797         int err = 0;
5798         int ret = 0;
5799         int fixed = 0;
5800         int had_dups = 0;
5801
5802         if (repair) {
5803                 /*
5804                  * if we're doing a repair, we have to make sure
5805                  * we don't allocate from the problem extents.
5806                  * In the worst case, this will be all the
5807                  * extents in the FS
5808                  */
5809                 cache = search_cache_extent(extent_cache, 0);
5810                 while(cache) {
5811                         rec = container_of(cache, struct extent_record, cache);
5812                         btrfs_pin_extent(root->fs_info,
5813                                          rec->start, rec->max_size);
5814                         cache = next_cache_extent(cache);
5815                 }
5816
5817                 /* pin down all the corrupted blocks too */
5818                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
5819                 while(cache) {
5820                         btrfs_pin_extent(root->fs_info,
5821                                          cache->start, cache->size);
5822                         cache = next_cache_extent(cache);
5823                 }
5824                 prune_corrupt_blocks(trans, root->fs_info);
5825                 reset_cached_block_groups(root->fs_info);
5826         }
5827
5828         /*
5829          * We need to delete any duplicate entries we find first otherwise we
5830          * could mess up the extent tree when we have backrefs that actually
5831          * belong to a different extent item and not the weird duplicate one.
5832          */
5833         while (repair && !list_empty(&duplicate_extents)) {
5834                 rec = list_entry(duplicate_extents.next, struct extent_record,
5835                                  list);
5836                 list_del_init(&rec->list);
5837
5838                 /* Sometimes we can find a backref before we find an actual
5839                  * extent, so we need to process it a little bit to see if there
5840                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
5841                  * if this is a backref screwup.  If we need to delete stuff
5842                  * process_duplicates() will return 0, otherwise it will return
5843                  * 1 and we
5844                  */
5845                 if (process_duplicates(root, extent_cache, rec))
5846                         continue;
5847                 ret = delete_duplicate_records(trans, root, rec);
5848                 if (ret < 0)
5849                         return ret;
5850                 /*
5851                  * delete_duplicate_records will return the number of entries
5852                  * deleted, so if it's greater than 0 then we know we actually
5853                  * did something and we need to remove.
5854                  */
5855                 if (ret)
5856                         had_dups = 1;
5857         }
5858
5859         if (had_dups)
5860                 return -EAGAIN;
5861
5862         while(1) {
5863                 fixed = 0;
5864                 cache = search_cache_extent(extent_cache, 0);
5865                 if (!cache)
5866                         break;
5867                 rec = container_of(cache, struct extent_record, cache);
5868                 if (rec->num_duplicates) {
5869                         fprintf(stderr, "extent item %llu has multiple extent "
5870                                 "items\n", (unsigned long long)rec->start);
5871                         err = 1;
5872                 }
5873
5874                 if (rec->refs != rec->extent_item_refs) {
5875                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
5876                                 (unsigned long long)rec->start,
5877                                 (unsigned long long)rec->nr);
5878                         fprintf(stderr, "extent item %llu, found %llu\n",
5879                                 (unsigned long long)rec->extent_item_refs,
5880                                 (unsigned long long)rec->refs);
5881                         if (!fixed && repair) {
5882                                 ret = fixup_extent_refs(trans, root->fs_info,
5883                                                         extent_cache, rec);
5884                                 if (ret)
5885                                         goto repair_abort;
5886                                 fixed = 1;
5887                         }
5888                         err = 1;
5889
5890                 }
5891                 if (all_backpointers_checked(rec, 1)) {
5892                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
5893                                 (unsigned long long)rec->start,
5894                                 (unsigned long long)rec->nr);
5895
5896                         if (!fixed && repair) {
5897                                 ret = fixup_extent_refs(trans, root->fs_info,
5898                                                         extent_cache, rec);
5899                                 if (ret)
5900                                         goto repair_abort;
5901                                 fixed = 1;
5902                         }
5903
5904                         err = 1;
5905                 }
5906                 if (!rec->owner_ref_checked) {
5907                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
5908                                 (unsigned long long)rec->start,
5909                                 (unsigned long long)rec->nr);
5910                         if (!fixed && repair) {
5911                                 ret = fixup_extent_refs(trans, root->fs_info,
5912                                                         extent_cache, rec);
5913                                 if (ret)
5914                                         goto repair_abort;
5915                                 fixed = 1;
5916                         }
5917                         err = 1;
5918                 }
5919
5920                 remove_cache_extent(extent_cache, cache);
5921                 free_all_extent_backrefs(rec);
5922                 free(rec);
5923         }
5924 repair_abort:
5925         if (repair) {
5926                 if (ret && ret != -EAGAIN) {
5927                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
5928                         exit(1);
5929                 } else if (!ret) {
5930                         btrfs_fix_block_accounting(trans, root);
5931                 }
5932                 if (err)
5933                         fprintf(stderr, "repaired damaged extent references\n");
5934                 return ret;
5935         }
5936         return err;
5937 }
5938
5939 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
5940 {
5941         u64 stripe_size;
5942
5943         if (type & BTRFS_BLOCK_GROUP_RAID0) {
5944                 stripe_size = length;
5945                 stripe_size /= num_stripes;
5946         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
5947                 stripe_size = length * 2;
5948                 stripe_size /= num_stripes;
5949         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
5950                 stripe_size = length;
5951                 stripe_size /= (num_stripes - 1);
5952         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
5953                 stripe_size = length;
5954                 stripe_size /= (num_stripes - 2);
5955         } else {
5956                 stripe_size = length;
5957         }
5958         return stripe_size;
5959 }
5960
5961 static int check_chunk_refs(struct chunk_record *chunk_rec,
5962                             struct block_group_tree *block_group_cache,
5963                             struct device_extent_tree *dev_extent_cache,
5964                             int silent)
5965 {
5966         struct cache_extent *block_group_item;
5967         struct block_group_record *block_group_rec;
5968         struct cache_extent *dev_extent_item;
5969         struct device_extent_record *dev_extent_rec;
5970         u64 devid;
5971         u64 offset;
5972         u64 length;
5973         int i;
5974         int ret = 0;
5975
5976         block_group_item = lookup_cache_extent(&block_group_cache->tree,
5977                                                chunk_rec->offset,
5978                                                chunk_rec->length);
5979         if (block_group_item) {
5980                 block_group_rec = container_of(block_group_item,
5981                                                struct block_group_record,
5982                                                cache);
5983                 if (chunk_rec->length != block_group_rec->offset ||
5984                     chunk_rec->offset != block_group_rec->objectid ||
5985                     chunk_rec->type_flags != block_group_rec->flags) {
5986                         if (!silent)
5987                                 fprintf(stderr,
5988                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
5989                                         chunk_rec->objectid,
5990                                         chunk_rec->type,
5991                                         chunk_rec->offset,
5992                                         chunk_rec->length,
5993                                         chunk_rec->offset,
5994                                         chunk_rec->type_flags,
5995                                         block_group_rec->objectid,
5996                                         block_group_rec->type,
5997                                         block_group_rec->offset,
5998                                         block_group_rec->offset,
5999                                         block_group_rec->objectid,
6000                                         block_group_rec->flags);
6001                         ret = -1;
6002                 } else {
6003                         list_del_init(&block_group_rec->list);
6004                         chunk_rec->bg_rec = block_group_rec;
6005                 }
6006         } else {
6007                 if (!silent)
6008                         fprintf(stderr,
6009                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
6010                                 chunk_rec->objectid,
6011                                 chunk_rec->type,
6012                                 chunk_rec->offset,
6013                                 chunk_rec->length,
6014                                 chunk_rec->offset,
6015                                 chunk_rec->type_flags);
6016                 ret = -1;
6017         }
6018
6019         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
6020                                     chunk_rec->num_stripes);
6021         for (i = 0; i < chunk_rec->num_stripes; ++i) {
6022                 devid = chunk_rec->stripes[i].devid;
6023                 offset = chunk_rec->stripes[i].offset;
6024                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
6025                                                        devid, offset, length);
6026                 if (dev_extent_item) {
6027                         dev_extent_rec = container_of(dev_extent_item,
6028                                                 struct device_extent_record,
6029                                                 cache);
6030                         if (dev_extent_rec->objectid != devid ||
6031                             dev_extent_rec->offset != offset ||
6032                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
6033                             dev_extent_rec->length != length) {
6034                                 if (!silent)
6035                                         fprintf(stderr,
6036                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
6037                                                 chunk_rec->objectid,
6038                                                 chunk_rec->type,
6039                                                 chunk_rec->offset,
6040                                                 chunk_rec->stripes[i].devid,
6041                                                 chunk_rec->stripes[i].offset,
6042                                                 dev_extent_rec->objectid,
6043                                                 dev_extent_rec->offset,
6044                                                 dev_extent_rec->length);
6045                                 ret = -1;
6046                         } else {
6047                                 list_move(&dev_extent_rec->chunk_list,
6048                                           &chunk_rec->dextents);
6049                         }
6050                 } else {
6051                         if (!silent)
6052                                 fprintf(stderr,
6053                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
6054                                         chunk_rec->objectid,
6055                                         chunk_rec->type,
6056                                         chunk_rec->offset,
6057                                         chunk_rec->stripes[i].devid,
6058                                         chunk_rec->stripes[i].offset);
6059                         ret = -1;
6060                 }
6061         }
6062         return ret;
6063 }
6064
6065 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
6066 int check_chunks(struct cache_tree *chunk_cache,
6067                  struct block_group_tree *block_group_cache,
6068                  struct device_extent_tree *dev_extent_cache,
6069                  struct list_head *good, struct list_head *bad, int silent)
6070 {
6071         struct cache_extent *chunk_item;
6072         struct chunk_record *chunk_rec;
6073         struct block_group_record *bg_rec;
6074         struct device_extent_record *dext_rec;
6075         int err;
6076         int ret = 0;
6077
6078         chunk_item = first_cache_extent(chunk_cache);
6079         while (chunk_item) {
6080                 chunk_rec = container_of(chunk_item, struct chunk_record,
6081                                          cache);
6082                 err = check_chunk_refs(chunk_rec, block_group_cache,
6083                                        dev_extent_cache, silent);
6084                 if (err) {
6085                         ret = err;
6086                         if (bad)
6087                                 list_add_tail(&chunk_rec->list, bad);
6088                 } else {
6089                         if (good)
6090                                 list_add_tail(&chunk_rec->list, good);
6091                 }
6092
6093                 chunk_item = next_cache_extent(chunk_item);
6094         }
6095
6096         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
6097                 if (!silent)
6098                         fprintf(stderr,
6099                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
6100                                 bg_rec->objectid,
6101                                 bg_rec->offset,
6102                                 bg_rec->flags);
6103                 if (!ret)
6104                         ret = 1;
6105         }
6106
6107         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
6108                             chunk_list) {
6109                 if (!silent)
6110                         fprintf(stderr,
6111                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
6112                                 dext_rec->objectid,
6113                                 dext_rec->offset,
6114                                 dext_rec->length);
6115                 if (!ret)
6116                         ret = 1;
6117         }
6118         return ret;
6119 }
6120
6121
6122 static int check_device_used(struct device_record *dev_rec,
6123                              struct device_extent_tree *dext_cache)
6124 {
6125         struct cache_extent *cache;
6126         struct device_extent_record *dev_extent_rec;
6127         u64 total_byte = 0;
6128
6129         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
6130         while (cache) {
6131                 dev_extent_rec = container_of(cache,
6132                                               struct device_extent_record,
6133                                               cache);
6134                 if (dev_extent_rec->objectid != dev_rec->devid)
6135                         break;
6136
6137                 list_del_init(&dev_extent_rec->device_list);
6138                 total_byte += dev_extent_rec->length;
6139                 cache = next_cache_extent(cache);
6140         }
6141
6142         if (total_byte != dev_rec->byte_used) {
6143                 fprintf(stderr,
6144                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
6145                         total_byte, dev_rec->byte_used, dev_rec->objectid,
6146                         dev_rec->type, dev_rec->offset);
6147                 return -1;
6148         } else {
6149                 return 0;
6150         }
6151 }
6152
6153 /* check btrfs_dev_item -> btrfs_dev_extent */
6154 static int check_devices(struct rb_root *dev_cache,
6155                          struct device_extent_tree *dev_extent_cache)
6156 {
6157         struct rb_node *dev_node;
6158         struct device_record *dev_rec;
6159         struct device_extent_record *dext_rec;
6160         int err;
6161         int ret = 0;
6162
6163         dev_node = rb_first(dev_cache);
6164         while (dev_node) {
6165                 dev_rec = container_of(dev_node, struct device_record, node);
6166                 err = check_device_used(dev_rec, dev_extent_cache);
6167                 if (err)
6168                         ret = err;
6169
6170                 dev_node = rb_next(dev_node);
6171         }
6172         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
6173                             device_list) {
6174                 fprintf(stderr,
6175                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
6176                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
6177                 if (!ret)
6178                         ret = 1;
6179         }
6180         return ret;
6181 }
6182
6183 static int check_chunks_and_extents(struct btrfs_root *root)
6184 {
6185         struct rb_root dev_cache;
6186         struct cache_tree chunk_cache;
6187         struct block_group_tree block_group_cache;
6188         struct device_extent_tree dev_extent_cache;
6189         struct cache_tree extent_cache;
6190         struct cache_tree seen;
6191         struct cache_tree pending;
6192         struct cache_tree reada;
6193         struct cache_tree nodes;
6194         struct cache_tree corrupt_blocks;
6195         struct btrfs_path path;
6196         struct btrfs_key key;
6197         struct btrfs_key found_key;
6198         int ret, err = 0;
6199         u64 last = 0;
6200         struct block_info *bits;
6201         int bits_nr;
6202         struct extent_buffer *leaf;
6203         struct btrfs_trans_handle *trans = NULL;
6204         int slot;
6205         struct btrfs_root_item ri;
6206         struct list_head dropping_trees;
6207
6208         dev_cache = RB_ROOT;
6209         cache_tree_init(&chunk_cache);
6210         block_group_tree_init(&block_group_cache);
6211         device_extent_tree_init(&dev_extent_cache);
6212
6213         cache_tree_init(&extent_cache);
6214         cache_tree_init(&seen);
6215         cache_tree_init(&pending);
6216         cache_tree_init(&nodes);
6217         cache_tree_init(&reada);
6218         cache_tree_init(&corrupt_blocks);
6219         INIT_LIST_HEAD(&dropping_trees);
6220
6221         if (repair) {
6222                 trans = btrfs_start_transaction(root, 1);
6223                 if (IS_ERR(trans)) {
6224                         fprintf(stderr, "Error starting transaction\n");
6225                         return PTR_ERR(trans);
6226                 }
6227                 root->fs_info->fsck_extent_cache = &extent_cache;
6228                 root->fs_info->free_extent_hook = free_extent_hook;
6229                 root->fs_info->corrupt_blocks = &corrupt_blocks;
6230         }
6231
6232         bits_nr = 1024;
6233         bits = malloc(bits_nr * sizeof(struct block_info));
6234         if (!bits) {
6235                 perror("malloc");
6236                 exit(1);
6237         }
6238
6239 again:
6240         add_root_to_pending(root->fs_info->tree_root->node,
6241                             &extent_cache, &pending, &seen, &nodes,
6242                             &root->fs_info->tree_root->root_key);
6243
6244         add_root_to_pending(root->fs_info->chunk_root->node,
6245                             &extent_cache, &pending, &seen, &nodes,
6246                             &root->fs_info->chunk_root->root_key);
6247
6248         btrfs_init_path(&path);
6249         key.offset = 0;
6250         key.objectid = 0;
6251         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
6252         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
6253                                         &key, &path, 0, 0);
6254         if (ret < 0)
6255                 goto out;
6256         while(1) {
6257                 leaf = path.nodes[0];
6258                 slot = path.slots[0];
6259                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
6260                         ret = btrfs_next_leaf(root, &path);
6261                         if (ret != 0)
6262                                 break;
6263                         leaf = path.nodes[0];
6264                         slot = path.slots[0];
6265                 }
6266                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
6267                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
6268                         unsigned long offset;
6269                         struct extent_buffer *buf;
6270
6271                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6272                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
6273                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
6274                                 buf = read_tree_block(root->fs_info->tree_root,
6275                                                       btrfs_root_bytenr(&ri),
6276                                                       btrfs_level_size(root,
6277                                                       btrfs_root_level(&ri)),
6278                                                       0);
6279                                 if (!buf) {
6280                                         ret = -EIO;
6281                                         goto out;
6282                                 }
6283                                 add_root_to_pending(buf, &extent_cache,
6284                                                     &pending, &seen, &nodes,
6285                                                     &found_key);
6286                                 free_extent_buffer(buf);
6287                         } else {
6288                                 struct dropping_root_item_record *dri_rec;
6289                                 dri_rec = malloc(sizeof(*dri_rec));
6290                                 if (!dri_rec) {
6291                                         perror("malloc");
6292                                         exit(1);
6293                                 }
6294                                 memcpy(&dri_rec->ri, &ri, sizeof(ri));
6295                                 memcpy(&dri_rec->found_key, &found_key,
6296                                        sizeof(found_key));
6297                                 list_add_tail(&dri_rec->list, &dropping_trees);
6298                         }
6299                 }
6300                 path.slots[0]++;
6301         }
6302         btrfs_release_path(&path);
6303         while (1) {
6304                 ret = run_next_block(trans, root, bits, bits_nr, &last,
6305                                      &pending, &seen, &reada, &nodes,
6306                                      &extent_cache, &chunk_cache, &dev_cache,
6307                                      &block_group_cache, &dev_extent_cache,
6308                                      NULL);
6309                 if (ret != 0)
6310                         break;
6311         }
6312
6313         while (!list_empty(&dropping_trees)) {
6314                 struct dropping_root_item_record *rec;
6315                 struct extent_buffer *buf;
6316                 rec = list_entry(dropping_trees.next,
6317                                  struct dropping_root_item_record, list);
6318                 last = 0;
6319                 if (!bits) {
6320                         perror("realloc");
6321                         exit(1);
6322                 }
6323                 buf = read_tree_block(root->fs_info->tree_root,
6324                                       btrfs_root_bytenr(&rec->ri),
6325                                       btrfs_level_size(root,
6326                                       btrfs_root_level(&rec->ri)), 0);
6327                 if (!buf) {
6328                         ret = -EIO;
6329                         goto out;
6330                 }
6331                 add_root_to_pending(buf, &extent_cache, &pending,
6332                                     &seen, &nodes, &rec->found_key);
6333                 while (1) {
6334                         ret = run_next_block(trans, root, bits, bits_nr, &last,
6335                                              &pending, &seen, &reada,
6336                                              &nodes, &extent_cache,
6337                                              &chunk_cache, &dev_cache,
6338                                              &block_group_cache,
6339                                              &dev_extent_cache,
6340                                              &rec->ri);
6341                         if (ret != 0)
6342                                 break;
6343                 }
6344                 free_extent_buffer(buf);
6345                 list_del(&rec->list);
6346                 free(rec);
6347         }
6348
6349         if (ret >= 0)
6350                 ret = check_extent_refs(trans, root, &extent_cache);
6351         if (ret == -EAGAIN) {
6352                 ret = btrfs_commit_transaction(trans, root);
6353                 if (ret)
6354                         goto out;
6355
6356                 trans = btrfs_start_transaction(root, 1);
6357                 if (IS_ERR(trans)) {
6358                         ret = PTR_ERR(trans);
6359                         goto out;
6360                 }
6361
6362                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6363                 free_extent_cache_tree(&seen);
6364                 free_extent_cache_tree(&pending);
6365                 free_extent_cache_tree(&reada);
6366                 free_extent_cache_tree(&nodes);
6367                 free_chunk_cache_tree(&chunk_cache);
6368                 free_block_group_tree(&block_group_cache);
6369                 free_device_cache_tree(&dev_cache);
6370                 free_device_extent_tree(&dev_extent_cache);
6371                 free_extent_record_cache(root->fs_info, &extent_cache);
6372                 goto again;
6373         }
6374
6375         err = check_chunks(&chunk_cache, &block_group_cache,
6376                            &dev_extent_cache, NULL, NULL, 0);
6377         if (err && !ret)
6378                 ret = err;
6379
6380         err = check_devices(&dev_cache, &dev_extent_cache);
6381         if (err && !ret)
6382                 ret = err;
6383
6384 out:
6385         if (trans) {
6386                 err = btrfs_commit_transaction(trans, root);
6387                 if (!ret)
6388                         ret = err;
6389         }
6390         if (repair) {
6391                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6392                 root->fs_info->fsck_extent_cache = NULL;
6393                 root->fs_info->free_extent_hook = NULL;
6394                 root->fs_info->corrupt_blocks = NULL;
6395         }
6396         free(bits);
6397         free_chunk_cache_tree(&chunk_cache);
6398         free_device_cache_tree(&dev_cache);
6399         free_block_group_tree(&block_group_cache);
6400         free_device_extent_tree(&dev_extent_cache);
6401         free_extent_cache_tree(&seen);
6402         free_extent_cache_tree(&pending);
6403         free_extent_cache_tree(&reada);
6404         free_extent_cache_tree(&nodes);
6405         return ret;
6406 }
6407
6408 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
6409                            struct btrfs_root *root, int overwrite)
6410 {
6411         struct extent_buffer *c;
6412         struct extent_buffer *old = root->node;
6413         int level;
6414         int ret;
6415         struct btrfs_disk_key disk_key = {0,0,0};
6416
6417         level = 0;
6418
6419         if (overwrite) {
6420                 c = old;
6421                 extent_buffer_get(c);
6422                 goto init;
6423         }
6424         c = btrfs_alloc_free_block(trans, root,
6425                                    btrfs_level_size(root, 0),
6426                                    root->root_key.objectid,
6427                                    &disk_key, level, 0, 0);
6428         if (IS_ERR(c)) {
6429                 c = old;
6430                 extent_buffer_get(c);
6431                 overwrite = 1;
6432         }
6433 init:
6434         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
6435         btrfs_set_header_level(c, level);
6436         btrfs_set_header_bytenr(c, c->start);
6437         btrfs_set_header_generation(c, trans->transid);
6438         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
6439         btrfs_set_header_owner(c, root->root_key.objectid);
6440
6441         write_extent_buffer(c, root->fs_info->fsid,
6442                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
6443
6444         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
6445                             btrfs_header_chunk_tree_uuid(c),
6446                             BTRFS_UUID_SIZE);
6447
6448         btrfs_mark_buffer_dirty(c);
6449         /*
6450          * this case can happen in the following case:
6451          *
6452          * 1.overwrite previous root.
6453          *
6454          * 2.reinit reloc data root, this is because we skip pin
6455          * down reloc data tree before which means we can allocate
6456          * same block bytenr here.
6457          */
6458         if (old->start == c->start) {
6459                 btrfs_set_root_generation(&root->root_item,
6460                                           trans->transid);
6461                 root->root_item.level = btrfs_header_level(root->node);
6462                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
6463                                         &root->root_key, &root->root_item);
6464                 if (ret) {
6465                         free_extent_buffer(c);
6466                         return ret;
6467                 }
6468         }
6469         free_extent_buffer(old);
6470         root->node = c;
6471         add_root_to_dirty_list(root);
6472         return 0;
6473 }
6474
6475 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
6476                                 struct extent_buffer *eb, int tree_root)
6477 {
6478         struct extent_buffer *tmp;
6479         struct btrfs_root_item *ri;
6480         struct btrfs_key key;
6481         u64 bytenr;
6482         u32 leafsize;
6483         int level = btrfs_header_level(eb);
6484         int nritems;
6485         int ret;
6486         int i;
6487
6488         /*
6489          * If we have pinned this block before, don't pin it again.
6490          * This can not only avoid forever loop with broken filesystem
6491          * but also give us some speedups.
6492          */
6493         if (test_range_bit(&fs_info->pinned_extents, eb->start,
6494                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
6495                 return 0;
6496
6497         btrfs_pin_extent(fs_info, eb->start, eb->len);
6498
6499         leafsize = btrfs_super_leafsize(fs_info->super_copy);
6500         nritems = btrfs_header_nritems(eb);
6501         for (i = 0; i < nritems; i++) {
6502                 if (level == 0) {
6503                         btrfs_item_key_to_cpu(eb, &key, i);
6504                         if (key.type != BTRFS_ROOT_ITEM_KEY)
6505                                 continue;
6506                         /* Skip the extent root and reloc roots */
6507                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6508                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
6509                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
6510                                 continue;
6511                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
6512                         bytenr = btrfs_disk_root_bytenr(eb, ri);
6513
6514                         /*
6515                          * If at any point we start needing the real root we
6516                          * will have to build a stump root for the root we are
6517                          * in, but for now this doesn't actually use the root so
6518                          * just pass in extent_root.
6519                          */
6520                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6521                                               leafsize, 0);
6522                         if (!tmp) {
6523                                 fprintf(stderr, "Error reading root block\n");
6524                                 return -EIO;
6525                         }
6526                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
6527                         free_extent_buffer(tmp);
6528                         if (ret)
6529                                 return ret;
6530                 } else {
6531                         bytenr = btrfs_node_blockptr(eb, i);
6532
6533                         /* If we aren't the tree root don't read the block */
6534                         if (level == 1 && !tree_root) {
6535                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
6536                                 continue;
6537                         }
6538
6539                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6540                                               leafsize, 0);
6541                         if (!tmp) {
6542                                 fprintf(stderr, "Error reading tree block\n");
6543                                 return -EIO;
6544                         }
6545                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
6546                         free_extent_buffer(tmp);
6547                         if (ret)
6548                                 return ret;
6549                 }
6550         }
6551
6552         return 0;
6553 }
6554
6555 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
6556 {
6557         int ret;
6558
6559         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
6560         if (ret)
6561                 return ret;
6562
6563         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
6564 }
6565
6566 static int reset_block_groups(struct btrfs_fs_info *fs_info)
6567 {
6568         struct btrfs_block_group_cache *cache;
6569         struct btrfs_path *path;
6570         struct extent_buffer *leaf;
6571         struct btrfs_chunk *chunk;
6572         struct btrfs_key key;
6573         int ret;
6574         u64 start;
6575
6576         path = btrfs_alloc_path();
6577         if (!path)
6578                 return -ENOMEM;
6579
6580         key.objectid = 0;
6581         key.type = BTRFS_CHUNK_ITEM_KEY;
6582         key.offset = 0;
6583
6584         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
6585         if (ret < 0) {
6586                 btrfs_free_path(path);
6587                 return ret;
6588         }
6589
6590         /*
6591          * We do this in case the block groups were screwed up and had alloc
6592          * bits that aren't actually set on the chunks.  This happens with
6593          * restored images every time and could happen in real life I guess.
6594          */
6595         fs_info->avail_data_alloc_bits = 0;
6596         fs_info->avail_metadata_alloc_bits = 0;
6597         fs_info->avail_system_alloc_bits = 0;
6598
6599         /* First we need to create the in-memory block groups */
6600         while (1) {
6601                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6602                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
6603                         if (ret < 0) {
6604                                 btrfs_free_path(path);
6605                                 return ret;
6606                         }
6607                         if (ret) {
6608                                 ret = 0;
6609                                 break;
6610                         }
6611                 }
6612                 leaf = path->nodes[0];
6613                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6614                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
6615                         path->slots[0]++;
6616                         continue;
6617                 }
6618
6619                 chunk = btrfs_item_ptr(leaf, path->slots[0],
6620                                        struct btrfs_chunk);
6621                 btrfs_add_block_group(fs_info, 0,
6622                                       btrfs_chunk_type(leaf, chunk),
6623                                       key.objectid, key.offset,
6624                                       btrfs_chunk_length(leaf, chunk));
6625                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
6626                                  key.offset + btrfs_chunk_length(leaf, chunk),
6627                                  GFP_NOFS);
6628                 path->slots[0]++;
6629         }
6630         start = 0;
6631         while (1) {
6632                 cache = btrfs_lookup_first_block_group(fs_info, start);
6633                 if (!cache)
6634                         break;
6635                 cache->cached = 1;
6636                 start = cache->key.objectid + cache->key.offset;
6637         }
6638
6639         btrfs_free_path(path);
6640         return 0;
6641 }
6642
6643 static int reset_balance(struct btrfs_trans_handle *trans,
6644                          struct btrfs_fs_info *fs_info)
6645 {
6646         struct btrfs_root *root = fs_info->tree_root;
6647         struct btrfs_path *path;
6648         struct extent_buffer *leaf;
6649         struct btrfs_key key;
6650         int del_slot, del_nr = 0;
6651         int ret;
6652         int found = 0;
6653
6654         path = btrfs_alloc_path();
6655         if (!path)
6656                 return -ENOMEM;
6657
6658         key.objectid = BTRFS_BALANCE_OBJECTID;
6659         key.type = BTRFS_BALANCE_ITEM_KEY;
6660         key.offset = 0;
6661
6662         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6663         if (ret) {
6664                 if (ret > 0)
6665                         ret = 0;
6666                 if (!ret)
6667                         goto reinit_data_reloc;
6668                 else
6669                         goto out;
6670         }
6671
6672         ret = btrfs_del_item(trans, root, path);
6673         if (ret)
6674                 goto out;
6675         btrfs_release_path(path);
6676
6677         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
6678         key.type = BTRFS_ROOT_ITEM_KEY;
6679         key.offset = 0;
6680
6681         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6682         if (ret < 0)
6683                 goto out;
6684         while (1) {
6685                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6686                         if (!found)
6687                                 break;
6688
6689                         if (del_nr) {
6690                                 ret = btrfs_del_items(trans, root, path,
6691                                                       del_slot, del_nr);
6692                                 del_nr = 0;
6693                                 if (ret)
6694                                         goto out;
6695                         }
6696                         key.offset++;
6697                         btrfs_release_path(path);
6698
6699                         found = 0;
6700                         ret = btrfs_search_slot(trans, root, &key, path,
6701                                                 -1, 1);
6702                         if (ret < 0)
6703                                 goto out;
6704                         continue;
6705                 }
6706                 found = 1;
6707                 leaf = path->nodes[0];
6708                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6709                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
6710                         break;
6711                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6712                         path->slots[0]++;
6713                         continue;
6714                 }
6715                 if (!del_nr) {
6716                         del_slot = path->slots[0];
6717                         del_nr = 1;
6718                 } else {
6719                         del_nr++;
6720                 }
6721                 path->slots[0]++;
6722         }
6723
6724         if (del_nr) {
6725                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
6726                 if (ret)
6727                         goto out;
6728         }
6729         btrfs_release_path(path);
6730
6731 reinit_data_reloc:
6732         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
6733         key.type = BTRFS_ROOT_ITEM_KEY;
6734         key.offset = (u64)-1;
6735         root = btrfs_read_fs_root(fs_info, &key);
6736         if (IS_ERR(root)) {
6737                 fprintf(stderr, "Error reading data reloc tree\n");
6738                 return PTR_ERR(root);
6739         }
6740         record_root_in_trans(trans, root);
6741         ret = btrfs_fsck_reinit_root(trans, root, 0);
6742         if (ret)
6743                 goto out;
6744         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
6745 out:
6746         btrfs_free_path(path);
6747         return ret;
6748 }
6749
6750 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
6751                               struct btrfs_fs_info *fs_info)
6752 {
6753         u64 start = 0;
6754         int ret;
6755
6756         /*
6757          * The only reason we don't do this is because right now we're just
6758          * walking the trees we find and pinning down their bytes, we don't look
6759          * at any of the leaves.  In order to do mixed groups we'd have to check
6760          * the leaves of any fs roots and pin down the bytes for any file
6761          * extents we find.  Not hard but why do it if we don't have to?
6762          */
6763         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
6764                 fprintf(stderr, "We don't support re-initing the extent tree "
6765                         "for mixed block groups yet, please notify a btrfs "
6766                         "developer you want to do this so they can add this "
6767                         "functionality.\n");
6768                 return -EINVAL;
6769         }
6770
6771         /*
6772          * first we need to walk all of the trees except the extent tree and pin
6773          * down the bytes that are in use so we don't overwrite any existing
6774          * metadata.
6775          */
6776         ret = pin_metadata_blocks(fs_info);
6777         if (ret) {
6778                 fprintf(stderr, "error pinning down used bytes\n");
6779                 return ret;
6780         }
6781
6782         /*
6783          * Need to drop all the block groups since we're going to recreate all
6784          * of them again.
6785          */
6786         btrfs_free_block_groups(fs_info);
6787         ret = reset_block_groups(fs_info);
6788         if (ret) {
6789                 fprintf(stderr, "error resetting the block groups\n");
6790                 return ret;
6791         }
6792
6793         /* Ok we can allocate now, reinit the extent root */
6794         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
6795         if (ret) {
6796                 fprintf(stderr, "extent root initialization failed\n");
6797                 /*
6798                  * When the transaction code is updated we should end the
6799                  * transaction, but for now progs only knows about commit so
6800                  * just return an error.
6801                  */
6802                 return ret;
6803         }
6804
6805         /*
6806          * Now we have all the in-memory block groups setup so we can make
6807          * allocations properly, and the metadata we care about is safe since we
6808          * pinned all of it above.
6809          */
6810         while (1) {
6811                 struct btrfs_block_group_cache *cache;
6812
6813                 cache = btrfs_lookup_first_block_group(fs_info, start);
6814                 if (!cache)
6815                         break;
6816                 start = cache->key.objectid + cache->key.offset;
6817                 ret = btrfs_insert_item(trans, fs_info->extent_root,
6818                                         &cache->key, &cache->item,
6819                                         sizeof(cache->item));
6820                 if (ret) {
6821                         fprintf(stderr, "Error adding block group\n");
6822                         return ret;
6823                 }
6824                 btrfs_extent_post_op(trans, fs_info->extent_root);
6825         }
6826
6827         ret = reset_balance(trans, fs_info);
6828         if (ret)
6829                 fprintf(stderr, "error reseting the pending balance\n");
6830
6831         return ret;
6832 }
6833
6834 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
6835 {
6836         struct btrfs_path *path;
6837         struct btrfs_trans_handle *trans;
6838         struct btrfs_key key;
6839         int ret;
6840
6841         printf("Recowing metadata block %llu\n", eb->start);
6842         key.objectid = btrfs_header_owner(eb);
6843         key.type = BTRFS_ROOT_ITEM_KEY;
6844         key.offset = (u64)-1;
6845
6846         root = btrfs_read_fs_root(root->fs_info, &key);
6847         if (IS_ERR(root)) {
6848                 fprintf(stderr, "Couldn't find owner root %llu\n",
6849                         key.objectid);
6850                 return PTR_ERR(root);
6851         }
6852
6853         path = btrfs_alloc_path();
6854         if (!path)
6855                 return -ENOMEM;
6856
6857         trans = btrfs_start_transaction(root, 1);
6858         if (IS_ERR(trans)) {
6859                 btrfs_free_path(path);
6860                 return PTR_ERR(trans);
6861         }
6862
6863         path->lowest_level = btrfs_header_level(eb);
6864         if (path->lowest_level)
6865                 btrfs_node_key_to_cpu(eb, &key, 0);
6866         else
6867                 btrfs_item_key_to_cpu(eb, &key, 0);
6868
6869         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6870         btrfs_commit_transaction(trans, root);
6871         btrfs_free_path(path);
6872         return ret;
6873 }
6874
6875 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
6876 {
6877         struct btrfs_path *path;
6878         struct btrfs_trans_handle *trans;
6879         struct btrfs_key key;
6880         int ret;
6881
6882         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
6883                bad->key.type, bad->key.offset);
6884         key.objectid = bad->root_id;
6885         key.type = BTRFS_ROOT_ITEM_KEY;
6886         key.offset = (u64)-1;
6887
6888         root = btrfs_read_fs_root(root->fs_info, &key);
6889         if (IS_ERR(root)) {
6890                 fprintf(stderr, "Couldn't find owner root %llu\n",
6891                         key.objectid);
6892                 return PTR_ERR(root);
6893         }
6894
6895         path = btrfs_alloc_path();
6896         if (!path)
6897                 return -ENOMEM;
6898
6899         trans = btrfs_start_transaction(root, 1);
6900         if (IS_ERR(trans)) {
6901                 btrfs_free_path(path);
6902                 return PTR_ERR(trans);
6903         }
6904
6905         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
6906         if (ret) {
6907                 if (ret > 0)
6908                         ret = 0;
6909                 goto out;
6910         }
6911         ret = btrfs_del_item(trans, root, path);
6912 out:
6913         btrfs_commit_transaction(trans, root);
6914         btrfs_free_path(path);
6915         return ret;
6916 }
6917
6918 static int zero_log_tree(struct btrfs_root *root)
6919 {
6920         struct btrfs_trans_handle *trans;
6921         int ret;
6922
6923         trans = btrfs_start_transaction(root, 1);
6924         if (IS_ERR(trans)) {
6925                 ret = PTR_ERR(trans);
6926                 return ret;
6927         }
6928         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
6929         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
6930         ret = btrfs_commit_transaction(trans, root);
6931         return ret;
6932 }
6933
6934 static int populate_csum(struct btrfs_trans_handle *trans,
6935                          struct btrfs_root *csum_root, char *buf, u64 start,
6936                          u64 len)
6937 {
6938         u64 offset = 0;
6939         u64 sectorsize;
6940         int ret = 0;
6941
6942         while (offset < len) {
6943                 sectorsize = csum_root->sectorsize;
6944                 ret = read_extent_data(csum_root, buf, start + offset,
6945                                        &sectorsize, 0);
6946                 if (ret)
6947                         break;
6948                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
6949                                             start + offset, buf, sectorsize);
6950                 if (ret)
6951                         break;
6952                 offset += sectorsize;
6953         }
6954         return ret;
6955 }
6956
6957 static int fill_csum_tree(struct btrfs_trans_handle *trans,
6958                           struct btrfs_root *csum_root)
6959 {
6960         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
6961         struct btrfs_path *path;
6962         struct btrfs_extent_item *ei;
6963         struct extent_buffer *leaf;
6964         char *buf;
6965         struct btrfs_key key;
6966         int ret;
6967
6968         path = btrfs_alloc_path();
6969         if (!path)
6970                 return -ENOMEM;
6971
6972         key.objectid = 0;
6973         key.type = BTRFS_EXTENT_ITEM_KEY;
6974         key.offset = 0;
6975
6976         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
6977         if (ret < 0) {
6978                 btrfs_free_path(path);
6979                 return ret;
6980         }
6981
6982         buf = malloc(csum_root->sectorsize);
6983         if (!buf) {
6984                 btrfs_free_path(path);
6985                 return -ENOMEM;
6986         }
6987
6988         while (1) {
6989                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6990                         ret = btrfs_next_leaf(extent_root, path);
6991                         if (ret < 0)
6992                                 break;
6993                         if (ret) {
6994                                 ret = 0;
6995                                 break;
6996                         }
6997                 }
6998                 leaf = path->nodes[0];
6999
7000                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7001                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7002                         path->slots[0]++;
7003                         continue;
7004                 }
7005
7006                 ei = btrfs_item_ptr(leaf, path->slots[0],
7007                                     struct btrfs_extent_item);
7008                 if (!(btrfs_extent_flags(leaf, ei) &
7009                       BTRFS_EXTENT_FLAG_DATA)) {
7010                         path->slots[0]++;
7011                         continue;
7012                 }
7013
7014                 ret = populate_csum(trans, csum_root, buf, key.objectid,
7015                                     key.offset);
7016                 if (ret)
7017                         break;
7018                 path->slots[0]++;
7019         }
7020
7021         btrfs_free_path(path);
7022         free(buf);
7023         return ret;
7024 }
7025
7026 static struct option long_options[] = {
7027         { "super", 1, NULL, 's' },
7028         { "repair", 0, NULL, 0 },
7029         { "init-csum-tree", 0, NULL, 0 },
7030         { "init-extent-tree", 0, NULL, 0 },
7031         { "check-data-csum", 0, NULL, 0 },
7032         { "backup", 0, NULL, 0 },
7033         { "subvol-extents", no_argument, NULL, 'E' },
7034         { "qgroup-report", 0, NULL, 'Q' },
7035         { NULL, 0, NULL, 0}
7036 };
7037
7038 const char * const cmd_check_usage[] = {
7039         "btrfs check [options] <device>",
7040         "Check an unmounted btrfs filesystem.",
7041         "",
7042         "-s|--super <superblock>     use this superblock copy",
7043         "-b|--backup                 use the backup root copy",
7044         "--repair                    try to repair the filesystem",
7045         "--init-csum-tree            create a new CRC tree",
7046         "--init-extent-tree          create a new extent tree",
7047         "--check-data-csum           verify checkums of data blocks",
7048         "--qgroup-report             print a report on qgroup consistency",
7049         "--subvol-extents            print subvolume extents and sharing state",
7050         NULL
7051 };
7052
7053 int cmd_check(int argc, char **argv)
7054 {
7055         struct cache_tree root_cache;
7056         struct btrfs_root *root;
7057         struct btrfs_fs_info *info;
7058         u64 bytenr = 0;
7059         u64 subvolid = 0;
7060         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
7061         int ret;
7062         u64 num;
7063         int option_index = 0;
7064         int init_csum_tree = 0;
7065         int qgroup_report = 0;
7066         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
7067
7068         while(1) {
7069                 int c;
7070                 c = getopt_long(argc, argv, "as:b", long_options,
7071                                 &option_index);
7072                 if (c < 0)
7073                         break;
7074                 switch(c) {
7075                         case 'a': /* ignored */ break;
7076                         case 'b':
7077                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
7078                                 break;
7079                         case 's':
7080                                 num = arg_strtou64(optarg);
7081                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
7082                                         fprintf(stderr,
7083                                                 "ERROR: super mirror should be less than: %d\n",
7084                                                 BTRFS_SUPER_MIRROR_MAX);
7085                                         exit(1);
7086                                 }
7087                                 bytenr = btrfs_sb_offset(((int)num));
7088                                 printf("using SB copy %llu, bytenr %llu\n", num,
7089                                        (unsigned long long)bytenr);
7090                                 break;
7091                         case 'Q':
7092                                 qgroup_report = 1;
7093                                 break;
7094                         case 'E':
7095                                 subvolid = arg_strtou64(optarg);
7096                                 break;
7097                         case '?':
7098                         case 'h':
7099                                 usage(cmd_check_usage);
7100                 }
7101                 if (option_index == 1) {
7102                         printf("enabling repair mode\n");
7103                         repair = 1;
7104                         ctree_flags |= OPEN_CTREE_WRITES;
7105                 } else if (option_index == 2) {
7106                         printf("Creating a new CRC tree\n");
7107                         init_csum_tree = 1;
7108                         repair = 1;
7109                         ctree_flags |= OPEN_CTREE_WRITES;
7110                 } else if (option_index == 3) {
7111                         init_extent_tree = 1;
7112                         ctree_flags |= (OPEN_CTREE_WRITES |
7113                                         OPEN_CTREE_NO_BLOCK_GROUPS);
7114                         repair = 1;
7115                 } else if (option_index == 4) {
7116                         check_data_csum = 1;
7117                 }
7118         }
7119         argc = argc - optind;
7120
7121         if (check_argc_exact(argc, 1))
7122                 usage(cmd_check_usage);
7123
7124         radix_tree_init();
7125         cache_tree_init(&root_cache);
7126
7127         if((ret = check_mounted(argv[optind])) < 0) {
7128                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
7129                 goto err_out;
7130         } else if(ret) {
7131                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
7132                 ret = -EBUSY;
7133                 goto err_out;
7134         }
7135
7136         /* only allow partial opening under repair mode */
7137         if (repair)
7138                 ctree_flags |= OPEN_CTREE_PARTIAL;
7139
7140         info = open_ctree_fs_info(argv[optind], bytenr, 0, ctree_flags);
7141         if (!info) {
7142                 fprintf(stderr, "Couldn't open file system\n");
7143                 ret = -EIO;
7144                 goto err_out;
7145         }
7146
7147         root = info->fs_root;
7148         /*
7149          * repair mode will force us to commit transaction which
7150          * will make us fail to load log tree when mounting.
7151          */
7152         if (repair && btrfs_super_log_root(info->super_copy)) {
7153                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
7154                 if (!ret) {
7155                         ret = 1;
7156                         goto close_out;
7157                 }
7158                 ret = zero_log_tree(root);
7159                 if (ret) {
7160                         fprintf(stderr, "fail to zero log tree\n");
7161                         goto close_out;
7162                 }
7163         }
7164
7165         uuid_unparse(info->super_copy->fsid, uuidbuf);
7166         if (qgroup_report) {
7167                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
7168                        uuidbuf);
7169                 ret = qgroup_verify_all(info);
7170                 if (ret == 0)
7171                         print_qgroup_report(1);
7172                 goto close_out;
7173         }
7174         if (subvolid) {
7175                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
7176                        subvolid, argv[optind], uuidbuf);
7177                 ret = print_extent_state(info, subvolid);
7178                 goto close_out;
7179         }
7180         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
7181
7182         if (!extent_buffer_uptodate(info->tree_root->node) ||
7183             !extent_buffer_uptodate(info->dev_root->node) ||
7184             !extent_buffer_uptodate(info->chunk_root->node)) {
7185                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
7186                 ret = -EIO;
7187                 goto close_out;
7188         }
7189
7190         if (init_extent_tree || init_csum_tree) {
7191                 struct btrfs_trans_handle *trans;
7192
7193                 trans = btrfs_start_transaction(info->extent_root, 0);
7194                 if (IS_ERR(trans)) {
7195                         fprintf(stderr, "Error starting transaction\n");
7196                         ret = PTR_ERR(trans);
7197                         goto close_out;
7198                 }
7199
7200                 if (init_extent_tree) {
7201                         printf("Creating a new extent tree\n");
7202                         ret = reinit_extent_tree(trans, info);
7203                         if (ret)
7204                                 goto close_out;
7205                 }
7206
7207                 if (init_csum_tree) {
7208                         fprintf(stderr, "Reinit crc root\n");
7209                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
7210                         if (ret) {
7211                                 fprintf(stderr, "crc root initialization failed\n");
7212                                 ret = -EIO;
7213                                 goto close_out;
7214                         }
7215
7216                         ret = fill_csum_tree(trans, info->csum_root);
7217                         if (ret) {
7218                                 fprintf(stderr, "crc refilling failed\n");
7219                                 return -EIO;
7220                         }
7221                 }
7222                 /*
7223                  * Ok now we commit and run the normal fsck, which will add
7224                  * extent entries for all of the items it finds.
7225                  */
7226                 ret = btrfs_commit_transaction(trans, info->extent_root);
7227                 if (ret)
7228                         goto close_out;
7229         }
7230         if (!extent_buffer_uptodate(info->extent_root->node)) {
7231                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
7232                 ret = -EIO;
7233                 goto close_out;
7234         }
7235         if (!extent_buffer_uptodate(info->csum_root->node)) {
7236                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
7237                 ret = -EIO;
7238                 goto close_out;
7239         }
7240
7241         fprintf(stderr, "checking extents\n");
7242         ret = check_chunks_and_extents(root);
7243         if (ret)
7244                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
7245
7246         fprintf(stderr, "checking free space cache\n");
7247         ret = check_space_cache(root);
7248         if (ret)
7249                 goto out;
7250
7251         /*
7252          * We used to have to have these hole extents in between our real
7253          * extents so if we don't have this flag set we need to make sure there
7254          * are no gaps in the file extents for inodes, otherwise we can just
7255          * ignore it when this happens.
7256          */
7257         no_holes = btrfs_fs_incompat(root->fs_info,
7258                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
7259         fprintf(stderr, "checking fs roots\n");
7260         ret = check_fs_roots(root, &root_cache);
7261         if (ret)
7262                 goto out;
7263
7264         fprintf(stderr, "checking csums\n");
7265         ret = check_csums(root);
7266         if (ret)
7267                 goto out;
7268
7269         fprintf(stderr, "checking root refs\n");
7270         ret = check_root_refs(root, &root_cache);
7271         if (ret)
7272                 goto out;
7273
7274         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
7275                 struct extent_buffer *eb;
7276
7277                 eb = list_first_entry(&root->fs_info->recow_ebs,
7278                                       struct extent_buffer, recow);
7279                 list_del_init(&eb->recow);
7280                 ret = recow_extent_buffer(root, eb);
7281                 if (ret)
7282                         break;
7283         }
7284
7285         while (!list_empty(&delete_items)) {
7286                 struct bad_item *bad;
7287
7288                 bad = list_first_entry(&delete_items, struct bad_item, list);
7289                 list_del_init(&bad->list);
7290                 if (repair)
7291                         ret = delete_bad_item(root, bad);
7292                 free(bad);
7293         }
7294
7295         if (info->quota_enabled) {
7296                 int err;
7297                 fprintf(stderr, "checking quota groups\n");
7298                 err = qgroup_verify_all(info);
7299                 if (err)
7300                         goto out;
7301         }
7302
7303         if (!list_empty(&root->fs_info->recow_ebs)) {
7304                 fprintf(stderr, "Transid errors in file system\n");
7305                 ret = 1;
7306         }
7307 out:
7308         print_qgroup_report(0);
7309         if (found_old_backref) { /*
7310                  * there was a disk format change when mixed
7311                  * backref was in testing tree. The old format
7312                  * existed about one week.
7313                  */
7314                 printf("\n * Found old mixed backref format. "
7315                        "The old format is not supported! *"
7316                        "\n * Please mount the FS in readonly mode, "
7317                        "backup data and re-format the FS. *\n\n");
7318                 ret = 1;
7319         }
7320         printf("found %llu bytes used err is %d\n",
7321                (unsigned long long)bytes_used, ret);
7322         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
7323         printf("total tree bytes: %llu\n",
7324                (unsigned long long)total_btree_bytes);
7325         printf("total fs tree bytes: %llu\n",
7326                (unsigned long long)total_fs_tree_bytes);
7327         printf("total extent tree bytes: %llu\n",
7328                (unsigned long long)total_extent_tree_bytes);
7329         printf("btree space waste bytes: %llu\n",
7330                (unsigned long long)btree_space_waste);
7331         printf("file data blocks allocated: %llu\n referenced %llu\n",
7332                 (unsigned long long)data_bytes_allocated,
7333                 (unsigned long long)data_bytes_referenced);
7334         printf("%s\n", BTRFS_BUILD_VERSION);
7335
7336         free_root_recs_tree(&root_cache);
7337 close_out:
7338         close_ctree(root);
7339 err_out:
7340         return ret;
7341 }