d479361a578452ba5c430c6f368fde8a034d5968
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "ctree.h"
31 #include "volumes.h"
32 #include "repair.h"
33 #include "disk-io.h"
34 #include "print-tree.h"
35 #include "transaction.h"
36 #include "version.h"
37 #include "utils.h"
38 #include "commands.h"
39 #include "free-space-cache.h"
40 #include "btrfsck.h"
41 #include "qgroup-verify.h"
42
43 static u64 bytes_used = 0;
44 static u64 total_csum_bytes = 0;
45 static u64 total_btree_bytes = 0;
46 static u64 total_fs_tree_bytes = 0;
47 static u64 total_extent_tree_bytes = 0;
48 static u64 btree_space_waste = 0;
49 static u64 data_bytes_allocated = 0;
50 static u64 data_bytes_referenced = 0;
51 static int found_old_backref = 0;
52 static LIST_HEAD(duplicate_extents);
53 static LIST_HEAD(delete_items);
54 static int repair = 0;
55 static int no_holes = 0;
56 static int init_extent_tree = 0;
57 static int check_data_csum = 0;
58
59 struct extent_backref {
60         struct list_head list;
61         unsigned int is_data:1;
62         unsigned int found_extent_tree:1;
63         unsigned int full_backref:1;
64         unsigned int found_ref:1;
65         unsigned int broken:1;
66 };
67
68 struct data_backref {
69         struct extent_backref node;
70         union {
71                 u64 parent;
72                 u64 root;
73         };
74         u64 owner;
75         u64 offset;
76         u64 disk_bytenr;
77         u64 bytes;
78         u64 ram_bytes;
79         u32 num_refs;
80         u32 found_ref;
81 };
82
83 struct tree_backref {
84         struct extent_backref node;
85         union {
86                 u64 parent;
87                 u64 root;
88         };
89 };
90
91 struct extent_record {
92         struct list_head backrefs;
93         struct list_head dups;
94         struct list_head list;
95         struct cache_extent cache;
96         struct btrfs_disk_key parent_key;
97         u64 start;
98         u64 max_size;
99         u64 nr;
100         u64 refs;
101         u64 extent_item_refs;
102         u64 generation;
103         u64 parent_generation;
104         u64 info_objectid;
105         u32 num_duplicates;
106         u8 info_level;
107         unsigned int found_rec:1;
108         unsigned int content_checked:1;
109         unsigned int owner_ref_checked:1;
110         unsigned int is_root:1;
111         unsigned int metadata:1;
112 };
113
114 struct inode_backref {
115         struct list_head list;
116         unsigned int found_dir_item:1;
117         unsigned int found_dir_index:1;
118         unsigned int found_inode_ref:1;
119         unsigned int filetype:8;
120         int errors;
121         unsigned int ref_type;
122         u64 dir;
123         u64 index;
124         u16 namelen;
125         char name[0];
126 };
127
128 struct dropping_root_item_record {
129         struct list_head list;
130         struct btrfs_root_item ri;
131         struct btrfs_key found_key;
132 };
133
134 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
135 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
136 #define REF_ERR_NO_INODE_REF            (1 << 2)
137 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
138 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
139 #define REF_ERR_DUP_INODE_REF           (1 << 5)
140 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
141 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
142 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
143 #define REF_ERR_NO_ROOT_REF             (1 << 9)
144 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
145 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
146 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
147
148 struct inode_record {
149         struct list_head backrefs;
150         unsigned int checked:1;
151         unsigned int merging:1;
152         unsigned int found_inode_item:1;
153         unsigned int found_dir_item:1;
154         unsigned int found_file_extent:1;
155         unsigned int found_csum_item:1;
156         unsigned int some_csum_missing:1;
157         unsigned int nodatasum:1;
158         int errors;
159
160         u64 ino;
161         u32 nlink;
162         u32 imode;
163         u64 isize;
164         u64 nbytes;
165
166         u32 found_link;
167         u64 found_size;
168         u64 extent_start;
169         u64 extent_end;
170         u64 first_extent_gap;
171
172         u32 refs;
173 };
174
175 #define I_ERR_NO_INODE_ITEM             (1 << 0)
176 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
177 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
178 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
179 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
180 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
181 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
182 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
183 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
184 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
185 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
186 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
187 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
188 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
189
190 struct root_backref {
191         struct list_head list;
192         unsigned int found_dir_item:1;
193         unsigned int found_dir_index:1;
194         unsigned int found_back_ref:1;
195         unsigned int found_forward_ref:1;
196         unsigned int reachable:1;
197         int errors;
198         u64 ref_root;
199         u64 dir;
200         u64 index;
201         u16 namelen;
202         char name[0];
203 };
204
205 struct root_record {
206         struct list_head backrefs;
207         struct cache_extent cache;
208         unsigned int found_root_item:1;
209         u64 objectid;
210         u32 found_ref;
211 };
212
213 struct ptr_node {
214         struct cache_extent cache;
215         void *data;
216 };
217
218 struct shared_node {
219         struct cache_extent cache;
220         struct cache_tree root_cache;
221         struct cache_tree inode_cache;
222         struct inode_record *current;
223         u32 refs;
224 };
225
226 struct block_info {
227         u64 start;
228         u32 size;
229 };
230
231 struct walk_control {
232         struct cache_tree shared;
233         struct shared_node *nodes[BTRFS_MAX_LEVEL];
234         int active_node;
235         int root_level;
236 };
237
238 struct bad_item {
239         struct btrfs_key key;
240         u64 root_id;
241         struct list_head list;
242 };
243
244 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
245
246 static u8 imode_to_type(u32 imode)
247 {
248 #define S_SHIFT 12
249         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
250                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
251                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
252                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
253                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
254                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
255                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
256                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
257         };
258
259         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
260 #undef S_SHIFT
261 }
262
263 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
264 {
265         struct device_record *rec1;
266         struct device_record *rec2;
267
268         rec1 = rb_entry(node1, struct device_record, node);
269         rec2 = rb_entry(node2, struct device_record, node);
270         if (rec1->devid > rec2->devid)
271                 return -1;
272         else if (rec1->devid < rec2->devid)
273                 return 1;
274         else
275                 return 0;
276 }
277
278 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
279 {
280         struct inode_record *rec;
281         struct inode_backref *backref;
282         struct inode_backref *orig;
283         size_t size;
284
285         rec = malloc(sizeof(*rec));
286         memcpy(rec, orig_rec, sizeof(*rec));
287         rec->refs = 1;
288         INIT_LIST_HEAD(&rec->backrefs);
289
290         list_for_each_entry(orig, &orig_rec->backrefs, list) {
291                 size = sizeof(*orig) + orig->namelen + 1;
292                 backref = malloc(size);
293                 memcpy(backref, orig, size);
294                 list_add_tail(&backref->list, &rec->backrefs);
295         }
296         return rec;
297 }
298
299 static void print_inode_error(int errors)
300 {
301         if (errors & I_ERR_NO_INODE_ITEM)
302                 fprintf(stderr, ", no inode item");
303         if (errors & I_ERR_NO_ORPHAN_ITEM)
304                 fprintf(stderr, ", no orphan item");
305         if (errors & I_ERR_DUP_INODE_ITEM)
306                 fprintf(stderr, ", dup inode item");
307         if (errors & I_ERR_DUP_DIR_INDEX)
308                 fprintf(stderr, ", dup dir index");
309         if (errors & I_ERR_ODD_DIR_ITEM)
310                 fprintf(stderr, ", odd dir item");
311         if (errors & I_ERR_ODD_FILE_EXTENT)
312                 fprintf(stderr, ", odd file extent");
313         if (errors & I_ERR_BAD_FILE_EXTENT)
314                 fprintf(stderr, ", bad file extent");
315         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
316                 fprintf(stderr, ", file extent overlap");
317         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
318                 fprintf(stderr, ", file extent discount");
319         if (errors & I_ERR_DIR_ISIZE_WRONG)
320                 fprintf(stderr, ", dir isize wrong");
321         if (errors & I_ERR_FILE_NBYTES_WRONG)
322                 fprintf(stderr, ", nbytes wrong");
323         if (errors & I_ERR_ODD_CSUM_ITEM)
324                 fprintf(stderr, ", odd csum item");
325         if (errors & I_ERR_SOME_CSUM_MISSING)
326                 fprintf(stderr, ", some csum missing");
327         if (errors & I_ERR_LINK_COUNT_WRONG)
328                 fprintf(stderr, ", link count wrong");
329         fprintf(stderr, "\n");
330 }
331
332 static void print_ref_error(int errors)
333 {
334         if (errors & REF_ERR_NO_DIR_ITEM)
335                 fprintf(stderr, ", no dir item");
336         if (errors & REF_ERR_NO_DIR_INDEX)
337                 fprintf(stderr, ", no dir index");
338         if (errors & REF_ERR_NO_INODE_REF)
339                 fprintf(stderr, ", no inode ref");
340         if (errors & REF_ERR_DUP_DIR_ITEM)
341                 fprintf(stderr, ", dup dir item");
342         if (errors & REF_ERR_DUP_DIR_INDEX)
343                 fprintf(stderr, ", dup dir index");
344         if (errors & REF_ERR_DUP_INODE_REF)
345                 fprintf(stderr, ", dup inode ref");
346         if (errors & REF_ERR_INDEX_UNMATCH)
347                 fprintf(stderr, ", index unmatch");
348         if (errors & REF_ERR_FILETYPE_UNMATCH)
349                 fprintf(stderr, ", filetype unmatch");
350         if (errors & REF_ERR_NAME_TOO_LONG)
351                 fprintf(stderr, ", name too long");
352         if (errors & REF_ERR_NO_ROOT_REF)
353                 fprintf(stderr, ", no root ref");
354         if (errors & REF_ERR_NO_ROOT_BACKREF)
355                 fprintf(stderr, ", no root backref");
356         if (errors & REF_ERR_DUP_ROOT_REF)
357                 fprintf(stderr, ", dup root ref");
358         if (errors & REF_ERR_DUP_ROOT_BACKREF)
359                 fprintf(stderr, ", dup root backref");
360         fprintf(stderr, "\n");
361 }
362
363 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
364                                           u64 ino, int mod)
365 {
366         struct ptr_node *node;
367         struct cache_extent *cache;
368         struct inode_record *rec = NULL;
369         int ret;
370
371         cache = lookup_cache_extent(inode_cache, ino, 1);
372         if (cache) {
373                 node = container_of(cache, struct ptr_node, cache);
374                 rec = node->data;
375                 if (mod && rec->refs > 1) {
376                         node->data = clone_inode_rec(rec);
377                         rec->refs--;
378                         rec = node->data;
379                 }
380         } else if (mod) {
381                 rec = calloc(1, sizeof(*rec));
382                 rec->ino = ino;
383                 rec->extent_start = (u64)-1;
384                 rec->first_extent_gap = (u64)-1;
385                 rec->refs = 1;
386                 INIT_LIST_HEAD(&rec->backrefs);
387
388                 node = malloc(sizeof(*node));
389                 node->cache.start = ino;
390                 node->cache.size = 1;
391                 node->data = rec;
392
393                 if (ino == BTRFS_FREE_INO_OBJECTID)
394                         rec->found_link = 1;
395
396                 ret = insert_cache_extent(inode_cache, &node->cache);
397                 BUG_ON(ret);
398         }
399         return rec;
400 }
401
402 static void free_inode_rec(struct inode_record *rec)
403 {
404         struct inode_backref *backref;
405
406         if (--rec->refs > 0)
407                 return;
408
409         while (!list_empty(&rec->backrefs)) {
410                 backref = list_entry(rec->backrefs.next,
411                                      struct inode_backref, list);
412                 list_del(&backref->list);
413                 free(backref);
414         }
415         free(rec);
416 }
417
418 static int can_free_inode_rec(struct inode_record *rec)
419 {
420         if (!rec->errors && rec->checked && rec->found_inode_item &&
421             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
422                 return 1;
423         return 0;
424 }
425
426 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
427                                  struct inode_record *rec)
428 {
429         struct cache_extent *cache;
430         struct inode_backref *tmp, *backref;
431         struct ptr_node *node;
432         unsigned char filetype;
433
434         if (!rec->found_inode_item)
435                 return;
436
437         filetype = imode_to_type(rec->imode);
438         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
439                 if (backref->found_dir_item && backref->found_dir_index) {
440                         if (backref->filetype != filetype)
441                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
442                         if (!backref->errors && backref->found_inode_ref) {
443                                 list_del(&backref->list);
444                                 free(backref);
445                         }
446                 }
447         }
448
449         if (!rec->checked || rec->merging)
450                 return;
451
452         if (S_ISDIR(rec->imode)) {
453                 if (rec->found_size != rec->isize)
454                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
455                 if (rec->found_file_extent)
456                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
457         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
458                 if (rec->found_dir_item)
459                         rec->errors |= I_ERR_ODD_DIR_ITEM;
460                 if (rec->found_size != rec->nbytes)
461                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
462                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
463                         rec->first_extent_gap = 0;
464                 if (rec->nlink > 0 && !no_holes &&
465                     (rec->extent_end < rec->isize ||
466                      rec->first_extent_gap < rec->isize))
467                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
468         }
469
470         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
471                 if (rec->found_csum_item && rec->nodatasum)
472                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
473                 if (rec->some_csum_missing && !rec->nodatasum)
474                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
475         }
476
477         BUG_ON(rec->refs != 1);
478         if (can_free_inode_rec(rec)) {
479                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
480                 node = container_of(cache, struct ptr_node, cache);
481                 BUG_ON(node->data != rec);
482                 remove_cache_extent(inode_cache, &node->cache);
483                 free(node);
484                 free_inode_rec(rec);
485         }
486 }
487
488 static int check_orphan_item(struct btrfs_root *root, u64 ino)
489 {
490         struct btrfs_path path;
491         struct btrfs_key key;
492         int ret;
493
494         key.objectid = BTRFS_ORPHAN_OBJECTID;
495         key.type = BTRFS_ORPHAN_ITEM_KEY;
496         key.offset = ino;
497
498         btrfs_init_path(&path);
499         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
500         btrfs_release_path(&path);
501         if (ret > 0)
502                 ret = -ENOENT;
503         return ret;
504 }
505
506 static int process_inode_item(struct extent_buffer *eb,
507                               int slot, struct btrfs_key *key,
508                               struct shared_node *active_node)
509 {
510         struct inode_record *rec;
511         struct btrfs_inode_item *item;
512
513         rec = active_node->current;
514         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
515         if (rec->found_inode_item) {
516                 rec->errors |= I_ERR_DUP_INODE_ITEM;
517                 return 1;
518         }
519         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
520         rec->nlink = btrfs_inode_nlink(eb, item);
521         rec->isize = btrfs_inode_size(eb, item);
522         rec->nbytes = btrfs_inode_nbytes(eb, item);
523         rec->imode = btrfs_inode_mode(eb, item);
524         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
525                 rec->nodatasum = 1;
526         rec->found_inode_item = 1;
527         if (rec->nlink == 0)
528                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
529         maybe_free_inode_rec(&active_node->inode_cache, rec);
530         return 0;
531 }
532
533 static struct inode_backref *get_inode_backref(struct inode_record *rec,
534                                                 const char *name,
535                                                 int namelen, u64 dir)
536 {
537         struct inode_backref *backref;
538
539         list_for_each_entry(backref, &rec->backrefs, list) {
540                 if (backref->dir != dir || backref->namelen != namelen)
541                         continue;
542                 if (memcmp(name, backref->name, namelen))
543                         continue;
544                 return backref;
545         }
546
547         backref = malloc(sizeof(*backref) + namelen + 1);
548         memset(backref, 0, sizeof(*backref));
549         backref->dir = dir;
550         backref->namelen = namelen;
551         memcpy(backref->name, name, namelen);
552         backref->name[namelen] = '\0';
553         list_add_tail(&backref->list, &rec->backrefs);
554         return backref;
555 }
556
557 static int add_inode_backref(struct cache_tree *inode_cache,
558                              u64 ino, u64 dir, u64 index,
559                              const char *name, int namelen,
560                              int filetype, int itemtype, int errors)
561 {
562         struct inode_record *rec;
563         struct inode_backref *backref;
564
565         rec = get_inode_rec(inode_cache, ino, 1);
566         backref = get_inode_backref(rec, name, namelen, dir);
567         if (errors)
568                 backref->errors |= errors;
569         if (itemtype == BTRFS_DIR_INDEX_KEY) {
570                 if (backref->found_dir_index)
571                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
572                 if (backref->found_inode_ref && backref->index != index)
573                         backref->errors |= REF_ERR_INDEX_UNMATCH;
574                 if (backref->found_dir_item && backref->filetype != filetype)
575                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
576
577                 backref->index = index;
578                 backref->filetype = filetype;
579                 backref->found_dir_index = 1;
580         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
581                 rec->found_link++;
582                 if (backref->found_dir_item)
583                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
584                 if (backref->found_dir_index && backref->filetype != filetype)
585                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
586
587                 backref->filetype = filetype;
588                 backref->found_dir_item = 1;
589         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
590                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
591                 if (backref->found_inode_ref)
592                         backref->errors |= REF_ERR_DUP_INODE_REF;
593                 if (backref->found_dir_index && backref->index != index)
594                         backref->errors |= REF_ERR_INDEX_UNMATCH;
595
596                 backref->ref_type = itemtype;
597                 backref->index = index;
598                 backref->found_inode_ref = 1;
599         } else {
600                 BUG_ON(1);
601         }
602
603         maybe_free_inode_rec(inode_cache, rec);
604         return 0;
605 }
606
607 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
608                             struct cache_tree *dst_cache)
609 {
610         struct inode_backref *backref;
611         u32 dir_count = 0;
612
613         dst->merging = 1;
614         list_for_each_entry(backref, &src->backrefs, list) {
615                 if (backref->found_dir_index) {
616                         add_inode_backref(dst_cache, dst->ino, backref->dir,
617                                         backref->index, backref->name,
618                                         backref->namelen, backref->filetype,
619                                         BTRFS_DIR_INDEX_KEY, backref->errors);
620                 }
621                 if (backref->found_dir_item) {
622                         dir_count++;
623                         add_inode_backref(dst_cache, dst->ino,
624                                         backref->dir, 0, backref->name,
625                                         backref->namelen, backref->filetype,
626                                         BTRFS_DIR_ITEM_KEY, backref->errors);
627                 }
628                 if (backref->found_inode_ref) {
629                         add_inode_backref(dst_cache, dst->ino,
630                                         backref->dir, backref->index,
631                                         backref->name, backref->namelen, 0,
632                                         backref->ref_type, backref->errors);
633                 }
634         }
635
636         if (src->found_dir_item)
637                 dst->found_dir_item = 1;
638         if (src->found_file_extent)
639                 dst->found_file_extent = 1;
640         if (src->found_csum_item)
641                 dst->found_csum_item = 1;
642         if (src->some_csum_missing)
643                 dst->some_csum_missing = 1;
644         if (dst->first_extent_gap > src->first_extent_gap)
645                 dst->first_extent_gap = src->first_extent_gap;
646
647         BUG_ON(src->found_link < dir_count);
648         dst->found_link += src->found_link - dir_count;
649         dst->found_size += src->found_size;
650         if (src->extent_start != (u64)-1) {
651                 if (dst->extent_start == (u64)-1) {
652                         dst->extent_start = src->extent_start;
653                         dst->extent_end = src->extent_end;
654                 } else {
655                         if (dst->extent_end > src->extent_start)
656                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
657                         else if (dst->extent_end < src->extent_start &&
658                                  dst->extent_end < dst->first_extent_gap)
659                                 dst->first_extent_gap = dst->extent_end;
660                         if (dst->extent_end < src->extent_end)
661                                 dst->extent_end = src->extent_end;
662                 }
663         }
664
665         dst->errors |= src->errors;
666         if (src->found_inode_item) {
667                 if (!dst->found_inode_item) {
668                         dst->nlink = src->nlink;
669                         dst->isize = src->isize;
670                         dst->nbytes = src->nbytes;
671                         dst->imode = src->imode;
672                         dst->nodatasum = src->nodatasum;
673                         dst->found_inode_item = 1;
674                 } else {
675                         dst->errors |= I_ERR_DUP_INODE_ITEM;
676                 }
677         }
678         dst->merging = 0;
679
680         return 0;
681 }
682
683 static int splice_shared_node(struct shared_node *src_node,
684                               struct shared_node *dst_node)
685 {
686         struct cache_extent *cache;
687         struct ptr_node *node, *ins;
688         struct cache_tree *src, *dst;
689         struct inode_record *rec, *conflict;
690         u64 current_ino = 0;
691         int splice = 0;
692         int ret;
693
694         if (--src_node->refs == 0)
695                 splice = 1;
696         if (src_node->current)
697                 current_ino = src_node->current->ino;
698
699         src = &src_node->root_cache;
700         dst = &dst_node->root_cache;
701 again:
702         cache = search_cache_extent(src, 0);
703         while (cache) {
704                 node = container_of(cache, struct ptr_node, cache);
705                 rec = node->data;
706                 cache = next_cache_extent(cache);
707
708                 if (splice) {
709                         remove_cache_extent(src, &node->cache);
710                         ins = node;
711                 } else {
712                         ins = malloc(sizeof(*ins));
713                         ins->cache.start = node->cache.start;
714                         ins->cache.size = node->cache.size;
715                         ins->data = rec;
716                         rec->refs++;
717                 }
718                 ret = insert_cache_extent(dst, &ins->cache);
719                 if (ret == -EEXIST) {
720                         conflict = get_inode_rec(dst, rec->ino, 1);
721                         merge_inode_recs(rec, conflict, dst);
722                         if (rec->checked) {
723                                 conflict->checked = 1;
724                                 if (dst_node->current == conflict)
725                                         dst_node->current = NULL;
726                         }
727                         maybe_free_inode_rec(dst, conflict);
728                         free_inode_rec(rec);
729                         free(ins);
730                 } else {
731                         BUG_ON(ret);
732                 }
733         }
734
735         if (src == &src_node->root_cache) {
736                 src = &src_node->inode_cache;
737                 dst = &dst_node->inode_cache;
738                 goto again;
739         }
740
741         if (current_ino > 0 && (!dst_node->current ||
742             current_ino > dst_node->current->ino)) {
743                 if (dst_node->current) {
744                         dst_node->current->checked = 1;
745                         maybe_free_inode_rec(dst, dst_node->current);
746                 }
747                 dst_node->current = get_inode_rec(dst, current_ino, 1);
748         }
749         return 0;
750 }
751
752 static void free_inode_ptr(struct cache_extent *cache)
753 {
754         struct ptr_node *node;
755         struct inode_record *rec;
756
757         node = container_of(cache, struct ptr_node, cache);
758         rec = node->data;
759         free_inode_rec(rec);
760         free(node);
761 }
762
763 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
764
765 static struct shared_node *find_shared_node(struct cache_tree *shared,
766                                             u64 bytenr)
767 {
768         struct cache_extent *cache;
769         struct shared_node *node;
770
771         cache = lookup_cache_extent(shared, bytenr, 1);
772         if (cache) {
773                 node = container_of(cache, struct shared_node, cache);
774                 return node;
775         }
776         return NULL;
777 }
778
779 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
780 {
781         int ret;
782         struct shared_node *node;
783
784         node = calloc(1, sizeof(*node));
785         node->cache.start = bytenr;
786         node->cache.size = 1;
787         cache_tree_init(&node->root_cache);
788         cache_tree_init(&node->inode_cache);
789         node->refs = refs;
790
791         ret = insert_cache_extent(shared, &node->cache);
792         BUG_ON(ret);
793         return 0;
794 }
795
796 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
797                              struct walk_control *wc, int level)
798 {
799         struct shared_node *node;
800         struct shared_node *dest;
801
802         if (level == wc->active_node)
803                 return 0;
804
805         BUG_ON(wc->active_node <= level);
806         node = find_shared_node(&wc->shared, bytenr);
807         if (!node) {
808                 add_shared_node(&wc->shared, bytenr, refs);
809                 node = find_shared_node(&wc->shared, bytenr);
810                 wc->nodes[level] = node;
811                 wc->active_node = level;
812                 return 0;
813         }
814
815         if (wc->root_level == wc->active_node &&
816             btrfs_root_refs(&root->root_item) == 0) {
817                 if (--node->refs == 0) {
818                         free_inode_recs_tree(&node->root_cache);
819                         free_inode_recs_tree(&node->inode_cache);
820                         remove_cache_extent(&wc->shared, &node->cache);
821                         free(node);
822                 }
823                 return 1;
824         }
825
826         dest = wc->nodes[wc->active_node];
827         splice_shared_node(node, dest);
828         if (node->refs == 0) {
829                 remove_cache_extent(&wc->shared, &node->cache);
830                 free(node);
831         }
832         return 1;
833 }
834
835 static int leave_shared_node(struct btrfs_root *root,
836                              struct walk_control *wc, int level)
837 {
838         struct shared_node *node;
839         struct shared_node *dest;
840         int i;
841
842         if (level == wc->root_level)
843                 return 0;
844
845         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
846                 if (wc->nodes[i])
847                         break;
848         }
849         BUG_ON(i >= BTRFS_MAX_LEVEL);
850
851         node = wc->nodes[wc->active_node];
852         wc->nodes[wc->active_node] = NULL;
853         wc->active_node = i;
854
855         dest = wc->nodes[wc->active_node];
856         if (wc->active_node < wc->root_level ||
857             btrfs_root_refs(&root->root_item) > 0) {
858                 BUG_ON(node->refs <= 1);
859                 splice_shared_node(node, dest);
860         } else {
861                 BUG_ON(node->refs < 2);
862                 node->refs--;
863         }
864         return 0;
865 }
866
867 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
868                          u64 child_root_id)
869 {
870         struct btrfs_path path;
871         struct btrfs_key key;
872         struct extent_buffer *leaf;
873         int has_parent = 0;
874         int ret;
875
876         btrfs_init_path(&path);
877
878         key.objectid = parent_root_id;
879         key.type = BTRFS_ROOT_REF_KEY;
880         key.offset = child_root_id;
881         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
882                                 0, 0);
883         BUG_ON(ret < 0);
884         btrfs_release_path(&path);
885         if (!ret)
886                 return 1;
887
888         key.objectid = child_root_id;
889         key.type = BTRFS_ROOT_BACKREF_KEY;
890         key.offset = 0;
891         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
892                                 0, 0);
893         BUG_ON(ret <= 0);
894
895         while (1) {
896                 leaf = path.nodes[0];
897                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
898                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
899                         BUG_ON(ret < 0);
900
901                         if (ret > 0)
902                                 break;
903                         leaf = path.nodes[0];
904                 }
905
906                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
907                 if (key.objectid != child_root_id ||
908                     key.type != BTRFS_ROOT_BACKREF_KEY)
909                         break;
910
911                 has_parent = 1;
912
913                 if (key.offset == parent_root_id) {
914                         btrfs_release_path(&path);
915                         return 1;
916                 }
917
918                 path.slots[0]++;
919         }
920
921         btrfs_release_path(&path);
922         return has_parent? 0 : -1;
923 }
924
925 static int process_dir_item(struct btrfs_root *root,
926                             struct extent_buffer *eb,
927                             int slot, struct btrfs_key *key,
928                             struct shared_node *active_node)
929 {
930         u32 total;
931         u32 cur = 0;
932         u32 len;
933         u32 name_len;
934         u32 data_len;
935         int error;
936         int nritems = 0;
937         int filetype;
938         struct btrfs_dir_item *di;
939         struct inode_record *rec;
940         struct cache_tree *root_cache;
941         struct cache_tree *inode_cache;
942         struct btrfs_key location;
943         char namebuf[BTRFS_NAME_LEN];
944
945         root_cache = &active_node->root_cache;
946         inode_cache = &active_node->inode_cache;
947         rec = active_node->current;
948         rec->found_dir_item = 1;
949
950         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
951         total = btrfs_item_size_nr(eb, slot);
952         while (cur < total) {
953                 nritems++;
954                 btrfs_dir_item_key_to_cpu(eb, di, &location);
955                 name_len = btrfs_dir_name_len(eb, di);
956                 data_len = btrfs_dir_data_len(eb, di);
957                 filetype = btrfs_dir_type(eb, di);
958
959                 rec->found_size += name_len;
960                 if (name_len <= BTRFS_NAME_LEN) {
961                         len = name_len;
962                         error = 0;
963                 } else {
964                         len = BTRFS_NAME_LEN;
965                         error = REF_ERR_NAME_TOO_LONG;
966                 }
967                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
968
969                 if (location.type == BTRFS_INODE_ITEM_KEY) {
970                         add_inode_backref(inode_cache, location.objectid,
971                                           key->objectid, key->offset, namebuf,
972                                           len, filetype, key->type, error);
973                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
974                         add_inode_backref(root_cache, location.objectid,
975                                           key->objectid, key->offset,
976                                           namebuf, len, filetype,
977                                           key->type, error);
978                 } else {
979                         fprintf(stderr, "warning line %d\n", __LINE__);
980                 }
981
982                 len = sizeof(*di) + name_len + data_len;
983                 di = (struct btrfs_dir_item *)((char *)di + len);
984                 cur += len;
985         }
986         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
987                 rec->errors |= I_ERR_DUP_DIR_INDEX;
988
989         return 0;
990 }
991
992 static int process_inode_ref(struct extent_buffer *eb,
993                              int slot, struct btrfs_key *key,
994                              struct shared_node *active_node)
995 {
996         u32 total;
997         u32 cur = 0;
998         u32 len;
999         u32 name_len;
1000         u64 index;
1001         int error;
1002         struct cache_tree *inode_cache;
1003         struct btrfs_inode_ref *ref;
1004         char namebuf[BTRFS_NAME_LEN];
1005
1006         inode_cache = &active_node->inode_cache;
1007
1008         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1009         total = btrfs_item_size_nr(eb, slot);
1010         while (cur < total) {
1011                 name_len = btrfs_inode_ref_name_len(eb, ref);
1012                 index = btrfs_inode_ref_index(eb, ref);
1013                 if (name_len <= BTRFS_NAME_LEN) {
1014                         len = name_len;
1015                         error = 0;
1016                 } else {
1017                         len = BTRFS_NAME_LEN;
1018                         error = REF_ERR_NAME_TOO_LONG;
1019                 }
1020                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1021                 add_inode_backref(inode_cache, key->objectid, key->offset,
1022                                   index, namebuf, len, 0, key->type, error);
1023
1024                 len = sizeof(*ref) + name_len;
1025                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1026                 cur += len;
1027         }
1028         return 0;
1029 }
1030
1031 static int process_inode_extref(struct extent_buffer *eb,
1032                                 int slot, struct btrfs_key *key,
1033                                 struct shared_node *active_node)
1034 {
1035         u32 total;
1036         u32 cur = 0;
1037         u32 len;
1038         u32 name_len;
1039         u64 index;
1040         u64 parent;
1041         int error;
1042         struct cache_tree *inode_cache;
1043         struct btrfs_inode_extref *extref;
1044         char namebuf[BTRFS_NAME_LEN];
1045
1046         inode_cache = &active_node->inode_cache;
1047
1048         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1049         total = btrfs_item_size_nr(eb, slot);
1050         while (cur < total) {
1051                 name_len = btrfs_inode_extref_name_len(eb, extref);
1052                 index = btrfs_inode_extref_index(eb, extref);
1053                 parent = btrfs_inode_extref_parent(eb, extref);
1054                 if (name_len <= BTRFS_NAME_LEN) {
1055                         len = name_len;
1056                         error = 0;
1057                 } else {
1058                         len = BTRFS_NAME_LEN;
1059                         error = REF_ERR_NAME_TOO_LONG;
1060                 }
1061                 read_extent_buffer(eb, namebuf,
1062                                    (unsigned long)(extref + 1), len);
1063                 add_inode_backref(inode_cache, key->objectid, parent,
1064                                   index, namebuf, len, 0, key->type, error);
1065
1066                 len = sizeof(*extref) + name_len;
1067                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1068                 cur += len;
1069         }
1070         return 0;
1071
1072 }
1073
1074 static u64 count_csum_range(struct btrfs_root *root, u64 start, u64 len)
1075 {
1076         struct btrfs_key key;
1077         struct btrfs_path path;
1078         struct extent_buffer *leaf;
1079         int ret ;
1080         size_t size;
1081         u64 found = 0;
1082         u64 csum_end;
1083         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1084
1085         btrfs_init_path(&path);
1086
1087         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1088         key.offset = start;
1089         key.type = BTRFS_EXTENT_CSUM_KEY;
1090
1091         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1092                                 &key, &path, 0, 0);
1093         BUG_ON(ret < 0);
1094         if (ret > 0 && path.slots[0] > 0) {
1095                 leaf = path.nodes[0];
1096                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1097                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1098                     key.type == BTRFS_EXTENT_CSUM_KEY)
1099                         path.slots[0]--;
1100         }
1101
1102         while (len > 0) {
1103                 leaf = path.nodes[0];
1104                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1105                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1106                         BUG_ON(ret < 0);
1107                         if (ret > 0)
1108                                 break;
1109                         leaf = path.nodes[0];
1110                 }
1111
1112                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1113                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1114                     key.type != BTRFS_EXTENT_CSUM_KEY)
1115                         break;
1116
1117                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1118                 if (key.offset >= start + len)
1119                         break;
1120
1121                 if (key.offset > start)
1122                         start = key.offset;
1123
1124                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1125                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1126                 if (csum_end > start) {
1127                         size = min(csum_end - start, len);
1128                         len -= size;
1129                         start += size;
1130                         found += size;
1131                 }
1132
1133                 path.slots[0]++;
1134         }
1135         btrfs_release_path(&path);
1136         return found;
1137 }
1138
1139 static int process_file_extent(struct btrfs_root *root,
1140                                 struct extent_buffer *eb,
1141                                 int slot, struct btrfs_key *key,
1142                                 struct shared_node *active_node)
1143 {
1144         struct inode_record *rec;
1145         struct btrfs_file_extent_item *fi;
1146         u64 num_bytes = 0;
1147         u64 disk_bytenr = 0;
1148         u64 extent_offset = 0;
1149         u64 mask = root->sectorsize - 1;
1150         int extent_type;
1151
1152         rec = active_node->current;
1153         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1154         rec->found_file_extent = 1;
1155
1156         if (rec->extent_start == (u64)-1) {
1157                 rec->extent_start = key->offset;
1158                 rec->extent_end = key->offset;
1159         }
1160
1161         if (rec->extent_end > key->offset)
1162                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1163         else if (rec->extent_end < key->offset &&
1164                  rec->extent_end < rec->first_extent_gap)
1165                 rec->first_extent_gap = rec->extent_end;
1166
1167         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1168         extent_type = btrfs_file_extent_type(eb, fi);
1169
1170         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1171                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1172                 if (num_bytes == 0)
1173                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1174                 rec->found_size += num_bytes;
1175                 num_bytes = (num_bytes + mask) & ~mask;
1176         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1177                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1178                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1179                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1180                 extent_offset = btrfs_file_extent_offset(eb, fi);
1181                 if (num_bytes == 0 || (num_bytes & mask))
1182                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1183                 if (num_bytes + extent_offset >
1184                     btrfs_file_extent_ram_bytes(eb, fi))
1185                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1186                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1187                     (btrfs_file_extent_compression(eb, fi) ||
1188                      btrfs_file_extent_encryption(eb, fi) ||
1189                      btrfs_file_extent_other_encoding(eb, fi)))
1190                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1191                 if (disk_bytenr > 0)
1192                         rec->found_size += num_bytes;
1193         } else {
1194                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1195         }
1196         rec->extent_end = key->offset + num_bytes;
1197
1198         if (disk_bytenr > 0) {
1199                 u64 found;
1200                 if (btrfs_file_extent_compression(eb, fi))
1201                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1202                 else
1203                         disk_bytenr += extent_offset;
1204
1205                 found = count_csum_range(root, disk_bytenr, num_bytes);
1206                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1207                         if (found > 0)
1208                                 rec->found_csum_item = 1;
1209                         if (found < num_bytes)
1210                                 rec->some_csum_missing = 1;
1211                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1212                         if (found > 0)
1213                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1214                 }
1215         }
1216         return 0;
1217 }
1218
1219 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1220                             struct walk_control *wc)
1221 {
1222         struct btrfs_key key;
1223         u32 nritems;
1224         int i;
1225         int ret = 0;
1226         int error = 0;
1227         struct cache_tree *inode_cache;
1228         struct shared_node *active_node;
1229
1230         if (wc->root_level == wc->active_node &&
1231             btrfs_root_refs(&root->root_item) == 0)
1232                 return 0;
1233
1234         active_node = wc->nodes[wc->active_node];
1235         inode_cache = &active_node->inode_cache;
1236         nritems = btrfs_header_nritems(eb);
1237         for (i = 0; i < nritems; i++) {
1238                 btrfs_item_key_to_cpu(eb, &key, i);
1239
1240                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1241                         continue;
1242                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1243                         continue;
1244
1245                 if (active_node->current == NULL ||
1246                     active_node->current->ino < key.objectid) {
1247                         if (active_node->current) {
1248                                 active_node->current->checked = 1;
1249                                 maybe_free_inode_rec(inode_cache,
1250                                                      active_node->current);
1251                         }
1252                         active_node->current = get_inode_rec(inode_cache,
1253                                                              key.objectid, 1);
1254                 }
1255                 switch (key.type) {
1256                 case BTRFS_DIR_ITEM_KEY:
1257                 case BTRFS_DIR_INDEX_KEY:
1258                         ret = process_dir_item(root, eb, i, &key, active_node);
1259                         break;
1260                 case BTRFS_INODE_REF_KEY:
1261                         ret = process_inode_ref(eb, i, &key, active_node);
1262                         break;
1263                 case BTRFS_INODE_EXTREF_KEY:
1264                         ret = process_inode_extref(eb, i, &key, active_node);
1265                         break;
1266                 case BTRFS_INODE_ITEM_KEY:
1267                         ret = process_inode_item(eb, i, &key, active_node);
1268                         break;
1269                 case BTRFS_EXTENT_DATA_KEY:
1270                         ret = process_file_extent(root, eb, i, &key,
1271                                                   active_node);
1272                         break;
1273                 default:
1274                         break;
1275                 };
1276                 if (ret != 0)
1277                         error = 1;
1278         }
1279         return error;
1280 }
1281
1282 static void reada_walk_down(struct btrfs_root *root,
1283                             struct extent_buffer *node, int slot)
1284 {
1285         u64 bytenr;
1286         u64 ptr_gen;
1287         u32 nritems;
1288         u32 blocksize;
1289         int i;
1290         int level;
1291
1292         level = btrfs_header_level(node);
1293         if (level != 1)
1294                 return;
1295
1296         nritems = btrfs_header_nritems(node);
1297         blocksize = btrfs_level_size(root, level - 1);
1298         for (i = slot; i < nritems; i++) {
1299                 bytenr = btrfs_node_blockptr(node, i);
1300                 ptr_gen = btrfs_node_ptr_generation(node, i);
1301                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1302         }
1303 }
1304
1305 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1306                           struct walk_control *wc, int *level)
1307 {
1308         u64 bytenr;
1309         u64 ptr_gen;
1310         struct extent_buffer *next;
1311         struct extent_buffer *cur;
1312         u32 blocksize;
1313         int ret, err = 0;
1314         u64 refs;
1315
1316         WARN_ON(*level < 0);
1317         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1318         ret = btrfs_lookup_extent_info(NULL, root,
1319                                        path->nodes[*level]->start,
1320                                        *level, 1, &refs, NULL);
1321         if (ret < 0) {
1322                 err = ret;
1323                 goto out;
1324         }
1325
1326         if (refs > 1) {
1327                 ret = enter_shared_node(root, path->nodes[*level]->start,
1328                                         refs, wc, *level);
1329                 if (ret > 0) {
1330                         err = ret;
1331                         goto out;
1332                 }
1333         }
1334
1335         while (*level >= 0) {
1336                 WARN_ON(*level < 0);
1337                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1338                 cur = path->nodes[*level];
1339
1340                 if (btrfs_header_level(cur) != *level)
1341                         WARN_ON(1);
1342
1343                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1344                         break;
1345                 if (*level == 0) {
1346                         ret = process_one_leaf(root, cur, wc);
1347                         break;
1348                 }
1349                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1350                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1351                 blocksize = btrfs_level_size(root, *level - 1);
1352                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1353                                                1, &refs, NULL);
1354                 if (ret < 0)
1355                         refs = 0;
1356
1357                 if (refs > 1) {
1358                         ret = enter_shared_node(root, bytenr, refs,
1359                                                 wc, *level - 1);
1360                         if (ret > 0) {
1361                                 path->slots[*level]++;
1362                                 continue;
1363                         }
1364                 }
1365
1366                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1367                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1368                         free_extent_buffer(next);
1369                         reada_walk_down(root, cur, path->slots[*level]);
1370                         next = read_tree_block(root, bytenr, blocksize,
1371                                                ptr_gen);
1372                         if (!next) {
1373                                 err = -EIO;
1374                                 goto out;
1375                         }
1376                 }
1377
1378                 *level = *level - 1;
1379                 free_extent_buffer(path->nodes[*level]);
1380                 path->nodes[*level] = next;
1381                 path->slots[*level] = 0;
1382         }
1383 out:
1384         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1385         return err;
1386 }
1387
1388 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1389                         struct walk_control *wc, int *level)
1390 {
1391         int i;
1392         struct extent_buffer *leaf;
1393
1394         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1395                 leaf = path->nodes[i];
1396                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1397                         path->slots[i]++;
1398                         *level = i;
1399                         return 0;
1400                 } else {
1401                         free_extent_buffer(path->nodes[*level]);
1402                         path->nodes[*level] = NULL;
1403                         BUG_ON(*level > wc->active_node);
1404                         if (*level == wc->active_node)
1405                                 leave_shared_node(root, wc, *level);
1406                         *level = i + 1;
1407                 }
1408         }
1409         return 1;
1410 }
1411
1412 static int check_root_dir(struct inode_record *rec)
1413 {
1414         struct inode_backref *backref;
1415         int ret = -1;
1416
1417         if (!rec->found_inode_item || rec->errors)
1418                 goto out;
1419         if (rec->nlink != 1 || rec->found_link != 0)
1420                 goto out;
1421         if (list_empty(&rec->backrefs))
1422                 goto out;
1423         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1424         if (!backref->found_inode_ref)
1425                 goto out;
1426         if (backref->index != 0 || backref->namelen != 2 ||
1427             memcmp(backref->name, "..", 2))
1428                 goto out;
1429         if (backref->found_dir_index || backref->found_dir_item)
1430                 goto out;
1431         ret = 0;
1432 out:
1433         return ret;
1434 }
1435
1436 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1437                               struct btrfs_root *root, struct btrfs_path *path,
1438                               struct inode_record *rec)
1439 {
1440         struct btrfs_inode_item *ei;
1441         struct btrfs_key key;
1442         int ret;
1443
1444         key.objectid = rec->ino;
1445         key.type = BTRFS_INODE_ITEM_KEY;
1446         key.offset = (u64)-1;
1447
1448         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1449         if (ret < 0)
1450                 goto out;
1451         if (ret) {
1452                 if (!path->slots[0]) {
1453                         ret = -ENOENT;
1454                         goto out;
1455                 }
1456                 path->slots[0]--;
1457                 ret = 0;
1458         }
1459         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1460         if (key.objectid != rec->ino) {
1461                 ret = -ENOENT;
1462                 goto out;
1463         }
1464
1465         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1466                             struct btrfs_inode_item);
1467         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1468         btrfs_mark_buffer_dirty(path->nodes[0]);
1469         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1470         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1471                root->root_key.objectid);
1472 out:
1473         btrfs_release_path(path);
1474         return ret;
1475 }
1476
1477 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1478                                     struct btrfs_root *root,
1479                                     struct btrfs_path *path,
1480                                     struct inode_record *rec)
1481 {
1482         struct btrfs_key key;
1483         int ret;
1484
1485         key.objectid = BTRFS_ORPHAN_OBJECTID;
1486         key.type = BTRFS_ORPHAN_ITEM_KEY;
1487         key.offset = rec->ino;
1488
1489         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1490         btrfs_release_path(path);
1491         if (!ret)
1492                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1493         return ret;
1494 }
1495
1496 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
1497 {
1498         struct btrfs_trans_handle *trans;
1499         struct btrfs_path *path;
1500         int ret = 0;
1501
1502         /* So far we just fix dir isize wrong */
1503         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM)))
1504                 return 1;
1505
1506         path = btrfs_alloc_path();
1507         if (!path)
1508                 return -ENOMEM;
1509
1510         trans = btrfs_start_transaction(root, 1);
1511         if (IS_ERR(trans)) {
1512                 btrfs_free_path(path);
1513                 return PTR_ERR(trans);
1514         }
1515
1516         if (rec->errors & I_ERR_DIR_ISIZE_WRONG)
1517                 ret = repair_inode_isize(trans, root, path, rec);
1518         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
1519                 ret = repair_inode_orphan_item(trans, root, path, rec);
1520         btrfs_commit_transaction(trans, root);
1521         btrfs_free_path(path);
1522         return ret;
1523 }
1524
1525 static int check_inode_recs(struct btrfs_root *root,
1526                             struct cache_tree *inode_cache)
1527 {
1528         struct cache_extent *cache;
1529         struct ptr_node *node;
1530         struct inode_record *rec;
1531         struct inode_backref *backref;
1532         int ret;
1533         u64 error = 0;
1534         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1535
1536         if (btrfs_root_refs(&root->root_item) == 0) {
1537                 if (!cache_tree_empty(inode_cache))
1538                         fprintf(stderr, "warning line %d\n", __LINE__);
1539                 return 0;
1540         }
1541
1542         rec = get_inode_rec(inode_cache, root_dirid, 0);
1543         if (rec) {
1544                 ret = check_root_dir(rec);
1545                 if (ret) {
1546                         fprintf(stderr, "root %llu root dir %llu error\n",
1547                                 (unsigned long long)root->root_key.objectid,
1548                                 (unsigned long long)root_dirid);
1549                         error++;
1550                 }
1551         } else {
1552                 fprintf(stderr, "root %llu root dir %llu not found\n",
1553                         (unsigned long long)root->root_key.objectid,
1554                         (unsigned long long)root_dirid);
1555         }
1556
1557         while (1) {
1558                 cache = search_cache_extent(inode_cache, 0);
1559                 if (!cache)
1560                         break;
1561                 node = container_of(cache, struct ptr_node, cache);
1562                 rec = node->data;
1563                 remove_cache_extent(inode_cache, &node->cache);
1564                 free(node);
1565                 if (rec->ino == root_dirid ||
1566                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1567                         free_inode_rec(rec);
1568                         continue;
1569                 }
1570
1571                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1572                         ret = check_orphan_item(root, rec->ino);
1573                         if (ret == 0)
1574                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1575                         if (can_free_inode_rec(rec)) {
1576                                 free_inode_rec(rec);
1577                                 continue;
1578                         }
1579                 }
1580
1581                 if (repair) {
1582                         ret = try_repair_inode(root, rec);
1583                         if (ret == 0 && can_free_inode_rec(rec)) {
1584                                 free_inode_rec(rec);
1585                                 continue;
1586                         }
1587                         ret = 0;
1588                 }
1589
1590                 error++;
1591                 if (!rec->found_inode_item)
1592                         rec->errors |= I_ERR_NO_INODE_ITEM;
1593                 if (rec->found_link != rec->nlink)
1594                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1595                 fprintf(stderr, "root %llu inode %llu errors %x",
1596                         (unsigned long long) root->root_key.objectid,
1597                         (unsigned long long) rec->ino, rec->errors);
1598                 print_inode_error(rec->errors);
1599                 list_for_each_entry(backref, &rec->backrefs, list) {
1600                         if (!backref->found_dir_item)
1601                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1602                         if (!backref->found_dir_index)
1603                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1604                         if (!backref->found_inode_ref)
1605                                 backref->errors |= REF_ERR_NO_INODE_REF;
1606                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1607                                 " namelen %u name %s filetype %d errors %x",
1608                                 (unsigned long long)backref->dir,
1609                                 (unsigned long long)backref->index,
1610                                 backref->namelen, backref->name,
1611                                 backref->filetype, backref->errors);
1612                         print_ref_error(backref->errors);
1613                 }
1614                 free_inode_rec(rec);
1615         }
1616         return (error > 0) ? -1 : 0;
1617 }
1618
1619 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1620                                         u64 objectid)
1621 {
1622         struct cache_extent *cache;
1623         struct root_record *rec = NULL;
1624         int ret;
1625
1626         cache = lookup_cache_extent(root_cache, objectid, 1);
1627         if (cache) {
1628                 rec = container_of(cache, struct root_record, cache);
1629         } else {
1630                 rec = calloc(1, sizeof(*rec));
1631                 rec->objectid = objectid;
1632                 INIT_LIST_HEAD(&rec->backrefs);
1633                 rec->cache.start = objectid;
1634                 rec->cache.size = 1;
1635
1636                 ret = insert_cache_extent(root_cache, &rec->cache);
1637                 BUG_ON(ret);
1638         }
1639         return rec;
1640 }
1641
1642 static struct root_backref *get_root_backref(struct root_record *rec,
1643                                              u64 ref_root, u64 dir, u64 index,
1644                                              const char *name, int namelen)
1645 {
1646         struct root_backref *backref;
1647
1648         list_for_each_entry(backref, &rec->backrefs, list) {
1649                 if (backref->ref_root != ref_root || backref->dir != dir ||
1650                     backref->namelen != namelen)
1651                         continue;
1652                 if (memcmp(name, backref->name, namelen))
1653                         continue;
1654                 return backref;
1655         }
1656
1657         backref = malloc(sizeof(*backref) + namelen + 1);
1658         memset(backref, 0, sizeof(*backref));
1659         backref->ref_root = ref_root;
1660         backref->dir = dir;
1661         backref->index = index;
1662         backref->namelen = namelen;
1663         memcpy(backref->name, name, namelen);
1664         backref->name[namelen] = '\0';
1665         list_add_tail(&backref->list, &rec->backrefs);
1666         return backref;
1667 }
1668
1669 static void free_root_record(struct cache_extent *cache)
1670 {
1671         struct root_record *rec;
1672         struct root_backref *backref;
1673
1674         rec = container_of(cache, struct root_record, cache);
1675         while (!list_empty(&rec->backrefs)) {
1676                 backref = list_entry(rec->backrefs.next,
1677                                      struct root_backref, list);
1678                 list_del(&backref->list);
1679                 free(backref);
1680         }
1681
1682         kfree(rec);
1683 }
1684
1685 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
1686
1687 static int add_root_backref(struct cache_tree *root_cache,
1688                             u64 root_id, u64 ref_root, u64 dir, u64 index,
1689                             const char *name, int namelen,
1690                             int item_type, int errors)
1691 {
1692         struct root_record *rec;
1693         struct root_backref *backref;
1694
1695         rec = get_root_rec(root_cache, root_id);
1696         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
1697
1698         backref->errors |= errors;
1699
1700         if (item_type != BTRFS_DIR_ITEM_KEY) {
1701                 if (backref->found_dir_index || backref->found_back_ref ||
1702                     backref->found_forward_ref) {
1703                         if (backref->index != index)
1704                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
1705                 } else {
1706                         backref->index = index;
1707                 }
1708         }
1709
1710         if (item_type == BTRFS_DIR_ITEM_KEY) {
1711                 if (backref->found_forward_ref)
1712                         rec->found_ref++;
1713                 backref->found_dir_item = 1;
1714         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
1715                 backref->found_dir_index = 1;
1716         } else if (item_type == BTRFS_ROOT_REF_KEY) {
1717                 if (backref->found_forward_ref)
1718                         backref->errors |= REF_ERR_DUP_ROOT_REF;
1719                 else if (backref->found_dir_item)
1720                         rec->found_ref++;
1721                 backref->found_forward_ref = 1;
1722         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
1723                 if (backref->found_back_ref)
1724                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
1725                 backref->found_back_ref = 1;
1726         } else {
1727                 BUG_ON(1);
1728         }
1729
1730         if (backref->found_forward_ref && backref->found_dir_item)
1731                 backref->reachable = 1;
1732         return 0;
1733 }
1734
1735 static int merge_root_recs(struct btrfs_root *root,
1736                            struct cache_tree *src_cache,
1737                            struct cache_tree *dst_cache)
1738 {
1739         struct cache_extent *cache;
1740         struct ptr_node *node;
1741         struct inode_record *rec;
1742         struct inode_backref *backref;
1743
1744         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1745                 free_inode_recs_tree(src_cache);
1746                 return 0;
1747         }
1748
1749         while (1) {
1750                 cache = search_cache_extent(src_cache, 0);
1751                 if (!cache)
1752                         break;
1753                 node = container_of(cache, struct ptr_node, cache);
1754                 rec = node->data;
1755                 remove_cache_extent(src_cache, &node->cache);
1756                 free(node);
1757
1758                 if (!is_child_root(root, root->objectid, rec->ino))
1759                         goto skip;
1760
1761                 list_for_each_entry(backref, &rec->backrefs, list) {
1762                         BUG_ON(backref->found_inode_ref);
1763                         if (backref->found_dir_item)
1764                                 add_root_backref(dst_cache, rec->ino,
1765                                         root->root_key.objectid, backref->dir,
1766                                         backref->index, backref->name,
1767                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
1768                                         backref->errors);
1769                         if (backref->found_dir_index)
1770                                 add_root_backref(dst_cache, rec->ino,
1771                                         root->root_key.objectid, backref->dir,
1772                                         backref->index, backref->name,
1773                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
1774                                         backref->errors);
1775                 }
1776 skip:
1777                 free_inode_rec(rec);
1778         }
1779         return 0;
1780 }
1781
1782 static int check_root_refs(struct btrfs_root *root,
1783                            struct cache_tree *root_cache)
1784 {
1785         struct root_record *rec;
1786         struct root_record *ref_root;
1787         struct root_backref *backref;
1788         struct cache_extent *cache;
1789         int loop = 1;
1790         int ret;
1791         int error;
1792         int errors = 0;
1793
1794         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
1795         rec->found_ref = 1;
1796
1797         /* fixme: this can not detect circular references */
1798         while (loop) {
1799                 loop = 0;
1800                 cache = search_cache_extent(root_cache, 0);
1801                 while (1) {
1802                         if (!cache)
1803                                 break;
1804                         rec = container_of(cache, struct root_record, cache);
1805                         cache = next_cache_extent(cache);
1806
1807                         if (rec->found_ref == 0)
1808                                 continue;
1809
1810                         list_for_each_entry(backref, &rec->backrefs, list) {
1811                                 if (!backref->reachable)
1812                                         continue;
1813
1814                                 ref_root = get_root_rec(root_cache,
1815                                                         backref->ref_root);
1816                                 if (ref_root->found_ref > 0)
1817                                         continue;
1818
1819                                 backref->reachable = 0;
1820                                 rec->found_ref--;
1821                                 if (rec->found_ref == 0)
1822                                         loop = 1;
1823                         }
1824                 }
1825         }
1826
1827         cache = search_cache_extent(root_cache, 0);
1828         while (1) {
1829                 if (!cache)
1830                         break;
1831                 rec = container_of(cache, struct root_record, cache);
1832                 cache = next_cache_extent(cache);
1833
1834                 if (rec->found_ref == 0 &&
1835                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1836                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
1837                         ret = check_orphan_item(root->fs_info->tree_root,
1838                                                 rec->objectid);
1839                         if (ret == 0)
1840                                 continue;
1841
1842                         /*
1843                          * If we don't have a root item then we likely just have
1844                          * a dir item in a snapshot for this root but no actual
1845                          * ref key or anything so it's meaningless.
1846                          */
1847                         if (!rec->found_root_item)
1848                                 continue;
1849                         errors++;
1850                         fprintf(stderr, "fs tree %llu not referenced\n",
1851                                 (unsigned long long)rec->objectid);
1852                 }
1853
1854                 error = 0;
1855                 if (rec->found_ref > 0 && !rec->found_root_item)
1856                         error = 1;
1857                 list_for_each_entry(backref, &rec->backrefs, list) {
1858                         if (!backref->found_dir_item)
1859                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1860                         if (!backref->found_dir_index)
1861                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1862                         if (!backref->found_back_ref)
1863                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
1864                         if (!backref->found_forward_ref)
1865                                 backref->errors |= REF_ERR_NO_ROOT_REF;
1866                         if (backref->reachable && backref->errors)
1867                                 error = 1;
1868                 }
1869                 if (!error)
1870                         continue;
1871
1872                 errors++;
1873                 fprintf(stderr, "fs tree %llu refs %u %s\n",
1874                         (unsigned long long)rec->objectid, rec->found_ref,
1875                          rec->found_root_item ? "" : "not found");
1876
1877                 list_for_each_entry(backref, &rec->backrefs, list) {
1878                         if (!backref->reachable)
1879                                 continue;
1880                         if (!backref->errors && rec->found_root_item)
1881                                 continue;
1882                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
1883                                 " index %llu namelen %u name %s errors %x\n",
1884                                 (unsigned long long)backref->ref_root,
1885                                 (unsigned long long)backref->dir,
1886                                 (unsigned long long)backref->index,
1887                                 backref->namelen, backref->name,
1888                                 backref->errors);
1889                         print_ref_error(backref->errors);
1890                 }
1891         }
1892         return errors > 0 ? 1 : 0;
1893 }
1894
1895 static int process_root_ref(struct extent_buffer *eb, int slot,
1896                             struct btrfs_key *key,
1897                             struct cache_tree *root_cache)
1898 {
1899         u64 dirid;
1900         u64 index;
1901         u32 len;
1902         u32 name_len;
1903         struct btrfs_root_ref *ref;
1904         char namebuf[BTRFS_NAME_LEN];
1905         int error;
1906
1907         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
1908
1909         dirid = btrfs_root_ref_dirid(eb, ref);
1910         index = btrfs_root_ref_sequence(eb, ref);
1911         name_len = btrfs_root_ref_name_len(eb, ref);
1912
1913         if (name_len <= BTRFS_NAME_LEN) {
1914                 len = name_len;
1915                 error = 0;
1916         } else {
1917                 len = BTRFS_NAME_LEN;
1918                 error = REF_ERR_NAME_TOO_LONG;
1919         }
1920         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1921
1922         if (key->type == BTRFS_ROOT_REF_KEY) {
1923                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
1924                                  index, namebuf, len, key->type, error);
1925         } else {
1926                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
1927                                  index, namebuf, len, key->type, error);
1928         }
1929         return 0;
1930 }
1931
1932 static int check_fs_root(struct btrfs_root *root,
1933                          struct cache_tree *root_cache,
1934                          struct walk_control *wc)
1935 {
1936         int ret = 0;
1937         int wret;
1938         int level;
1939         struct btrfs_path path;
1940         struct shared_node root_node;
1941         struct root_record *rec;
1942         struct btrfs_root_item *root_item = &root->root_item;
1943
1944         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1945                 rec = get_root_rec(root_cache, root->root_key.objectid);
1946                 if (btrfs_root_refs(root_item) > 0)
1947                         rec->found_root_item = 1;
1948         }
1949
1950         btrfs_init_path(&path);
1951         memset(&root_node, 0, sizeof(root_node));
1952         cache_tree_init(&root_node.root_cache);
1953         cache_tree_init(&root_node.inode_cache);
1954
1955         level = btrfs_header_level(root->node);
1956         memset(wc->nodes, 0, sizeof(wc->nodes));
1957         wc->nodes[level] = &root_node;
1958         wc->active_node = level;
1959         wc->root_level = level;
1960
1961         if (btrfs_root_refs(root_item) > 0 ||
1962             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1963                 path.nodes[level] = root->node;
1964                 extent_buffer_get(root->node);
1965                 path.slots[level] = 0;
1966         } else {
1967                 struct btrfs_key key;
1968                 struct btrfs_disk_key found_key;
1969
1970                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1971                 level = root_item->drop_level;
1972                 path.lowest_level = level;
1973                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1974                 BUG_ON(wret < 0);
1975                 btrfs_node_key(path.nodes[level], &found_key,
1976                                 path.slots[level]);
1977                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1978                                         sizeof(found_key)));
1979         }
1980
1981         while (1) {
1982                 wret = walk_down_tree(root, &path, wc, &level);
1983                 if (wret < 0)
1984                         ret = wret;
1985                 if (wret != 0)
1986                         break;
1987
1988                 wret = walk_up_tree(root, &path, wc, &level);
1989                 if (wret < 0)
1990                         ret = wret;
1991                 if (wret != 0)
1992                         break;
1993         }
1994         btrfs_release_path(&path);
1995
1996         merge_root_recs(root, &root_node.root_cache, root_cache);
1997
1998         if (root_node.current) {
1999                 root_node.current->checked = 1;
2000                 maybe_free_inode_rec(&root_node.inode_cache,
2001                                 root_node.current);
2002         }
2003
2004         ret = check_inode_recs(root, &root_node.inode_cache);
2005         return ret;
2006 }
2007
2008 static int fs_root_objectid(u64 objectid)
2009 {
2010         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
2011             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2012                 return 1;
2013         return is_fstree(objectid);
2014 }
2015
2016 static int check_fs_roots(struct btrfs_root *root,
2017                           struct cache_tree *root_cache)
2018 {
2019         struct btrfs_path path;
2020         struct btrfs_key key;
2021         struct walk_control wc;
2022         struct extent_buffer *leaf;
2023         struct btrfs_root *tmp_root;
2024         struct btrfs_root *tree_root = root->fs_info->tree_root;
2025         int ret;
2026         int err = 0;
2027
2028         /*
2029          * Just in case we made any changes to the extent tree that weren't
2030          * reflected into the free space cache yet.
2031          */
2032         if (repair)
2033                 reset_cached_block_groups(root->fs_info);
2034         memset(&wc, 0, sizeof(wc));
2035         cache_tree_init(&wc.shared);
2036         btrfs_init_path(&path);
2037
2038         key.offset = 0;
2039         key.objectid = 0;
2040         key.type = BTRFS_ROOT_ITEM_KEY;
2041         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
2042         BUG_ON(ret < 0);
2043         while (1) {
2044                 leaf = path.nodes[0];
2045                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2046                         ret = btrfs_next_leaf(tree_root, &path);
2047                         if (ret != 0)
2048                                 break;
2049                         leaf = path.nodes[0];
2050                 }
2051                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2052                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
2053                     fs_root_objectid(key.objectid)) {
2054                         key.offset = (u64)-1;
2055                         tmp_root = btrfs_read_fs_root(root->fs_info, &key);
2056                         if (IS_ERR(tmp_root)) {
2057                                 err = 1;
2058                                 goto next;
2059                         }
2060                         ret = check_fs_root(tmp_root, root_cache, &wc);
2061                         if (ret)
2062                                 err = 1;
2063                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
2064                            key.type == BTRFS_ROOT_BACKREF_KEY) {
2065                         process_root_ref(leaf, path.slots[0], &key,
2066                                          root_cache);
2067                 }
2068 next:
2069                 path.slots[0]++;
2070         }
2071         btrfs_release_path(&path);
2072
2073         if (!cache_tree_empty(&wc.shared))
2074                 fprintf(stderr, "warning line %d\n", __LINE__);
2075
2076         return err;
2077 }
2078
2079 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
2080 {
2081         struct list_head *cur = rec->backrefs.next;
2082         struct extent_backref *back;
2083         struct tree_backref *tback;
2084         struct data_backref *dback;
2085         u64 found = 0;
2086         int err = 0;
2087
2088         while(cur != &rec->backrefs) {
2089                 back = list_entry(cur, struct extent_backref, list);
2090                 cur = cur->next;
2091                 if (!back->found_extent_tree) {
2092                         err = 1;
2093                         if (!print_errs)
2094                                 goto out;
2095                         if (back->is_data) {
2096                                 dback = (struct data_backref *)back;
2097                                 fprintf(stderr, "Backref %llu %s %llu"
2098                                         " owner %llu offset %llu num_refs %lu"
2099                                         " not found in extent tree\n",
2100                                         (unsigned long long)rec->start,
2101                                         back->full_backref ?
2102                                         "parent" : "root",
2103                                         back->full_backref ?
2104                                         (unsigned long long)dback->parent:
2105                                         (unsigned long long)dback->root,
2106                                         (unsigned long long)dback->owner,
2107                                         (unsigned long long)dback->offset,
2108                                         (unsigned long)dback->num_refs);
2109                         } else {
2110                                 tback = (struct tree_backref *)back;
2111                                 fprintf(stderr, "Backref %llu parent %llu"
2112                                         " root %llu not found in extent tree\n",
2113                                         (unsigned long long)rec->start,
2114                                         (unsigned long long)tback->parent,
2115                                         (unsigned long long)tback->root);
2116                         }
2117                 }
2118                 if (!back->is_data && !back->found_ref) {
2119                         err = 1;
2120                         if (!print_errs)
2121                                 goto out;
2122                         tback = (struct tree_backref *)back;
2123                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
2124                                 (unsigned long long)rec->start,
2125                                 back->full_backref ? "parent" : "root",
2126                                 back->full_backref ?
2127                                 (unsigned long long)tback->parent :
2128                                 (unsigned long long)tback->root, back);
2129                 }
2130                 if (back->is_data) {
2131                         dback = (struct data_backref *)back;
2132                         if (dback->found_ref != dback->num_refs) {
2133                                 err = 1;
2134                                 if (!print_errs)
2135                                         goto out;
2136                                 fprintf(stderr, "Incorrect local backref count"
2137                                         " on %llu %s %llu owner %llu"
2138                                         " offset %llu found %u wanted %u back %p\n",
2139                                         (unsigned long long)rec->start,
2140                                         back->full_backref ?
2141                                         "parent" : "root",
2142                                         back->full_backref ?
2143                                         (unsigned long long)dback->parent:
2144                                         (unsigned long long)dback->root,
2145                                         (unsigned long long)dback->owner,
2146                                         (unsigned long long)dback->offset,
2147                                         dback->found_ref, dback->num_refs, back);
2148                         }
2149                         if (dback->disk_bytenr != rec->start) {
2150                                 err = 1;
2151                                 if (!print_errs)
2152                                         goto out;
2153                                 fprintf(stderr, "Backref disk bytenr does not"
2154                                         " match extent record, bytenr=%llu, "
2155                                         "ref bytenr=%llu\n",
2156                                         (unsigned long long)rec->start,
2157                                         (unsigned long long)dback->disk_bytenr);
2158                         }
2159
2160                         if (dback->bytes != rec->nr) {
2161                                 err = 1;
2162                                 if (!print_errs)
2163                                         goto out;
2164                                 fprintf(stderr, "Backref bytes do not match "
2165                                         "extent backref, bytenr=%llu, ref "
2166                                         "bytes=%llu, backref bytes=%llu\n",
2167                                         (unsigned long long)rec->start,
2168                                         (unsigned long long)rec->nr,
2169                                         (unsigned long long)dback->bytes);
2170                         }
2171                 }
2172                 if (!back->is_data) {
2173                         found += 1;
2174                 } else {
2175                         dback = (struct data_backref *)back;
2176                         found += dback->found_ref;
2177                 }
2178         }
2179         if (found != rec->refs) {
2180                 err = 1;
2181                 if (!print_errs)
2182                         goto out;
2183                 fprintf(stderr, "Incorrect global backref count "
2184                         "on %llu found %llu wanted %llu\n",
2185                         (unsigned long long)rec->start,
2186                         (unsigned long long)found,
2187                         (unsigned long long)rec->refs);
2188         }
2189 out:
2190         return err;
2191 }
2192
2193 static int free_all_extent_backrefs(struct extent_record *rec)
2194 {
2195         struct extent_backref *back;
2196         struct list_head *cur;
2197         while (!list_empty(&rec->backrefs)) {
2198                 cur = rec->backrefs.next;
2199                 back = list_entry(cur, struct extent_backref, list);
2200                 list_del(cur);
2201                 free(back);
2202         }
2203         return 0;
2204 }
2205
2206 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
2207                                      struct cache_tree *extent_cache)
2208 {
2209         struct cache_extent *cache;
2210         struct extent_record *rec;
2211
2212         while (1) {
2213                 cache = first_cache_extent(extent_cache);
2214                 if (!cache)
2215                         break;
2216                 rec = container_of(cache, struct extent_record, cache);
2217                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2218                 remove_cache_extent(extent_cache, cache);
2219                 free_all_extent_backrefs(rec);
2220                 free(rec);
2221         }
2222 }
2223
2224 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2225                                  struct extent_record *rec)
2226 {
2227         if (rec->content_checked && rec->owner_ref_checked &&
2228             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2229             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2230                 remove_cache_extent(extent_cache, &rec->cache);
2231                 free_all_extent_backrefs(rec);
2232                 list_del_init(&rec->list);
2233                 free(rec);
2234         }
2235         return 0;
2236 }
2237
2238 static int check_owner_ref(struct btrfs_root *root,
2239                             struct extent_record *rec,
2240                             struct extent_buffer *buf)
2241 {
2242         struct extent_backref *node;
2243         struct tree_backref *back;
2244         struct btrfs_root *ref_root;
2245         struct btrfs_key key;
2246         struct btrfs_path path;
2247         struct extent_buffer *parent;
2248         int level;
2249         int found = 0;
2250         int ret;
2251
2252         list_for_each_entry(node, &rec->backrefs, list) {
2253                 if (node->is_data)
2254                         continue;
2255                 if (!node->found_ref)
2256                         continue;
2257                 if (node->full_backref)
2258                         continue;
2259                 back = (struct tree_backref *)node;
2260                 if (btrfs_header_owner(buf) == back->root)
2261                         return 0;
2262         }
2263         BUG_ON(rec->is_root);
2264
2265         /* try to find the block by search corresponding fs tree */
2266         key.objectid = btrfs_header_owner(buf);
2267         key.type = BTRFS_ROOT_ITEM_KEY;
2268         key.offset = (u64)-1;
2269
2270         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2271         if (IS_ERR(ref_root))
2272                 return 1;
2273
2274         level = btrfs_header_level(buf);
2275         if (level == 0)
2276                 btrfs_item_key_to_cpu(buf, &key, 0);
2277         else
2278                 btrfs_node_key_to_cpu(buf, &key, 0);
2279
2280         btrfs_init_path(&path);
2281         path.lowest_level = level + 1;
2282         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2283         if (ret < 0)
2284                 return 0;
2285
2286         parent = path.nodes[level + 1];
2287         if (parent && buf->start == btrfs_node_blockptr(parent,
2288                                                         path.slots[level + 1]))
2289                 found = 1;
2290
2291         btrfs_release_path(&path);
2292         return found ? 0 : 1;
2293 }
2294
2295 static int is_extent_tree_record(struct extent_record *rec)
2296 {
2297         struct list_head *cur = rec->backrefs.next;
2298         struct extent_backref *node;
2299         struct tree_backref *back;
2300         int is_extent = 0;
2301
2302         while(cur != &rec->backrefs) {
2303                 node = list_entry(cur, struct extent_backref, list);
2304                 cur = cur->next;
2305                 if (node->is_data)
2306                         return 0;
2307                 back = (struct tree_backref *)node;
2308                 if (node->full_backref)
2309                         return 0;
2310                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2311                         is_extent = 1;
2312         }
2313         return is_extent;
2314 }
2315
2316
2317 static int record_bad_block_io(struct btrfs_fs_info *info,
2318                                struct cache_tree *extent_cache,
2319                                u64 start, u64 len)
2320 {
2321         struct extent_record *rec;
2322         struct cache_extent *cache;
2323         struct btrfs_key key;
2324
2325         cache = lookup_cache_extent(extent_cache, start, len);
2326         if (!cache)
2327                 return 0;
2328
2329         rec = container_of(cache, struct extent_record, cache);
2330         if (!is_extent_tree_record(rec))
2331                 return 0;
2332
2333         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2334         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2335 }
2336
2337 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
2338                        struct extent_buffer *buf, int slot)
2339 {
2340         if (btrfs_header_level(buf)) {
2341                 struct btrfs_key_ptr ptr1, ptr2;
2342
2343                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
2344                                    sizeof(struct btrfs_key_ptr));
2345                 read_extent_buffer(buf, &ptr2,
2346                                    btrfs_node_key_ptr_offset(slot + 1),
2347                                    sizeof(struct btrfs_key_ptr));
2348                 write_extent_buffer(buf, &ptr1,
2349                                     btrfs_node_key_ptr_offset(slot + 1),
2350                                     sizeof(struct btrfs_key_ptr));
2351                 write_extent_buffer(buf, &ptr2,
2352                                     btrfs_node_key_ptr_offset(slot),
2353                                     sizeof(struct btrfs_key_ptr));
2354                 if (slot == 0) {
2355                         struct btrfs_disk_key key;
2356                         btrfs_node_key(buf, &key, 0);
2357                         btrfs_fixup_low_keys(root, path, &key,
2358                                              btrfs_header_level(buf) + 1);
2359                 }
2360         } else {
2361                 struct btrfs_item *item1, *item2;
2362                 struct btrfs_key k1, k2;
2363                 char *item1_data, *item2_data;
2364                 u32 item1_offset, item2_offset, item1_size, item2_size;
2365
2366                 item1 = btrfs_item_nr(slot);
2367                 item2 = btrfs_item_nr(slot + 1);
2368                 btrfs_item_key_to_cpu(buf, &k1, slot);
2369                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
2370                 item1_offset = btrfs_item_offset(buf, item1);
2371                 item2_offset = btrfs_item_offset(buf, item2);
2372                 item1_size = btrfs_item_size(buf, item1);
2373                 item2_size = btrfs_item_size(buf, item2);
2374
2375                 item1_data = malloc(item1_size);
2376                 if (!item1_data)
2377                         return -ENOMEM;
2378                 item2_data = malloc(item2_size);
2379                 if (!item2_data) {
2380                         free(item1_data);
2381                         return -ENOMEM;
2382                 }
2383
2384                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
2385                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
2386
2387                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
2388                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
2389                 free(item1_data);
2390                 free(item2_data);
2391
2392                 btrfs_set_item_offset(buf, item1, item2_offset);
2393                 btrfs_set_item_offset(buf, item2, item1_offset);
2394                 btrfs_set_item_size(buf, item1, item2_size);
2395                 btrfs_set_item_size(buf, item2, item1_size);
2396
2397                 path->slots[0] = slot;
2398                 btrfs_set_item_key_unsafe(root, path, &k2);
2399                 path->slots[0] = slot + 1;
2400                 btrfs_set_item_key_unsafe(root, path, &k1);
2401         }
2402         return 0;
2403 }
2404
2405 /*
2406  * Attempt to fix basic block failures.  Currently we only handle bad key
2407  * orders, we will cycle through the keys and swap them if necessary.
2408  */
2409 static int try_to_fix_bad_block(struct btrfs_trans_handle *trans,
2410                                 struct btrfs_root *root,
2411                                 struct extent_buffer *buf,
2412                                 struct btrfs_disk_key *parent_key,
2413                                 enum btrfs_tree_block_status status)
2414 {
2415         struct btrfs_path *path;
2416         struct btrfs_key k1, k2;
2417         int i;
2418         int level;
2419         int ret;
2420
2421         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
2422                 return -EIO;
2423
2424         k1.objectid = btrfs_header_owner(buf);
2425         k1.type = BTRFS_ROOT_ITEM_KEY;
2426         k1.offset = (u64)-1;
2427
2428         root = btrfs_read_fs_root(root->fs_info, &k1);
2429         if (IS_ERR(root))
2430                 return -EIO;
2431
2432         path = btrfs_alloc_path();
2433         if (!path)
2434                 return -EIO;
2435
2436         level = btrfs_header_level(buf);
2437         path->lowest_level = level;
2438         path->skip_check_block = 1;
2439         if (level)
2440                 btrfs_node_key_to_cpu(buf, &k1, 0);
2441         else
2442                 btrfs_item_key_to_cpu(buf, &k1, 0);
2443
2444         ret = btrfs_search_slot(trans, root, &k1, path, 0, 1);
2445         if (ret) {
2446                 btrfs_free_path(path);
2447                 return -EIO;
2448         }
2449
2450         buf = path->nodes[level];
2451         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
2452                 if (level) {
2453                         btrfs_node_key_to_cpu(buf, &k1, i);
2454                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
2455                 } else {
2456                         btrfs_item_key_to_cpu(buf, &k1, i);
2457                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
2458                 }
2459                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
2460                         continue;
2461                 ret = swap_values(root, path, buf, i);
2462                 if (ret)
2463                         break;
2464                 btrfs_mark_buffer_dirty(buf);
2465                 i = 0;
2466         }
2467
2468         btrfs_free_path(path);
2469         return ret;
2470 }
2471
2472 static int check_block(struct btrfs_trans_handle *trans,
2473                        struct btrfs_root *root,
2474                        struct cache_tree *extent_cache,
2475                        struct extent_buffer *buf, u64 flags)
2476 {
2477         struct extent_record *rec;
2478         struct cache_extent *cache;
2479         struct btrfs_key key;
2480         enum btrfs_tree_block_status status;
2481         int ret = 0;
2482         int level;
2483
2484         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
2485         if (!cache)
2486                 return 1;
2487         rec = container_of(cache, struct extent_record, cache);
2488         rec->generation = btrfs_header_generation(buf);
2489
2490         level = btrfs_header_level(buf);
2491         if (btrfs_header_nritems(buf) > 0) {
2492
2493                 if (level == 0)
2494                         btrfs_item_key_to_cpu(buf, &key, 0);
2495                 else
2496                         btrfs_node_key_to_cpu(buf, &key, 0);
2497
2498                 rec->info_objectid = key.objectid;
2499         }
2500         rec->info_level = level;
2501
2502         if (btrfs_is_leaf(buf))
2503                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
2504         else
2505                 status = btrfs_check_node(root, &rec->parent_key, buf);
2506
2507         if (status != BTRFS_TREE_BLOCK_CLEAN) {
2508                 if (repair)
2509                         status = try_to_fix_bad_block(trans, root, buf,
2510                                                       &rec->parent_key,
2511                                                       status);
2512                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2513                         ret = -EIO;
2514                         fprintf(stderr, "bad block %llu\n",
2515                                 (unsigned long long)buf->start);
2516                 } else {
2517                         /*
2518                          * Signal to callers we need to start the scan over
2519                          * again since we'll have cow'ed blocks.
2520                          */
2521                         ret = -EAGAIN;
2522                 }
2523         } else {
2524                 rec->content_checked = 1;
2525                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2526                         rec->owner_ref_checked = 1;
2527                 else {
2528                         ret = check_owner_ref(root, rec, buf);
2529                         if (!ret)
2530                                 rec->owner_ref_checked = 1;
2531                 }
2532         }
2533         if (!ret)
2534                 maybe_free_extent_rec(extent_cache, rec);
2535         return ret;
2536 }
2537
2538 static struct tree_backref *find_tree_backref(struct extent_record *rec,
2539                                                 u64 parent, u64 root)
2540 {
2541         struct list_head *cur = rec->backrefs.next;
2542         struct extent_backref *node;
2543         struct tree_backref *back;
2544
2545         while(cur != &rec->backrefs) {
2546                 node = list_entry(cur, struct extent_backref, list);
2547                 cur = cur->next;
2548                 if (node->is_data)
2549                         continue;
2550                 back = (struct tree_backref *)node;
2551                 if (parent > 0) {
2552                         if (!node->full_backref)
2553                                 continue;
2554                         if (parent == back->parent)
2555                                 return back;
2556                 } else {
2557                         if (node->full_backref)
2558                                 continue;
2559                         if (back->root == root)
2560                                 return back;
2561                 }
2562         }
2563         return NULL;
2564 }
2565
2566 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
2567                                                 u64 parent, u64 root)
2568 {
2569         struct tree_backref *ref = malloc(sizeof(*ref));
2570         memset(&ref->node, 0, sizeof(ref->node));
2571         if (parent > 0) {
2572                 ref->parent = parent;
2573                 ref->node.full_backref = 1;
2574         } else {
2575                 ref->root = root;
2576                 ref->node.full_backref = 0;
2577         }
2578         list_add_tail(&ref->node.list, &rec->backrefs);
2579
2580         return ref;
2581 }
2582
2583 static struct data_backref *find_data_backref(struct extent_record *rec,
2584                                                 u64 parent, u64 root,
2585                                                 u64 owner, u64 offset,
2586                                                 int found_ref,
2587                                                 u64 disk_bytenr, u64 bytes)
2588 {
2589         struct list_head *cur = rec->backrefs.next;
2590         struct extent_backref *node;
2591         struct data_backref *back;
2592
2593         while(cur != &rec->backrefs) {
2594                 node = list_entry(cur, struct extent_backref, list);
2595                 cur = cur->next;
2596                 if (!node->is_data)
2597                         continue;
2598                 back = (struct data_backref *)node;
2599                 if (parent > 0) {
2600                         if (!node->full_backref)
2601                                 continue;
2602                         if (parent == back->parent)
2603                                 return back;
2604                 } else {
2605                         if (node->full_backref)
2606                                 continue;
2607                         if (back->root == root && back->owner == owner &&
2608                             back->offset == offset) {
2609                                 if (found_ref && node->found_ref &&
2610                                     (back->bytes != bytes ||
2611                                     back->disk_bytenr != disk_bytenr))
2612                                         continue;
2613                                 return back;
2614                         }
2615                 }
2616         }
2617         return NULL;
2618 }
2619
2620 static struct data_backref *alloc_data_backref(struct extent_record *rec,
2621                                                 u64 parent, u64 root,
2622                                                 u64 owner, u64 offset,
2623                                                 u64 max_size)
2624 {
2625         struct data_backref *ref = malloc(sizeof(*ref));
2626         memset(&ref->node, 0, sizeof(ref->node));
2627         ref->node.is_data = 1;
2628
2629         if (parent > 0) {
2630                 ref->parent = parent;
2631                 ref->owner = 0;
2632                 ref->offset = 0;
2633                 ref->node.full_backref = 1;
2634         } else {
2635                 ref->root = root;
2636                 ref->owner = owner;
2637                 ref->offset = offset;
2638                 ref->node.full_backref = 0;
2639         }
2640         ref->bytes = max_size;
2641         ref->found_ref = 0;
2642         ref->num_refs = 0;
2643         list_add_tail(&ref->node.list, &rec->backrefs);
2644         if (max_size > rec->max_size)
2645                 rec->max_size = max_size;
2646         return ref;
2647 }
2648
2649 static int add_extent_rec(struct cache_tree *extent_cache,
2650                           struct btrfs_key *parent_key, u64 parent_gen,
2651                           u64 start, u64 nr, u64 extent_item_refs,
2652                           int is_root, int inc_ref, int set_checked,
2653                           int metadata, int extent_rec, u64 max_size)
2654 {
2655         struct extent_record *rec;
2656         struct cache_extent *cache;
2657         int ret = 0;
2658         int dup = 0;
2659
2660         cache = lookup_cache_extent(extent_cache, start, nr);
2661         if (cache) {
2662                 rec = container_of(cache, struct extent_record, cache);
2663                 if (inc_ref)
2664                         rec->refs++;
2665                 if (rec->nr == 1)
2666                         rec->nr = max(nr, max_size);
2667
2668                 /*
2669                  * We need to make sure to reset nr to whatever the extent
2670                  * record says was the real size, this way we can compare it to
2671                  * the backrefs.
2672                  */
2673                 if (extent_rec) {
2674                         if (start != rec->start || rec->found_rec) {
2675                                 struct extent_record *tmp;
2676
2677                                 dup = 1;
2678                                 if (list_empty(&rec->list))
2679                                         list_add_tail(&rec->list,
2680                                                       &duplicate_extents);
2681
2682                                 /*
2683                                  * We have to do this song and dance in case we
2684                                  * find an extent record that falls inside of
2685                                  * our current extent record but does not have
2686                                  * the same objectid.
2687                                  */
2688                                 tmp = malloc(sizeof(*tmp));
2689                                 if (!tmp)
2690                                         return -ENOMEM;
2691                                 tmp->start = start;
2692                                 tmp->max_size = max_size;
2693                                 tmp->nr = nr;
2694                                 tmp->found_rec = 1;
2695                                 tmp->metadata = metadata;
2696                                 tmp->extent_item_refs = extent_item_refs;
2697                                 INIT_LIST_HEAD(&tmp->list);
2698                                 list_add_tail(&tmp->list, &rec->dups);
2699                                 rec->num_duplicates++;
2700                         } else {
2701                                 rec->nr = nr;
2702                                 rec->found_rec = 1;
2703                         }
2704                 }
2705
2706                 if (extent_item_refs && !dup) {
2707                         if (rec->extent_item_refs) {
2708                                 fprintf(stderr, "block %llu rec "
2709                                         "extent_item_refs %llu, passed %llu\n",
2710                                         (unsigned long long)start,
2711                                         (unsigned long long)
2712                                                         rec->extent_item_refs,
2713                                         (unsigned long long)extent_item_refs);
2714                         }
2715                         rec->extent_item_refs = extent_item_refs;
2716                 }
2717                 if (is_root)
2718                         rec->is_root = 1;
2719                 if (set_checked) {
2720                         rec->content_checked = 1;
2721                         rec->owner_ref_checked = 1;
2722                 }
2723
2724                 if (parent_key)
2725                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2726                 if (parent_gen)
2727                         rec->parent_generation = parent_gen;
2728
2729                 if (rec->max_size < max_size)
2730                         rec->max_size = max_size;
2731
2732                 maybe_free_extent_rec(extent_cache, rec);
2733                 return ret;
2734         }
2735         rec = malloc(sizeof(*rec));
2736         rec->start = start;
2737         rec->max_size = max_size;
2738         rec->nr = max(nr, max_size);
2739         rec->found_rec = !!extent_rec;
2740         rec->content_checked = 0;
2741         rec->owner_ref_checked = 0;
2742         rec->num_duplicates = 0;
2743         rec->metadata = metadata;
2744         INIT_LIST_HEAD(&rec->backrefs);
2745         INIT_LIST_HEAD(&rec->dups);
2746         INIT_LIST_HEAD(&rec->list);
2747
2748         if (is_root)
2749                 rec->is_root = 1;
2750         else
2751                 rec->is_root = 0;
2752
2753         if (inc_ref)
2754                 rec->refs = 1;
2755         else
2756                 rec->refs = 0;
2757
2758         if (extent_item_refs)
2759                 rec->extent_item_refs = extent_item_refs;
2760         else
2761                 rec->extent_item_refs = 0;
2762
2763         if (parent_key)
2764                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2765         else
2766                 memset(&rec->parent_key, 0, sizeof(*parent_key));
2767
2768         if (parent_gen)
2769                 rec->parent_generation = parent_gen;
2770         else
2771                 rec->parent_generation = 0;
2772
2773         rec->cache.start = start;
2774         rec->cache.size = nr;
2775         ret = insert_cache_extent(extent_cache, &rec->cache);
2776         BUG_ON(ret);
2777         bytes_used += nr;
2778         if (set_checked) {
2779                 rec->content_checked = 1;
2780                 rec->owner_ref_checked = 1;
2781         }
2782         return ret;
2783 }
2784
2785 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
2786                             u64 parent, u64 root, int found_ref)
2787 {
2788         struct extent_record *rec;
2789         struct tree_backref *back;
2790         struct cache_extent *cache;
2791
2792         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2793         if (!cache) {
2794                 add_extent_rec(extent_cache, NULL, 0, bytenr,
2795                                1, 0, 0, 0, 0, 1, 0, 0);
2796                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2797                 if (!cache)
2798                         abort();
2799         }
2800
2801         rec = container_of(cache, struct extent_record, cache);
2802         if (rec->start != bytenr) {
2803                 abort();
2804         }
2805
2806         back = find_tree_backref(rec, parent, root);
2807         if (!back)
2808                 back = alloc_tree_backref(rec, parent, root);
2809
2810         if (found_ref) {
2811                 if (back->node.found_ref) {
2812                         fprintf(stderr, "Extent back ref already exists "
2813                                 "for %llu parent %llu root %llu \n",
2814                                 (unsigned long long)bytenr,
2815                                 (unsigned long long)parent,
2816                                 (unsigned long long)root);
2817                 }
2818                 back->node.found_ref = 1;
2819         } else {
2820                 if (back->node.found_extent_tree) {
2821                         fprintf(stderr, "Extent back ref already exists "
2822                                 "for %llu parent %llu root %llu \n",
2823                                 (unsigned long long)bytenr,
2824                                 (unsigned long long)parent,
2825                                 (unsigned long long)root);
2826                 }
2827                 back->node.found_extent_tree = 1;
2828         }
2829         maybe_free_extent_rec(extent_cache, rec);
2830         return 0;
2831 }
2832
2833 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
2834                             u64 parent, u64 root, u64 owner, u64 offset,
2835                             u32 num_refs, int found_ref, u64 max_size)
2836 {
2837         struct extent_record *rec;
2838         struct data_backref *back;
2839         struct cache_extent *cache;
2840
2841         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2842         if (!cache) {
2843                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
2844                                0, 0, max_size);
2845                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2846                 if (!cache)
2847                         abort();
2848         }
2849
2850         rec = container_of(cache, struct extent_record, cache);
2851         if (rec->max_size < max_size)
2852                 rec->max_size = max_size;
2853
2854         /*
2855          * If found_ref is set then max_size is the real size and must match the
2856          * existing refs.  So if we have already found a ref then we need to
2857          * make sure that this ref matches the existing one, otherwise we need
2858          * to add a new backref so we can notice that the backrefs don't match
2859          * and we need to figure out who is telling the truth.  This is to
2860          * account for that awful fsync bug I introduced where we'd end up with
2861          * a btrfs_file_extent_item that would have its length include multiple
2862          * prealloc extents or point inside of a prealloc extent.
2863          */
2864         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
2865                                  bytenr, max_size);
2866         if (!back)
2867                 back = alloc_data_backref(rec, parent, root, owner, offset,
2868                                           max_size);
2869
2870         if (found_ref) {
2871                 BUG_ON(num_refs != 1);
2872                 if (back->node.found_ref)
2873                         BUG_ON(back->bytes != max_size);
2874                 back->node.found_ref = 1;
2875                 back->found_ref += 1;
2876                 back->bytes = max_size;
2877                 back->disk_bytenr = bytenr;
2878                 rec->refs += 1;
2879                 rec->content_checked = 1;
2880                 rec->owner_ref_checked = 1;
2881         } else {
2882                 if (back->node.found_extent_tree) {
2883                         fprintf(stderr, "Extent back ref already exists "
2884                                 "for %llu parent %llu root %llu "
2885                                 "owner %llu offset %llu num_refs %lu\n",
2886                                 (unsigned long long)bytenr,
2887                                 (unsigned long long)parent,
2888                                 (unsigned long long)root,
2889                                 (unsigned long long)owner,
2890                                 (unsigned long long)offset,
2891                                 (unsigned long)num_refs);
2892                 }
2893                 back->num_refs = num_refs;
2894                 back->node.found_extent_tree = 1;
2895         }
2896         maybe_free_extent_rec(extent_cache, rec);
2897         return 0;
2898 }
2899
2900 static int add_pending(struct cache_tree *pending,
2901                        struct cache_tree *seen, u64 bytenr, u32 size)
2902 {
2903         int ret;
2904         ret = add_cache_extent(seen, bytenr, size);
2905         if (ret)
2906                 return ret;
2907         add_cache_extent(pending, bytenr, size);
2908         return 0;
2909 }
2910
2911 static int pick_next_pending(struct cache_tree *pending,
2912                         struct cache_tree *reada,
2913                         struct cache_tree *nodes,
2914                         u64 last, struct block_info *bits, int bits_nr,
2915                         int *reada_bits)
2916 {
2917         unsigned long node_start = last;
2918         struct cache_extent *cache;
2919         int ret;
2920
2921         cache = search_cache_extent(reada, 0);
2922         if (cache) {
2923                 bits[0].start = cache->start;
2924                 bits[0].size = cache->size;
2925                 *reada_bits = 1;
2926                 return 1;
2927         }
2928         *reada_bits = 0;
2929         if (node_start > 32768)
2930                 node_start -= 32768;
2931
2932         cache = search_cache_extent(nodes, node_start);
2933         if (!cache)
2934                 cache = search_cache_extent(nodes, 0);
2935
2936         if (!cache) {
2937                  cache = search_cache_extent(pending, 0);
2938                  if (!cache)
2939                          return 0;
2940                  ret = 0;
2941                  do {
2942                          bits[ret].start = cache->start;
2943                          bits[ret].size = cache->size;
2944                          cache = next_cache_extent(cache);
2945                          ret++;
2946                  } while (cache && ret < bits_nr);
2947                  return ret;
2948         }
2949
2950         ret = 0;
2951         do {
2952                 bits[ret].start = cache->start;
2953                 bits[ret].size = cache->size;
2954                 cache = next_cache_extent(cache);
2955                 ret++;
2956         } while (cache && ret < bits_nr);
2957
2958         if (bits_nr - ret > 8) {
2959                 u64 lookup = bits[0].start + bits[0].size;
2960                 struct cache_extent *next;
2961                 next = search_cache_extent(pending, lookup);
2962                 while(next) {
2963                         if (next->start - lookup > 32768)
2964                                 break;
2965                         bits[ret].start = next->start;
2966                         bits[ret].size = next->size;
2967                         lookup = next->start + next->size;
2968                         ret++;
2969                         if (ret == bits_nr)
2970                                 break;
2971                         next = next_cache_extent(next);
2972                         if (!next)
2973                                 break;
2974                 }
2975         }
2976         return ret;
2977 }
2978
2979 static void free_chunk_record(struct cache_extent *cache)
2980 {
2981         struct chunk_record *rec;
2982
2983         rec = container_of(cache, struct chunk_record, cache);
2984         free(rec);
2985 }
2986
2987 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
2988 {
2989         cache_tree_free_extents(chunk_cache, free_chunk_record);
2990 }
2991
2992 static void free_device_record(struct rb_node *node)
2993 {
2994         struct device_record *rec;
2995
2996         rec = container_of(node, struct device_record, node);
2997         free(rec);
2998 }
2999
3000 FREE_RB_BASED_TREE(device_cache, free_device_record);
3001
3002 int insert_block_group_record(struct block_group_tree *tree,
3003                               struct block_group_record *bg_rec)
3004 {
3005         int ret;
3006
3007         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
3008         if (ret)
3009                 return ret;
3010
3011         list_add_tail(&bg_rec->list, &tree->block_groups);
3012         return 0;
3013 }
3014
3015 static void free_block_group_record(struct cache_extent *cache)
3016 {
3017         struct block_group_record *rec;
3018
3019         rec = container_of(cache, struct block_group_record, cache);
3020         free(rec);
3021 }
3022
3023 void free_block_group_tree(struct block_group_tree *tree)
3024 {
3025         cache_tree_free_extents(&tree->tree, free_block_group_record);
3026 }
3027
3028 int insert_device_extent_record(struct device_extent_tree *tree,
3029                                 struct device_extent_record *de_rec)
3030 {
3031         int ret;
3032
3033         /*
3034          * Device extent is a bit different from the other extents, because
3035          * the extents which belong to the different devices may have the
3036          * same start and size, so we need use the special extent cache
3037          * search/insert functions.
3038          */
3039         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
3040         if (ret)
3041                 return ret;
3042
3043         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
3044         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
3045         return 0;
3046 }
3047
3048 static void free_device_extent_record(struct cache_extent *cache)
3049 {
3050         struct device_extent_record *rec;
3051
3052         rec = container_of(cache, struct device_extent_record, cache);
3053         free(rec);
3054 }
3055
3056 void free_device_extent_tree(struct device_extent_tree *tree)
3057 {
3058         cache_tree_free_extents(&tree->tree, free_device_extent_record);
3059 }
3060
3061 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3062 static int process_extent_ref_v0(struct cache_tree *extent_cache,
3063                                  struct extent_buffer *leaf, int slot)
3064 {
3065         struct btrfs_extent_ref_v0 *ref0;
3066         struct btrfs_key key;
3067
3068         btrfs_item_key_to_cpu(leaf, &key, slot);
3069         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
3070         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
3071                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
3072         } else {
3073                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
3074                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
3075         }
3076         return 0;
3077 }
3078 #endif
3079
3080 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
3081                                             struct btrfs_key *key,
3082                                             int slot)
3083 {
3084         struct btrfs_chunk *ptr;
3085         struct chunk_record *rec;
3086         int num_stripes, i;
3087
3088         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3089         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
3090
3091         rec = malloc(btrfs_chunk_record_size(num_stripes));
3092         if (!rec) {
3093                 fprintf(stderr, "memory allocation failed\n");
3094                 exit(-1);
3095         }
3096
3097         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
3098
3099         INIT_LIST_HEAD(&rec->list);
3100         INIT_LIST_HEAD(&rec->dextents);
3101         rec->bg_rec = NULL;
3102
3103         rec->cache.start = key->offset;
3104         rec->cache.size = btrfs_chunk_length(leaf, ptr);
3105
3106         rec->generation = btrfs_header_generation(leaf);
3107
3108         rec->objectid = key->objectid;
3109         rec->type = key->type;
3110         rec->offset = key->offset;
3111
3112         rec->length = rec->cache.size;
3113         rec->owner = btrfs_chunk_owner(leaf, ptr);
3114         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
3115         rec->type_flags = btrfs_chunk_type(leaf, ptr);
3116         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
3117         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
3118         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
3119         rec->num_stripes = num_stripes;
3120         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
3121
3122         for (i = 0; i < rec->num_stripes; ++i) {
3123                 rec->stripes[i].devid =
3124                         btrfs_stripe_devid_nr(leaf, ptr, i);
3125                 rec->stripes[i].offset =
3126                         btrfs_stripe_offset_nr(leaf, ptr, i);
3127                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
3128                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
3129                                 BTRFS_UUID_SIZE);
3130         }
3131
3132         return rec;
3133 }
3134
3135 static int process_chunk_item(struct cache_tree *chunk_cache,
3136                               struct btrfs_key *key, struct extent_buffer *eb,
3137                               int slot)
3138 {
3139         struct chunk_record *rec;
3140         int ret = 0;
3141
3142         rec = btrfs_new_chunk_record(eb, key, slot);
3143         ret = insert_cache_extent(chunk_cache, &rec->cache);
3144         if (ret) {
3145                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
3146                         rec->offset, rec->length);
3147                 free(rec);
3148         }
3149
3150         return ret;
3151 }
3152
3153 static int process_device_item(struct rb_root *dev_cache,
3154                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
3155 {
3156         struct btrfs_dev_item *ptr;
3157         struct device_record *rec;
3158         int ret = 0;
3159
3160         ptr = btrfs_item_ptr(eb,
3161                 slot, struct btrfs_dev_item);
3162
3163         rec = malloc(sizeof(*rec));
3164         if (!rec) {
3165                 fprintf(stderr, "memory allocation failed\n");
3166                 return -ENOMEM;
3167         }
3168
3169         rec->devid = key->offset;
3170         rec->generation = btrfs_header_generation(eb);
3171
3172         rec->objectid = key->objectid;
3173         rec->type = key->type;
3174         rec->offset = key->offset;
3175
3176         rec->devid = btrfs_device_id(eb, ptr);
3177         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
3178         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
3179
3180         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
3181         if (ret) {
3182                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
3183                 free(rec);
3184         }
3185
3186         return ret;
3187 }
3188
3189 struct block_group_record *
3190 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
3191                              int slot)
3192 {
3193         struct btrfs_block_group_item *ptr;
3194         struct block_group_record *rec;
3195
3196         rec = malloc(sizeof(*rec));
3197         if (!rec) {
3198                 fprintf(stderr, "memory allocation failed\n");
3199                 exit(-1);
3200         }
3201         memset(rec, 0, sizeof(*rec));
3202
3203         rec->cache.start = key->objectid;
3204         rec->cache.size = key->offset;
3205
3206         rec->generation = btrfs_header_generation(leaf);
3207
3208         rec->objectid = key->objectid;
3209         rec->type = key->type;
3210         rec->offset = key->offset;
3211
3212         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
3213         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
3214
3215         INIT_LIST_HEAD(&rec->list);
3216
3217         return rec;
3218 }
3219
3220 static int process_block_group_item(struct block_group_tree *block_group_cache,
3221                                     struct btrfs_key *key,
3222                                     struct extent_buffer *eb, int slot)
3223 {
3224         struct block_group_record *rec;
3225         int ret = 0;
3226
3227         rec = btrfs_new_block_group_record(eb, key, slot);
3228         ret = insert_block_group_record(block_group_cache, rec);
3229         if (ret) {
3230                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
3231                         rec->objectid, rec->offset);
3232                 free(rec);
3233         }
3234
3235         return ret;
3236 }
3237
3238 struct device_extent_record *
3239 btrfs_new_device_extent_record(struct extent_buffer *leaf,
3240                                struct btrfs_key *key, int slot)
3241 {
3242         struct device_extent_record *rec;
3243         struct btrfs_dev_extent *ptr;
3244
3245         rec = malloc(sizeof(*rec));
3246         if (!rec) {
3247                 fprintf(stderr, "memory allocation failed\n");
3248                 exit(-1);
3249         }
3250         memset(rec, 0, sizeof(*rec));
3251
3252         rec->cache.objectid = key->objectid;
3253         rec->cache.start = key->offset;
3254
3255         rec->generation = btrfs_header_generation(leaf);
3256
3257         rec->objectid = key->objectid;
3258         rec->type = key->type;
3259         rec->offset = key->offset;
3260
3261         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
3262         rec->chunk_objecteid =
3263                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
3264         rec->chunk_offset =
3265                 btrfs_dev_extent_chunk_offset(leaf, ptr);
3266         rec->length = btrfs_dev_extent_length(leaf, ptr);
3267         rec->cache.size = rec->length;
3268
3269         INIT_LIST_HEAD(&rec->chunk_list);
3270         INIT_LIST_HEAD(&rec->device_list);
3271
3272         return rec;
3273 }
3274
3275 static int
3276 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
3277                            struct btrfs_key *key, struct extent_buffer *eb,
3278                            int slot)
3279 {
3280         struct device_extent_record *rec;
3281         int ret;
3282
3283         rec = btrfs_new_device_extent_record(eb, key, slot);
3284         ret = insert_device_extent_record(dev_extent_cache, rec);
3285         if (ret) {
3286                 fprintf(stderr,
3287                         "Device extent[%llu, %llu, %llu] existed.\n",
3288                         rec->objectid, rec->offset, rec->length);
3289                 free(rec);
3290         }
3291
3292         return ret;
3293 }
3294
3295 static int process_extent_item(struct btrfs_root *root,
3296                                struct cache_tree *extent_cache,
3297                                struct extent_buffer *eb, int slot)
3298 {
3299         struct btrfs_extent_item *ei;
3300         struct btrfs_extent_inline_ref *iref;
3301         struct btrfs_extent_data_ref *dref;
3302         struct btrfs_shared_data_ref *sref;
3303         struct btrfs_key key;
3304         unsigned long end;
3305         unsigned long ptr;
3306         int type;
3307         u32 item_size = btrfs_item_size_nr(eb, slot);
3308         u64 refs = 0;
3309         u64 offset;
3310         u64 num_bytes;
3311         int metadata = 0;
3312
3313         btrfs_item_key_to_cpu(eb, &key, slot);
3314
3315         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3316                 metadata = 1;
3317                 num_bytes = root->leafsize;
3318         } else {
3319                 num_bytes = key.offset;
3320         }
3321
3322         if (item_size < sizeof(*ei)) {
3323 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3324                 struct btrfs_extent_item_v0 *ei0;
3325                 BUG_ON(item_size != sizeof(*ei0));
3326                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
3327                 refs = btrfs_extent_refs_v0(eb, ei0);
3328 #else
3329                 BUG();
3330 #endif
3331                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
3332                                       num_bytes, refs, 0, 0, 0, metadata, 1,
3333                                       num_bytes);
3334         }
3335
3336         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
3337         refs = btrfs_extent_refs(eb, ei);
3338
3339         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
3340                        refs, 0, 0, 0, metadata, 1, num_bytes);
3341
3342         ptr = (unsigned long)(ei + 1);
3343         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
3344             key.type == BTRFS_EXTENT_ITEM_KEY)
3345                 ptr += sizeof(struct btrfs_tree_block_info);
3346
3347         end = (unsigned long)ei + item_size;
3348         while (ptr < end) {
3349                 iref = (struct btrfs_extent_inline_ref *)ptr;
3350                 type = btrfs_extent_inline_ref_type(eb, iref);
3351                 offset = btrfs_extent_inline_ref_offset(eb, iref);
3352                 switch (type) {
3353                 case BTRFS_TREE_BLOCK_REF_KEY:
3354                         add_tree_backref(extent_cache, key.objectid,
3355                                          0, offset, 0);
3356                         break;
3357                 case BTRFS_SHARED_BLOCK_REF_KEY:
3358                         add_tree_backref(extent_cache, key.objectid,
3359                                          offset, 0, 0);
3360                         break;
3361                 case BTRFS_EXTENT_DATA_REF_KEY:
3362                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3363                         add_data_backref(extent_cache, key.objectid, 0,
3364                                         btrfs_extent_data_ref_root(eb, dref),
3365                                         btrfs_extent_data_ref_objectid(eb,
3366                                                                        dref),
3367                                         btrfs_extent_data_ref_offset(eb, dref),
3368                                         btrfs_extent_data_ref_count(eb, dref),
3369                                         0, num_bytes);
3370                         break;
3371                 case BTRFS_SHARED_DATA_REF_KEY:
3372                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
3373                         add_data_backref(extent_cache, key.objectid, offset,
3374                                         0, 0, 0,
3375                                         btrfs_shared_data_ref_count(eb, sref),
3376                                         0, num_bytes);
3377                         break;
3378                 default:
3379                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
3380                                 key.objectid, key.type, num_bytes);
3381                         goto out;
3382                 }
3383                 ptr += btrfs_extent_inline_ref_size(type);
3384         }
3385         WARN_ON(ptr > end);
3386 out:
3387         return 0;
3388 }
3389
3390 static int check_cache_range(struct btrfs_root *root,
3391                              struct btrfs_block_group_cache *cache,
3392                              u64 offset, u64 bytes)
3393 {
3394         struct btrfs_free_space *entry;
3395         u64 *logical;
3396         u64 bytenr;
3397         int stripe_len;
3398         int i, nr, ret;
3399
3400         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3401                 bytenr = btrfs_sb_offset(i);
3402                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
3403                                        cache->key.objectid, bytenr, 0,
3404                                        &logical, &nr, &stripe_len);
3405                 if (ret)
3406                         return ret;
3407
3408                 while (nr--) {
3409                         if (logical[nr] + stripe_len <= offset)
3410                                 continue;
3411                         if (offset + bytes <= logical[nr])
3412                                 continue;
3413                         if (logical[nr] == offset) {
3414                                 if (stripe_len >= bytes) {
3415                                         kfree(logical);
3416                                         return 0;
3417                                 }
3418                                 bytes -= stripe_len;
3419                                 offset += stripe_len;
3420                         } else if (logical[nr] < offset) {
3421                                 if (logical[nr] + stripe_len >=
3422                                     offset + bytes) {
3423                                         kfree(logical);
3424                                         return 0;
3425                                 }
3426                                 bytes = (offset + bytes) -
3427                                         (logical[nr] + stripe_len);
3428                                 offset = logical[nr] + stripe_len;
3429                         } else {
3430                                 /*
3431                                  * Could be tricky, the super may land in the
3432                                  * middle of the area we're checking.  First
3433                                  * check the easiest case, it's at the end.
3434                                  */
3435                                 if (logical[nr] + stripe_len >=
3436                                     bytes + offset) {
3437                                         bytes = logical[nr] - offset;
3438                                         continue;
3439                                 }
3440
3441                                 /* Check the left side */
3442                                 ret = check_cache_range(root, cache,
3443                                                         offset,
3444                                                         logical[nr] - offset);
3445                                 if (ret) {
3446                                         kfree(logical);
3447                                         return ret;
3448                                 }
3449
3450                                 /* Now we continue with the right side */
3451                                 bytes = (offset + bytes) -
3452                                         (logical[nr] + stripe_len);
3453                                 offset = logical[nr] + stripe_len;
3454                         }
3455                 }
3456
3457                 kfree(logical);
3458         }
3459
3460         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
3461         if (!entry) {
3462                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
3463                         offset, offset+bytes);
3464                 return -EINVAL;
3465         }
3466
3467         if (entry->offset != offset) {
3468                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
3469                         entry->offset);
3470                 return -EINVAL;
3471         }
3472
3473         if (entry->bytes != bytes) {
3474                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
3475                         bytes, entry->bytes, offset);
3476                 return -EINVAL;
3477         }
3478
3479         unlink_free_space(cache->free_space_ctl, entry);
3480         free(entry);
3481         return 0;
3482 }
3483
3484 static int verify_space_cache(struct btrfs_root *root,
3485                               struct btrfs_block_group_cache *cache)
3486 {
3487         struct btrfs_path *path;
3488         struct extent_buffer *leaf;
3489         struct btrfs_key key;
3490         u64 last;
3491         int ret = 0;
3492
3493         path = btrfs_alloc_path();
3494         if (!path)
3495                 return -ENOMEM;
3496
3497         root = root->fs_info->extent_root;
3498
3499         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
3500
3501         key.objectid = last;
3502         key.offset = 0;
3503         key.type = BTRFS_EXTENT_ITEM_KEY;
3504
3505         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3506         if (ret < 0)
3507                 goto out;
3508         ret = 0;
3509         while (1) {
3510                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3511                         ret = btrfs_next_leaf(root, path);
3512                         if (ret < 0)
3513                                 goto out;
3514                         if (ret > 0) {
3515                                 ret = 0;
3516                                 break;
3517                         }
3518                 }
3519                 leaf = path->nodes[0];
3520                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3521                 if (key.objectid >= cache->key.offset + cache->key.objectid)
3522                         break;
3523                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3524                     key.type != BTRFS_METADATA_ITEM_KEY) {
3525                         path->slots[0]++;
3526                         continue;
3527                 }
3528
3529                 if (last == key.objectid) {
3530                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
3531                                 last = key.objectid + key.offset;
3532                         else
3533                                 last = key.objectid + root->leafsize;
3534                         path->slots[0]++;
3535                         continue;
3536                 }
3537
3538                 ret = check_cache_range(root, cache, last,
3539                                         key.objectid - last);
3540                 if (ret)
3541                         break;
3542                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3543                         last = key.objectid + key.offset;
3544                 else
3545                         last = key.objectid + root->leafsize;
3546                 path->slots[0]++;
3547         }
3548
3549         if (last < cache->key.objectid + cache->key.offset)
3550                 ret = check_cache_range(root, cache, last,
3551                                         cache->key.objectid +
3552                                         cache->key.offset - last);
3553
3554 out:
3555         btrfs_free_path(path);
3556
3557         if (!ret &&
3558             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
3559                 fprintf(stderr, "There are still entries left in the space "
3560                         "cache\n");
3561                 ret = -EINVAL;
3562         }
3563
3564         return ret;
3565 }
3566
3567 static int check_space_cache(struct btrfs_root *root)
3568 {
3569         struct btrfs_block_group_cache *cache;
3570         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
3571         int ret;
3572         int error = 0;
3573
3574         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
3575             btrfs_super_generation(root->fs_info->super_copy) !=
3576             btrfs_super_cache_generation(root->fs_info->super_copy)) {
3577                 printf("cache and super generation don't match, space cache "
3578                        "will be invalidated\n");
3579                 return 0;
3580         }
3581
3582         while (1) {
3583                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
3584                 if (!cache)
3585                         break;
3586
3587                 start = cache->key.objectid + cache->key.offset;
3588                 if (!cache->free_space_ctl) {
3589                         if (btrfs_init_free_space_ctl(cache,
3590                                                       root->sectorsize)) {
3591                                 ret = -ENOMEM;
3592                                 break;
3593                         }
3594                 } else {
3595                         btrfs_remove_free_space_cache(cache);
3596                 }
3597
3598                 ret = load_free_space_cache(root->fs_info, cache);
3599                 if (!ret)
3600                         continue;
3601
3602                 ret = verify_space_cache(root, cache);
3603                 if (ret) {
3604                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
3605                                 cache->key.objectid);
3606                         error++;
3607                 }
3608         }
3609
3610         return error ? -EINVAL : 0;
3611 }
3612
3613 static int read_extent_data(struct btrfs_root *root, char *data,
3614                         u64 logical, u64 *len, int mirror)
3615 {
3616         u64 offset = 0;
3617         struct btrfs_multi_bio *multi = NULL;
3618         struct btrfs_fs_info *info = root->fs_info;
3619         struct btrfs_device *device;
3620         int ret = 0;
3621         u64 max_len = *len;
3622
3623         ret = btrfs_map_block(&info->mapping_tree, READ, logical, len,
3624                               &multi, mirror, NULL);
3625         if (ret) {
3626                 fprintf(stderr, "Couldn't map the block %llu\n",
3627                                 logical + offset);
3628                 goto err;
3629         }
3630         device = multi->stripes[0].dev;
3631
3632         if (device->fd == 0)
3633                 goto err;
3634         if (*len > max_len)
3635                 *len = max_len;
3636
3637         ret = pread64(device->fd, data, *len, multi->stripes[0].physical);
3638         if (ret != *len)
3639                 ret = -EIO;
3640         else
3641                 ret = 0;
3642 err:
3643         kfree(multi);
3644         return ret;
3645 }
3646
3647 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
3648                         u64 num_bytes, unsigned long leaf_offset,
3649                         struct extent_buffer *eb) {
3650
3651         u64 offset = 0;
3652         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3653         char *data;
3654         unsigned long csum_offset;
3655         u32 csum;
3656         u32 csum_expected;
3657         u64 read_len;
3658         u64 data_checked = 0;
3659         u64 tmp;
3660         int ret = 0;
3661         int mirror;
3662         int num_copies;
3663
3664         if (num_bytes % root->sectorsize)
3665                 return -EINVAL;
3666
3667         data = malloc(num_bytes);
3668         if (!data)
3669                 return -ENOMEM;
3670
3671         while (offset < num_bytes) {
3672                 mirror = 0;
3673 again:
3674                 read_len = num_bytes - offset;
3675                 /* read as much space once a time */
3676                 ret = read_extent_data(root, data + offset,
3677                                 bytenr + offset, &read_len, mirror);
3678                 if (ret)
3679                         goto out;
3680                 data_checked = 0;
3681                 /* verify every 4k data's checksum */
3682                 while (data_checked < read_len) {
3683                         csum = ~(u32)0;
3684                         tmp = offset + data_checked;
3685
3686                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
3687                                                csum, root->sectorsize);
3688                         btrfs_csum_final(csum, (char *)&csum);
3689
3690                         csum_offset = leaf_offset +
3691                                  tmp / root->sectorsize * csum_size;
3692                         read_extent_buffer(eb, (char *)&csum_expected,
3693                                            csum_offset, csum_size);
3694                         /* try another mirror */
3695                         if (csum != csum_expected) {
3696                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
3697                                                 mirror, bytenr + tmp,
3698                                                 csum, csum_expected);
3699                                 num_copies = btrfs_num_copies(
3700                                                 &root->fs_info->mapping_tree,
3701                                                 bytenr, num_bytes);
3702                                 if (mirror < num_copies - 1) {
3703                                         mirror += 1;
3704                                         goto again;
3705                                 }
3706                         }
3707                         data_checked += root->sectorsize;
3708                 }
3709                 offset += read_len;
3710         }
3711 out:
3712         free(data);
3713         return ret;
3714 }
3715
3716 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
3717                                u64 num_bytes)
3718 {
3719         struct btrfs_path *path;
3720         struct extent_buffer *leaf;
3721         struct btrfs_key key;
3722         int ret;
3723
3724         path = btrfs_alloc_path();
3725         if (!path) {
3726                 fprintf(stderr, "Error allocing path\n");
3727                 return -ENOMEM;
3728         }
3729
3730         key.objectid = bytenr;
3731         key.type = BTRFS_EXTENT_ITEM_KEY;
3732         key.offset = 0;
3733
3734
3735 again:
3736         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
3737                                 0, 0);
3738         if (ret < 0) {
3739                 fprintf(stderr, "Error looking up extent record %d\n", ret);
3740                 btrfs_free_path(path);
3741                 return ret;
3742         } else if (ret) {
3743                 if (path->slots[0])
3744                         path->slots[0]--;
3745                 else
3746                         btrfs_prev_leaf(root, path);
3747         }
3748
3749         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3750
3751         /*
3752          * Block group items come before extent items if they have the same
3753          * bytenr, so walk back one more just in case.  Dear future traveler,
3754          * first congrats on mastering time travel.  Now if it's not too much
3755          * trouble could you go back to 2006 and tell Chris to make the
3756          * BLOCK_GROUP_ITEM_KEY lower than the EXTENT_ITEM_KEY please?
3757          */
3758         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3759                 if (path->slots[0])
3760                         path->slots[0]--;
3761                 else
3762                         btrfs_prev_leaf(root, path);
3763         }
3764
3765         while (num_bytes) {
3766                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3767                         ret = btrfs_next_leaf(root, path);
3768                         if (ret < 0) {
3769                                 fprintf(stderr, "Error going to next leaf "
3770                                         "%d\n", ret);
3771                                 btrfs_free_path(path);
3772                                 return ret;
3773                         } else if (ret) {
3774                                 break;
3775                         }
3776                 }
3777                 leaf = path->nodes[0];
3778                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3779                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
3780                         path->slots[0]++;
3781                         continue;
3782                 }
3783                 if (key.objectid + key.offset < bytenr) {
3784                         path->slots[0]++;
3785                         continue;
3786                 }
3787                 if (key.objectid > bytenr + num_bytes)
3788                         break;
3789
3790                 if (key.objectid == bytenr) {
3791                         if (key.offset >= num_bytes) {
3792                                 num_bytes = 0;
3793                                 break;
3794                         }
3795                         num_bytes -= key.offset;
3796                         bytenr += key.offset;
3797                 } else if (key.objectid < bytenr) {
3798                         if (key.objectid + key.offset >= bytenr + num_bytes) {
3799                                 num_bytes = 0;
3800                                 break;
3801                         }
3802                         num_bytes = (bytenr + num_bytes) -
3803                                 (key.objectid + key.offset);
3804                         bytenr = key.objectid + key.offset;
3805                 } else {
3806                         if (key.objectid + key.offset < bytenr + num_bytes) {
3807                                 u64 new_start = key.objectid + key.offset;
3808                                 u64 new_bytes = bytenr + num_bytes - new_start;
3809
3810                                 /*
3811                                  * Weird case, the extent is in the middle of
3812                                  * our range, we'll have to search one side
3813                                  * and then the other.  Not sure if this happens
3814                                  * in real life, but no harm in coding it up
3815                                  * anyway just in case.
3816                                  */
3817                                 btrfs_release_path(path);
3818                                 ret = check_extent_exists(root, new_start,
3819                                                           new_bytes);
3820                                 if (ret) {
3821                                         fprintf(stderr, "Right section didn't "
3822                                                 "have a record\n");
3823                                         break;
3824                                 }
3825                                 num_bytes = key.objectid - bytenr;
3826                                 goto again;
3827                         }
3828                         num_bytes = key.objectid - bytenr;
3829                 }
3830                 path->slots[0]++;
3831         }
3832         ret = 0;
3833
3834         if (num_bytes) {
3835                 fprintf(stderr, "There are no extents for csum range "
3836                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
3837                 ret = 1;
3838         }
3839
3840         btrfs_free_path(path);
3841         return ret;
3842 }
3843
3844 static int check_csums(struct btrfs_root *root)
3845 {
3846         struct btrfs_path *path;
3847         struct extent_buffer *leaf;
3848         struct btrfs_key key;
3849         u64 offset = 0, num_bytes = 0;
3850         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3851         int errors = 0;
3852         int ret;
3853         u64 data_len;
3854         unsigned long leaf_offset;
3855
3856         root = root->fs_info->csum_root;
3857
3858         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3859         key.type = BTRFS_EXTENT_CSUM_KEY;
3860         key.offset = 0;
3861
3862         path = btrfs_alloc_path();
3863         if (!path)
3864                 return -ENOMEM;
3865
3866         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3867         if (ret < 0) {
3868                 fprintf(stderr, "Error searching csum tree %d\n", ret);
3869                 btrfs_free_path(path);
3870                 return ret;
3871         }
3872
3873         if (ret > 0 && path->slots[0])
3874                 path->slots[0]--;
3875         ret = 0;
3876
3877         while (1) {
3878                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3879                         ret = btrfs_next_leaf(root, path);
3880                         if (ret < 0) {
3881                                 fprintf(stderr, "Error going to next leaf "
3882                                         "%d\n", ret);
3883                                 break;
3884                         }
3885                         if (ret)
3886                                 break;
3887                 }
3888                 leaf = path->nodes[0];
3889
3890                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3891                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
3892                         path->slots[0]++;
3893                         continue;
3894                 }
3895
3896                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
3897                               csum_size) * root->sectorsize;
3898                 if (!check_data_csum)
3899                         goto skip_csum_check;
3900                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
3901                 ret = check_extent_csums(root, key.offset, data_len,
3902                                          leaf_offset, leaf);
3903                 if (ret)
3904                         break;
3905 skip_csum_check:
3906                 if (!num_bytes) {
3907                         offset = key.offset;
3908                 } else if (key.offset != offset + num_bytes) {
3909                         ret = check_extent_exists(root, offset, num_bytes);
3910                         if (ret) {
3911                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
3912                                         "there is no extent record\n",
3913                                         offset, offset+num_bytes);
3914                                 errors++;
3915                         }
3916                         offset = key.offset;
3917                         num_bytes = 0;
3918                 }
3919                 num_bytes += data_len;
3920                 path->slots[0]++;
3921         }
3922
3923         btrfs_free_path(path);
3924         return errors;
3925 }
3926
3927 static int is_dropped_key(struct btrfs_key *key,
3928                           struct btrfs_key *drop_key) {
3929         if (key->objectid < drop_key->objectid)
3930                 return 1;
3931         else if (key->objectid == drop_key->objectid) {
3932                 if (key->type < drop_key->type)
3933                         return 1;
3934                 else if (key->type == drop_key->type) {
3935                         if (key->offset < drop_key->offset)
3936                                 return 1;
3937                 }
3938         }
3939         return 0;
3940 }
3941
3942 static int run_next_block(struct btrfs_trans_handle *trans,
3943                           struct btrfs_root *root,
3944                           struct block_info *bits,
3945                           int bits_nr,
3946                           u64 *last,
3947                           struct cache_tree *pending,
3948                           struct cache_tree *seen,
3949                           struct cache_tree *reada,
3950                           struct cache_tree *nodes,
3951                           struct cache_tree *extent_cache,
3952                           struct cache_tree *chunk_cache,
3953                           struct rb_root *dev_cache,
3954                           struct block_group_tree *block_group_cache,
3955                           struct device_extent_tree *dev_extent_cache,
3956                           struct btrfs_root_item *ri)
3957 {
3958         struct extent_buffer *buf;
3959         u64 bytenr;
3960         u32 size;
3961         u64 parent;
3962         u64 owner;
3963         u64 flags;
3964         u64 ptr;
3965         u64 gen = 0;
3966         int ret = 0;
3967         int i;
3968         int nritems;
3969         struct btrfs_key key;
3970         struct cache_extent *cache;
3971         int reada_bits;
3972
3973         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
3974                                     bits_nr, &reada_bits);
3975         if (nritems == 0)
3976                 return 1;
3977
3978         if (!reada_bits) {
3979                 for(i = 0; i < nritems; i++) {
3980                         ret = add_cache_extent(reada, bits[i].start,
3981                                                bits[i].size);
3982                         if (ret == -EEXIST)
3983                                 continue;
3984
3985                         /* fixme, get the parent transid */
3986                         readahead_tree_block(root, bits[i].start,
3987                                              bits[i].size, 0);
3988                 }
3989         }
3990         *last = bits[0].start;
3991         bytenr = bits[0].start;
3992         size = bits[0].size;
3993
3994         cache = lookup_cache_extent(pending, bytenr, size);
3995         if (cache) {
3996                 remove_cache_extent(pending, cache);
3997                 free(cache);
3998         }
3999         cache = lookup_cache_extent(reada, bytenr, size);
4000         if (cache) {
4001                 remove_cache_extent(reada, cache);
4002                 free(cache);
4003         }
4004         cache = lookup_cache_extent(nodes, bytenr, size);
4005         if (cache) {
4006                 remove_cache_extent(nodes, cache);
4007                 free(cache);
4008         }
4009         cache = lookup_cache_extent(extent_cache, bytenr, size);
4010         if (cache) {
4011                 struct extent_record *rec;
4012
4013                 rec = container_of(cache, struct extent_record, cache);
4014                 gen = rec->parent_generation;
4015         }
4016
4017         /* fixme, get the real parent transid */
4018         buf = read_tree_block(root, bytenr, size, gen);
4019         if (!extent_buffer_uptodate(buf)) {
4020                 record_bad_block_io(root->fs_info,
4021                                     extent_cache, bytenr, size);
4022                 goto out;
4023         }
4024
4025         nritems = btrfs_header_nritems(buf);
4026
4027         /*
4028          * FIXME, this only works only if we don't have any full
4029          * backref mode.
4030          */
4031         if (!init_extent_tree) {
4032                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
4033                                        btrfs_header_level(buf), 1, NULL,
4034                                        &flags);
4035                 if (ret < 0)
4036                         flags = 0;
4037         } else {
4038                 flags = 0;
4039         }
4040
4041         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4042                 parent = bytenr;
4043                 owner = 0;
4044         } else {
4045                 parent = 0;
4046                 owner = btrfs_header_owner(buf);
4047         }
4048
4049         ret = check_block(trans, root, extent_cache, buf, flags);
4050         if (ret)
4051                 goto out;
4052
4053         if (btrfs_is_leaf(buf)) {
4054                 btree_space_waste += btrfs_leaf_free_space(root, buf);
4055                 for (i = 0; i < nritems; i++) {
4056                         struct btrfs_file_extent_item *fi;
4057                         btrfs_item_key_to_cpu(buf, &key, i);
4058                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
4059                                 process_extent_item(root, extent_cache, buf,
4060                                                     i);
4061                                 continue;
4062                         }
4063                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4064                                 process_extent_item(root, extent_cache, buf,
4065                                                     i);
4066                                 continue;
4067                         }
4068                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
4069                                 total_csum_bytes +=
4070                                         btrfs_item_size_nr(buf, i);
4071                                 continue;
4072                         }
4073                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
4074                                 process_chunk_item(chunk_cache, &key, buf, i);
4075                                 continue;
4076                         }
4077                         if (key.type == BTRFS_DEV_ITEM_KEY) {
4078                                 process_device_item(dev_cache, &key, buf, i);
4079                                 continue;
4080                         }
4081                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
4082                                 process_block_group_item(block_group_cache,
4083                                         &key, buf, i);
4084                                 continue;
4085                         }
4086                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
4087                                 process_device_extent_item(dev_extent_cache,
4088                                         &key, buf, i);
4089                                 continue;
4090
4091                         }
4092                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
4093 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4094                                 process_extent_ref_v0(extent_cache, buf, i);
4095 #else
4096                                 BUG();
4097 #endif
4098                                 continue;
4099                         }
4100
4101                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
4102                                 add_tree_backref(extent_cache, key.objectid, 0,
4103                                                  key.offset, 0);
4104                                 continue;
4105                         }
4106                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
4107                                 add_tree_backref(extent_cache, key.objectid,
4108                                                  key.offset, 0, 0);
4109                                 continue;
4110                         }
4111                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
4112                                 struct btrfs_extent_data_ref *ref;
4113                                 ref = btrfs_item_ptr(buf, i,
4114                                                 struct btrfs_extent_data_ref);
4115                                 add_data_backref(extent_cache,
4116                                         key.objectid, 0,
4117                                         btrfs_extent_data_ref_root(buf, ref),
4118                                         btrfs_extent_data_ref_objectid(buf,
4119                                                                        ref),
4120                                         btrfs_extent_data_ref_offset(buf, ref),
4121                                         btrfs_extent_data_ref_count(buf, ref),
4122                                         0, root->sectorsize);
4123                                 continue;
4124                         }
4125                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
4126                                 struct btrfs_shared_data_ref *ref;
4127                                 ref = btrfs_item_ptr(buf, i,
4128                                                 struct btrfs_shared_data_ref);
4129                                 add_data_backref(extent_cache,
4130                                         key.objectid, key.offset, 0, 0, 0,
4131                                         btrfs_shared_data_ref_count(buf, ref),
4132                                         0, root->sectorsize);
4133                                 continue;
4134                         }
4135                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
4136                                 struct bad_item *bad;
4137
4138                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
4139                                         continue;
4140                                 if (!owner)
4141                                         continue;
4142                                 bad = malloc(sizeof(struct bad_item));
4143                                 if (!bad)
4144                                         continue;
4145                                 INIT_LIST_HEAD(&bad->list);
4146                                 memcpy(&bad->key, &key,
4147                                        sizeof(struct btrfs_key));
4148                                 bad->root_id = owner;
4149                                 list_add_tail(&bad->list, &delete_items);
4150                                 continue;
4151                         }
4152                         if (key.type != BTRFS_EXTENT_DATA_KEY)
4153                                 continue;
4154                         fi = btrfs_item_ptr(buf, i,
4155                                             struct btrfs_file_extent_item);
4156                         if (btrfs_file_extent_type(buf, fi) ==
4157                             BTRFS_FILE_EXTENT_INLINE)
4158                                 continue;
4159                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
4160                                 continue;
4161
4162                         data_bytes_allocated +=
4163                                 btrfs_file_extent_disk_num_bytes(buf, fi);
4164                         if (data_bytes_allocated < root->sectorsize) {
4165                                 abort();
4166                         }
4167                         data_bytes_referenced +=
4168                                 btrfs_file_extent_num_bytes(buf, fi);
4169                         add_data_backref(extent_cache,
4170                                 btrfs_file_extent_disk_bytenr(buf, fi),
4171                                 parent, owner, key.objectid, key.offset -
4172                                 btrfs_file_extent_offset(buf, fi), 1, 1,
4173                                 btrfs_file_extent_disk_num_bytes(buf, fi));
4174                 }
4175         } else {
4176                 int level;
4177                 struct btrfs_key first_key;
4178
4179                 first_key.objectid = 0;
4180
4181                 if (nritems > 0)
4182                         btrfs_item_key_to_cpu(buf, &first_key, 0);
4183                 level = btrfs_header_level(buf);
4184                 for (i = 0; i < nritems; i++) {
4185                         ptr = btrfs_node_blockptr(buf, i);
4186                         size = btrfs_level_size(root, level - 1);
4187                         btrfs_node_key_to_cpu(buf, &key, i);
4188                         if (ri != NULL) {
4189                                 struct btrfs_key drop_key;
4190                                 btrfs_disk_key_to_cpu(&drop_key,
4191                                                       &ri->drop_progress);
4192                                 if ((level == ri->drop_level)
4193                                     && is_dropped_key(&key, &drop_key)) {
4194                                         continue;
4195                                 }
4196                         }
4197                         ret = add_extent_rec(extent_cache, &key,
4198                                              btrfs_node_ptr_generation(buf, i),
4199                                              ptr, size, 0, 0, 1, 0, 1, 0,
4200                                              size);
4201                         BUG_ON(ret);
4202
4203                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
4204
4205                         if (level > 1) {
4206                                 add_pending(nodes, seen, ptr, size);
4207                         } else {
4208                                 add_pending(pending, seen, ptr, size);
4209                         }
4210                 }
4211                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
4212                                       nritems) * sizeof(struct btrfs_key_ptr);
4213         }
4214         total_btree_bytes += buf->len;
4215         if (fs_root_objectid(btrfs_header_owner(buf)))
4216                 total_fs_tree_bytes += buf->len;
4217         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
4218                 total_extent_tree_bytes += buf->len;
4219         if (!found_old_backref &&
4220             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
4221             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
4222             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
4223                 found_old_backref = 1;
4224 out:
4225         free_extent_buffer(buf);
4226         return ret;
4227 }
4228
4229 static int add_root_to_pending(struct extent_buffer *buf,
4230                                struct cache_tree *extent_cache,
4231                                struct cache_tree *pending,
4232                                struct cache_tree *seen,
4233                                struct cache_tree *nodes,
4234                                struct btrfs_key *root_key)
4235 {
4236         if (btrfs_header_level(buf) > 0)
4237                 add_pending(nodes, seen, buf->start, buf->len);
4238         else
4239                 add_pending(pending, seen, buf->start, buf->len);
4240         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
4241                        0, 1, 1, 0, 1, 0, buf->len);
4242
4243         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
4244             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
4245                 add_tree_backref(extent_cache, buf->start, buf->start,
4246                                  0, 1);
4247         else
4248                 add_tree_backref(extent_cache, buf->start, 0,
4249                                  root_key->objectid, 1);
4250         return 0;
4251 }
4252
4253 /* as we fix the tree, we might be deleting blocks that
4254  * we're tracking for repair.  This hook makes sure we
4255  * remove any backrefs for blocks as we are fixing them.
4256  */
4257 static int free_extent_hook(struct btrfs_trans_handle *trans,
4258                             struct btrfs_root *root,
4259                             u64 bytenr, u64 num_bytes, u64 parent,
4260                             u64 root_objectid, u64 owner, u64 offset,
4261                             int refs_to_drop)
4262 {
4263         struct extent_record *rec;
4264         struct cache_extent *cache;
4265         int is_data;
4266         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
4267
4268         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
4269         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
4270         if (!cache)
4271                 return 0;
4272
4273         rec = container_of(cache, struct extent_record, cache);
4274         if (is_data) {
4275                 struct data_backref *back;
4276                 back = find_data_backref(rec, parent, root_objectid, owner,
4277                                          offset, 1, bytenr, num_bytes);
4278                 if (!back)
4279                         goto out;
4280                 if (back->node.found_ref) {
4281                         back->found_ref -= refs_to_drop;
4282                         if (rec->refs)
4283                                 rec->refs -= refs_to_drop;
4284                 }
4285                 if (back->node.found_extent_tree) {
4286                         back->num_refs -= refs_to_drop;
4287                         if (rec->extent_item_refs)
4288                                 rec->extent_item_refs -= refs_to_drop;
4289                 }
4290                 if (back->found_ref == 0)
4291                         back->node.found_ref = 0;
4292                 if (back->num_refs == 0)
4293                         back->node.found_extent_tree = 0;
4294
4295                 if (!back->node.found_extent_tree && back->node.found_ref) {
4296                         list_del(&back->node.list);
4297                         free(back);
4298                 }
4299         } else {
4300                 struct tree_backref *back;
4301                 back = find_tree_backref(rec, parent, root_objectid);
4302                 if (!back)
4303                         goto out;
4304                 if (back->node.found_ref) {
4305                         if (rec->refs)
4306                                 rec->refs--;
4307                         back->node.found_ref = 0;
4308                 }
4309                 if (back->node.found_extent_tree) {
4310                         if (rec->extent_item_refs)
4311                                 rec->extent_item_refs--;
4312                         back->node.found_extent_tree = 0;
4313                 }
4314                 if (!back->node.found_extent_tree && back->node.found_ref) {
4315                         list_del(&back->node.list);
4316                         free(back);
4317                 }
4318         }
4319         maybe_free_extent_rec(extent_cache, rec);
4320 out:
4321         return 0;
4322 }
4323
4324 static int delete_extent_records(struct btrfs_trans_handle *trans,
4325                                  struct btrfs_root *root,
4326                                  struct btrfs_path *path,
4327                                  u64 bytenr, u64 new_len)
4328 {
4329         struct btrfs_key key;
4330         struct btrfs_key found_key;
4331         struct extent_buffer *leaf;
4332         int ret;
4333         int slot;
4334
4335
4336         key.objectid = bytenr;
4337         key.type = (u8)-1;
4338         key.offset = (u64)-1;
4339
4340         while(1) {
4341                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
4342                                         &key, path, 0, 1);
4343                 if (ret < 0)
4344                         break;
4345
4346                 if (ret > 0) {
4347                         ret = 0;
4348                         if (path->slots[0] == 0)
4349                                 break;
4350                         path->slots[0]--;
4351                 }
4352                 ret = 0;
4353
4354                 leaf = path->nodes[0];
4355                 slot = path->slots[0];
4356
4357                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4358                 if (found_key.objectid != bytenr)
4359                         break;
4360
4361                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
4362                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
4363                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4364                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
4365                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
4366                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
4367                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
4368                         btrfs_release_path(path);
4369                         if (found_key.type == 0) {
4370                                 if (found_key.offset == 0)
4371                                         break;
4372                                 key.offset = found_key.offset - 1;
4373                                 key.type = found_key.type;
4374                         }
4375                         key.type = found_key.type - 1;
4376                         key.offset = (u64)-1;
4377                         continue;
4378                 }
4379
4380                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
4381                         found_key.objectid, found_key.type, found_key.offset);
4382
4383                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
4384                 if (ret)
4385                         break;
4386                 btrfs_release_path(path);
4387
4388                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4389                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
4390                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
4391                                 found_key.offset : root->leafsize;
4392
4393                         ret = btrfs_update_block_group(trans, root, bytenr,
4394                                                        bytes, 0, 0);
4395                         if (ret)
4396                                 break;
4397                 }
4398         }
4399
4400         btrfs_release_path(path);
4401         return ret;
4402 }
4403
4404 /*
4405  * for a single backref, this will allocate a new extent
4406  * and add the backref to it.
4407  */
4408 static int record_extent(struct btrfs_trans_handle *trans,
4409                          struct btrfs_fs_info *info,
4410                          struct btrfs_path *path,
4411                          struct extent_record *rec,
4412                          struct extent_backref *back,
4413                          int allocated, u64 flags)
4414 {
4415         int ret;
4416         struct btrfs_root *extent_root = info->extent_root;
4417         struct extent_buffer *leaf;
4418         struct btrfs_key ins_key;
4419         struct btrfs_extent_item *ei;
4420         struct tree_backref *tback;
4421         struct data_backref *dback;
4422         struct btrfs_tree_block_info *bi;
4423
4424         if (!back->is_data)
4425                 rec->max_size = max_t(u64, rec->max_size,
4426                                     info->extent_root->leafsize);
4427
4428         if (!allocated) {
4429                 u32 item_size = sizeof(*ei);
4430
4431                 if (!back->is_data)
4432                         item_size += sizeof(*bi);
4433
4434                 ins_key.objectid = rec->start;
4435                 ins_key.offset = rec->max_size;
4436                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
4437
4438                 ret = btrfs_insert_empty_item(trans, extent_root, path,
4439                                         &ins_key, item_size);
4440                 if (ret)
4441                         goto fail;
4442
4443                 leaf = path->nodes[0];
4444                 ei = btrfs_item_ptr(leaf, path->slots[0],
4445                                     struct btrfs_extent_item);
4446
4447                 btrfs_set_extent_refs(leaf, ei, 0);
4448                 btrfs_set_extent_generation(leaf, ei, rec->generation);
4449
4450                 if (back->is_data) {
4451                         btrfs_set_extent_flags(leaf, ei,
4452                                                BTRFS_EXTENT_FLAG_DATA);
4453                 } else {
4454                         struct btrfs_disk_key copy_key;;
4455
4456                         tback = (struct tree_backref *)back;
4457                         bi = (struct btrfs_tree_block_info *)(ei + 1);
4458                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
4459                                              sizeof(*bi));
4460
4461                         btrfs_set_disk_key_objectid(&copy_key,
4462                                                     rec->info_objectid);
4463                         btrfs_set_disk_key_type(&copy_key, 0);
4464                         btrfs_set_disk_key_offset(&copy_key, 0);
4465
4466                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
4467                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
4468
4469                         btrfs_set_extent_flags(leaf, ei,
4470                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
4471                 }
4472
4473                 btrfs_mark_buffer_dirty(leaf);
4474                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
4475                                                rec->max_size, 1, 0);
4476                 if (ret)
4477                         goto fail;
4478                 btrfs_release_path(path);
4479         }
4480
4481         if (back->is_data) {
4482                 u64 parent;
4483                 int i;
4484
4485                 dback = (struct data_backref *)back;
4486                 if (back->full_backref)
4487                         parent = dback->parent;
4488                 else
4489                         parent = 0;
4490
4491                 for (i = 0; i < dback->found_ref; i++) {
4492                         /* if parent != 0, we're doing a full backref
4493                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
4494                          * just makes the backref allocator create a data
4495                          * backref
4496                          */
4497                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
4498                                                    rec->start, rec->max_size,
4499                                                    parent,
4500                                                    dback->root,
4501                                                    parent ?
4502                                                    BTRFS_FIRST_FREE_OBJECTID :
4503                                                    dback->owner,
4504                                                    dback->offset);
4505                         if (ret)
4506                                 break;
4507                 }
4508                 fprintf(stderr, "adding new data backref"
4509                                 " on %llu %s %llu owner %llu"
4510                                 " offset %llu found %d\n",
4511                                 (unsigned long long)rec->start,
4512                                 back->full_backref ?
4513                                 "parent" : "root",
4514                                 back->full_backref ?
4515                                 (unsigned long long)parent :
4516                                 (unsigned long long)dback->root,
4517                                 (unsigned long long)dback->owner,
4518                                 (unsigned long long)dback->offset,
4519                                 dback->found_ref);
4520         } else {
4521                 u64 parent;
4522
4523                 tback = (struct tree_backref *)back;
4524                 if (back->full_backref)
4525                         parent = tback->parent;
4526                 else
4527                         parent = 0;
4528
4529                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
4530                                            rec->start, rec->max_size,
4531                                            parent, tback->root, 0, 0);
4532                 fprintf(stderr, "adding new tree backref on "
4533                         "start %llu len %llu parent %llu root %llu\n",
4534                         rec->start, rec->max_size, tback->parent, tback->root);
4535         }
4536         if (ret)
4537                 goto fail;
4538 fail:
4539         btrfs_release_path(path);
4540         return ret;
4541 }
4542
4543 struct extent_entry {
4544         u64 bytenr;
4545         u64 bytes;
4546         int count;
4547         int broken;
4548         struct list_head list;
4549 };
4550
4551 static struct extent_entry *find_entry(struct list_head *entries,
4552                                        u64 bytenr, u64 bytes)
4553 {
4554         struct extent_entry *entry = NULL;
4555
4556         list_for_each_entry(entry, entries, list) {
4557                 if (entry->bytenr == bytenr && entry->bytes == bytes)
4558                         return entry;
4559         }
4560
4561         return NULL;
4562 }
4563
4564 static struct extent_entry *find_most_right_entry(struct list_head *entries)
4565 {
4566         struct extent_entry *entry, *best = NULL, *prev = NULL;
4567
4568         list_for_each_entry(entry, entries, list) {
4569                 if (!prev) {
4570                         prev = entry;
4571                         continue;
4572                 }
4573
4574                 /*
4575                  * If there are as many broken entries as entries then we know
4576                  * not to trust this particular entry.
4577                  */
4578                 if (entry->broken == entry->count)
4579                         continue;
4580
4581                 /*
4582                  * If our current entry == best then we can't be sure our best
4583                  * is really the best, so we need to keep searching.
4584                  */
4585                 if (best && best->count == entry->count) {
4586                         prev = entry;
4587                         best = NULL;
4588                         continue;
4589                 }
4590
4591                 /* Prev == entry, not good enough, have to keep searching */
4592                 if (!prev->broken && prev->count == entry->count)
4593                         continue;
4594
4595                 if (!best)
4596                         best = (prev->count > entry->count) ? prev : entry;
4597                 else if (best->count < entry->count)
4598                         best = entry;
4599                 prev = entry;
4600         }
4601
4602         return best;
4603 }
4604
4605 static int repair_ref(struct btrfs_trans_handle *trans,
4606                       struct btrfs_fs_info *info, struct btrfs_path *path,
4607                       struct data_backref *dback, struct extent_entry *entry)
4608 {
4609         struct btrfs_root *root;
4610         struct btrfs_file_extent_item *fi;
4611         struct extent_buffer *leaf;
4612         struct btrfs_key key;
4613         u64 bytenr, bytes;
4614         int ret;
4615
4616         key.objectid = dback->root;
4617         key.type = BTRFS_ROOT_ITEM_KEY;
4618         key.offset = (u64)-1;
4619         root = btrfs_read_fs_root(info, &key);
4620         if (IS_ERR(root)) {
4621                 fprintf(stderr, "Couldn't find root for our ref\n");
4622                 return -EINVAL;
4623         }
4624
4625         /*
4626          * The backref points to the original offset of the extent if it was
4627          * split, so we need to search down to the offset we have and then walk
4628          * forward until we find the backref we're looking for.
4629          */
4630         key.objectid = dback->owner;
4631         key.type = BTRFS_EXTENT_DATA_KEY;
4632         key.offset = dback->offset;
4633         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4634         if (ret < 0) {
4635                 fprintf(stderr, "Error looking up ref %d\n", ret);
4636                 return ret;
4637         }
4638
4639         while (1) {
4640                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4641                         ret = btrfs_next_leaf(root, path);
4642                         if (ret) {
4643                                 fprintf(stderr, "Couldn't find our ref, next\n");
4644                                 return -EINVAL;
4645                         }
4646                 }
4647                 leaf = path->nodes[0];
4648                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4649                 if (key.objectid != dback->owner ||
4650                     key.type != BTRFS_EXTENT_DATA_KEY) {
4651                         fprintf(stderr, "Couldn't find our ref, search\n");
4652                         return -EINVAL;
4653                 }
4654                 fi = btrfs_item_ptr(leaf, path->slots[0],
4655                                     struct btrfs_file_extent_item);
4656                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4657                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4658
4659                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
4660                         break;
4661                 path->slots[0]++;
4662         }
4663
4664         btrfs_release_path(path);
4665
4666         /*
4667          * Have to make sure that this root gets updated when we commit the
4668          * transaction
4669          */
4670         root->track_dirty = 1;
4671         if (root->last_trans != trans->transid) {
4672                 root->last_trans = trans->transid;
4673                 root->commit_root = root->node;
4674                 extent_buffer_get(root->node);
4675         }
4676
4677         /*
4678          * Ok we have the key of the file extent we want to fix, now we can cow
4679          * down to the thing and fix it.
4680          */
4681         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4682         if (ret < 0) {
4683                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
4684                         key.objectid, key.type, key.offset, ret);
4685                 return ret;
4686         }
4687         if (ret > 0) {
4688                 fprintf(stderr, "Well that's odd, we just found this key "
4689                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
4690                         key.offset);
4691                 return -EINVAL;
4692         }
4693         leaf = path->nodes[0];
4694         fi = btrfs_item_ptr(leaf, path->slots[0],
4695                             struct btrfs_file_extent_item);
4696
4697         if (btrfs_file_extent_compression(leaf, fi) &&
4698             dback->disk_bytenr != entry->bytenr) {
4699                 fprintf(stderr, "Ref doesn't match the record start and is "
4700                         "compressed, please take a btrfs-image of this file "
4701                         "system and send it to a btrfs developer so they can "
4702                         "complete this functionality for bytenr %Lu\n",
4703                         dback->disk_bytenr);
4704                 return -EINVAL;
4705         }
4706
4707         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
4708                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4709         } else if (dback->disk_bytenr > entry->bytenr) {
4710                 u64 off_diff, offset;
4711
4712                 off_diff = dback->disk_bytenr - entry->bytenr;
4713                 offset = btrfs_file_extent_offset(leaf, fi);
4714                 if (dback->disk_bytenr + offset +
4715                     btrfs_file_extent_num_bytes(leaf, fi) >
4716                     entry->bytenr + entry->bytes) {
4717                         fprintf(stderr, "Ref is past the entry end, please "
4718                                 "take a btrfs-image of this file system and "
4719                                 "send it to a btrfs developer, ref %Lu\n",
4720                                 dback->disk_bytenr);
4721                         return -EINVAL;
4722                 }
4723                 offset += off_diff;
4724                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4725                 btrfs_set_file_extent_offset(leaf, fi, offset);
4726         } else if (dback->disk_bytenr < entry->bytenr) {
4727                 u64 offset;
4728
4729                 offset = btrfs_file_extent_offset(leaf, fi);
4730                 if (dback->disk_bytenr + offset < entry->bytenr) {
4731                         fprintf(stderr, "Ref is before the entry start, please"
4732                                 " take a btrfs-image of this file system and "
4733                                 "send it to a btrfs developer, ref %Lu\n",
4734                                 dback->disk_bytenr);
4735                         return -EINVAL;
4736                 }
4737
4738                 offset += dback->disk_bytenr;
4739                 offset -= entry->bytenr;
4740                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4741                 btrfs_set_file_extent_offset(leaf, fi, offset);
4742         }
4743
4744         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
4745
4746         /*
4747          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
4748          * only do this if we aren't using compression, otherwise it's a
4749          * trickier case.
4750          */
4751         if (!btrfs_file_extent_compression(leaf, fi))
4752                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
4753         else
4754                 printf("ram bytes may be wrong?\n");
4755         btrfs_mark_buffer_dirty(leaf);
4756         btrfs_release_path(path);
4757         return 0;
4758 }
4759
4760 static int verify_backrefs(struct btrfs_trans_handle *trans,
4761                            struct btrfs_fs_info *info, struct btrfs_path *path,
4762                            struct extent_record *rec)
4763 {
4764         struct extent_backref *back;
4765         struct data_backref *dback;
4766         struct extent_entry *entry, *best = NULL;
4767         LIST_HEAD(entries);
4768         int nr_entries = 0;
4769         int broken_entries = 0;
4770         int ret = 0;
4771         short mismatch = 0;
4772
4773         /*
4774          * Metadata is easy and the backrefs should always agree on bytenr and
4775          * size, if not we've got bigger issues.
4776          */
4777         if (rec->metadata)
4778                 return 0;
4779
4780         list_for_each_entry(back, &rec->backrefs, list) {
4781                 dback = (struct data_backref *)back;
4782                 /*
4783                  * We only pay attention to backrefs that we found a real
4784                  * backref for.
4785                  */
4786                 if (dback->found_ref == 0)
4787                         continue;
4788                 if (back->full_backref)
4789                         continue;
4790
4791                 /*
4792                  * For now we only catch when the bytes don't match, not the
4793                  * bytenr.  We can easily do this at the same time, but I want
4794                  * to have a fs image to test on before we just add repair
4795                  * functionality willy-nilly so we know we won't screw up the
4796                  * repair.
4797                  */
4798
4799                 entry = find_entry(&entries, dback->disk_bytenr,
4800                                    dback->bytes);
4801                 if (!entry) {
4802                         entry = malloc(sizeof(struct extent_entry));
4803                         if (!entry) {
4804                                 ret = -ENOMEM;
4805                                 goto out;
4806                         }
4807                         memset(entry, 0, sizeof(*entry));
4808                         entry->bytenr = dback->disk_bytenr;
4809                         entry->bytes = dback->bytes;
4810                         list_add_tail(&entry->list, &entries);
4811                         nr_entries++;
4812                 }
4813
4814                 /*
4815                  * If we only have on entry we may think the entries agree when
4816                  * in reality they don't so we have to do some extra checking.
4817                  */
4818                 if (dback->disk_bytenr != rec->start ||
4819                     dback->bytes != rec->nr || back->broken)
4820                         mismatch = 1;
4821
4822                 if (back->broken) {
4823                         entry->broken++;
4824                         broken_entries++;
4825                 }
4826
4827                 entry->count++;
4828         }
4829
4830         /* Yay all the backrefs agree, carry on good sir */
4831         if (nr_entries <= 1 && !mismatch)
4832                 goto out;
4833
4834         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
4835                 "%Lu\n", rec->start);
4836
4837         /*
4838          * First we want to see if the backrefs can agree amongst themselves who
4839          * is right, so figure out which one of the entries has the highest
4840          * count.
4841          */
4842         best = find_most_right_entry(&entries);
4843
4844         /*
4845          * Ok so we may have an even split between what the backrefs think, so
4846          * this is where we use the extent ref to see what it thinks.
4847          */
4848         if (!best) {
4849                 entry = find_entry(&entries, rec->start, rec->nr);
4850                 if (!entry && (!broken_entries || !rec->found_rec)) {
4851                         fprintf(stderr, "Backrefs don't agree with each other "
4852                                 "and extent record doesn't agree with anybody,"
4853                                 " so we can't fix bytenr %Lu bytes %Lu\n",
4854                                 rec->start, rec->nr);
4855                         ret = -EINVAL;
4856                         goto out;
4857                 } else if (!entry) {
4858                         /*
4859                          * Ok our backrefs were broken, we'll assume this is the
4860                          * correct value and add an entry for this range.
4861                          */
4862                         entry = malloc(sizeof(struct extent_entry));
4863                         if (!entry) {
4864                                 ret = -ENOMEM;
4865                                 goto out;
4866                         }
4867                         memset(entry, 0, sizeof(*entry));
4868                         entry->bytenr = rec->start;
4869                         entry->bytes = rec->nr;
4870                         list_add_tail(&entry->list, &entries);
4871                         nr_entries++;
4872                 }
4873                 entry->count++;
4874                 best = find_most_right_entry(&entries);
4875                 if (!best) {
4876                         fprintf(stderr, "Backrefs and extent record evenly "
4877                                 "split on who is right, this is going to "
4878                                 "require user input to fix bytenr %Lu bytes "
4879                                 "%Lu\n", rec->start, rec->nr);
4880                         ret = -EINVAL;
4881                         goto out;
4882                 }
4883         }
4884
4885         /*
4886          * I don't think this can happen currently as we'll abort() if we catch
4887          * this case higher up, but in case somebody removes that we still can't
4888          * deal with it properly here yet, so just bail out of that's the case.
4889          */
4890         if (best->bytenr != rec->start) {
4891                 fprintf(stderr, "Extent start and backref starts don't match, "
4892                         "please use btrfs-image on this file system and send "
4893                         "it to a btrfs developer so they can make fsck fix "
4894                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
4895                         rec->start, rec->nr);
4896                 ret = -EINVAL;
4897                 goto out;
4898         }
4899
4900         /*
4901          * Ok great we all agreed on an extent record, let's go find the real
4902          * references and fix up the ones that don't match.
4903          */
4904         list_for_each_entry(back, &rec->backrefs, list) {
4905                 dback = (struct data_backref *)back;
4906
4907                 /*
4908                  * Still ignoring backrefs that don't have a real ref attached
4909                  * to them.
4910                  */
4911                 if (dback->found_ref == 0)
4912                         continue;
4913                 if (back->full_backref)
4914                         continue;
4915
4916                 if (dback->bytes == best->bytes &&
4917                     dback->disk_bytenr == best->bytenr)
4918                         continue;
4919
4920                 ret = repair_ref(trans, info, path, dback, best);
4921                 if (ret)
4922                         goto out;
4923         }
4924
4925         /*
4926          * Ok we messed with the actual refs, which means we need to drop our
4927          * entire cache and go back and rescan.  I know this is a huge pain and
4928          * adds a lot of extra work, but it's the only way to be safe.  Once all
4929          * the backrefs agree we may not need to do anything to the extent
4930          * record itself.
4931          */
4932         ret = -EAGAIN;
4933 out:
4934         while (!list_empty(&entries)) {
4935                 entry = list_entry(entries.next, struct extent_entry, list);
4936                 list_del_init(&entry->list);
4937                 free(entry);
4938         }
4939         return ret;
4940 }
4941
4942 static int process_duplicates(struct btrfs_root *root,
4943                               struct cache_tree *extent_cache,
4944                               struct extent_record *rec)
4945 {
4946         struct extent_record *good, *tmp;
4947         struct cache_extent *cache;
4948         int ret;
4949
4950         /*
4951          * If we found a extent record for this extent then return, or if we
4952          * have more than one duplicate we are likely going to need to delete
4953          * something.
4954          */
4955         if (rec->found_rec || rec->num_duplicates > 1)
4956                 return 0;
4957
4958         /* Shouldn't happen but just in case */
4959         BUG_ON(!rec->num_duplicates);
4960
4961         /*
4962          * So this happens if we end up with a backref that doesn't match the
4963          * actual extent entry.  So either the backref is bad or the extent
4964          * entry is bad.  Either way we want to have the extent_record actually
4965          * reflect what we found in the extent_tree, so we need to take the
4966          * duplicate out and use that as the extent_record since the only way we
4967          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
4968          */
4969         remove_cache_extent(extent_cache, &rec->cache);
4970
4971         good = list_entry(rec->dups.next, struct extent_record, list);
4972         list_del_init(&good->list);
4973         INIT_LIST_HEAD(&good->backrefs);
4974         INIT_LIST_HEAD(&good->dups);
4975         good->cache.start = good->start;
4976         good->cache.size = good->nr;
4977         good->content_checked = 0;
4978         good->owner_ref_checked = 0;
4979         good->num_duplicates = 0;
4980         good->refs = rec->refs;
4981         list_splice_init(&rec->backrefs, &good->backrefs);
4982         while (1) {
4983                 cache = lookup_cache_extent(extent_cache, good->start,
4984                                             good->nr);
4985                 if (!cache)
4986                         break;
4987                 tmp = container_of(cache, struct extent_record, cache);
4988
4989                 /*
4990                  * If we find another overlapping extent and it's found_rec is
4991                  * set then it's a duplicate and we need to try and delete
4992                  * something.
4993                  */
4994                 if (tmp->found_rec || tmp->num_duplicates > 0) {
4995                         if (list_empty(&good->list))
4996                                 list_add_tail(&good->list,
4997                                               &duplicate_extents);
4998                         good->num_duplicates += tmp->num_duplicates + 1;
4999                         list_splice_init(&tmp->dups, &good->dups);
5000                         list_del_init(&tmp->list);
5001                         list_add_tail(&tmp->list, &good->dups);
5002                         remove_cache_extent(extent_cache, &tmp->cache);
5003                         continue;
5004                 }
5005
5006                 /*
5007                  * Ok we have another non extent item backed extent rec, so lets
5008                  * just add it to this extent and carry on like we did above.
5009                  */
5010                 good->refs += tmp->refs;
5011                 list_splice_init(&tmp->backrefs, &good->backrefs);
5012                 remove_cache_extent(extent_cache, &tmp->cache);
5013                 free(tmp);
5014         }
5015         ret = insert_cache_extent(extent_cache, &good->cache);
5016         BUG_ON(ret);
5017         free(rec);
5018         return good->num_duplicates ? 0 : 1;
5019 }
5020
5021 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
5022                                     struct btrfs_root *root,
5023                                     struct extent_record *rec)
5024 {
5025         LIST_HEAD(delete_list);
5026         struct btrfs_path *path;
5027         struct extent_record *tmp, *good, *n;
5028         int nr_del = 0;
5029         int ret = 0;
5030         struct btrfs_key key;
5031
5032         path = btrfs_alloc_path();
5033         if (!path) {
5034                 ret = -ENOMEM;
5035                 goto out;
5036         }
5037
5038         good = rec;
5039         /* Find the record that covers all of the duplicates. */
5040         list_for_each_entry(tmp, &rec->dups, list) {
5041                 if (good->start < tmp->start)
5042                         continue;
5043                 if (good->nr > tmp->nr)
5044                         continue;
5045
5046                 if (tmp->start + tmp->nr < good->start + good->nr) {
5047                         fprintf(stderr, "Ok we have overlapping extents that "
5048                                 "aren't completely covered by eachother, this "
5049                                 "is going to require more careful thought.  "
5050                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
5051                                 tmp->start, tmp->nr, good->start, good->nr);
5052                         abort();
5053                 }
5054                 good = tmp;
5055         }
5056
5057         if (good != rec)
5058                 list_add_tail(&rec->list, &delete_list);
5059
5060         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
5061                 if (tmp == good)
5062                         continue;
5063                 list_move_tail(&tmp->list, &delete_list);
5064         }
5065
5066         root = root->fs_info->extent_root;
5067         list_for_each_entry(tmp, &delete_list, list) {
5068                 if (tmp->found_rec == 0)
5069                         continue;
5070                 key.objectid = tmp->start;
5071                 key.type = BTRFS_EXTENT_ITEM_KEY;
5072                 key.offset = tmp->nr;
5073
5074                 /* Shouldn't happen but just in case */
5075                 if (tmp->metadata) {
5076                         fprintf(stderr, "Well this shouldn't happen, extent "
5077                                 "record overlaps but is metadata? "
5078                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
5079                         abort();
5080                 }
5081
5082                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5083                 if (ret) {
5084                         if (ret > 0)
5085                                 ret = -EINVAL;
5086                         goto out;
5087                 }
5088                 ret = btrfs_del_item(trans, root, path);
5089                 if (ret)
5090                         goto out;
5091                 btrfs_release_path(path);
5092                 nr_del++;
5093         }
5094
5095 out:
5096         while (!list_empty(&delete_list)) {
5097                 tmp = list_entry(delete_list.next, struct extent_record, list);
5098                 list_del_init(&tmp->list);
5099                 if (tmp == rec)
5100                         continue;
5101                 free(tmp);
5102         }
5103
5104         while (!list_empty(&rec->dups)) {
5105                 tmp = list_entry(rec->dups.next, struct extent_record, list);
5106                 list_del_init(&tmp->list);
5107                 free(tmp);
5108         }
5109
5110         btrfs_free_path(path);
5111
5112         if (!ret && !nr_del)
5113                 rec->num_duplicates = 0;
5114
5115         return ret ? ret : nr_del;
5116 }
5117
5118 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
5119                                   struct btrfs_fs_info *info,
5120                                   struct btrfs_path *path,
5121                                   struct cache_tree *extent_cache,
5122                                   struct extent_record *rec)
5123 {
5124         struct btrfs_root *root;
5125         struct extent_backref *back;
5126         struct data_backref *dback;
5127         struct cache_extent *cache;
5128         struct btrfs_file_extent_item *fi;
5129         struct btrfs_key key;
5130         u64 bytenr, bytes;
5131         int ret;
5132
5133         list_for_each_entry(back, &rec->backrefs, list) {
5134                 dback = (struct data_backref *)back;
5135
5136                 /* We found this one, we don't need to do a lookup */
5137                 if (dback->found_ref)
5138                         continue;
5139                 /* Don't care about full backrefs (poor unloved backrefs) */
5140                 if (back->full_backref)
5141                         continue;
5142                 key.objectid = dback->root;
5143                 key.type = BTRFS_ROOT_ITEM_KEY;
5144                 key.offset = (u64)-1;
5145
5146                 root = btrfs_read_fs_root(info, &key);
5147
5148                 /* No root, definitely a bad ref, skip */
5149                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
5150                         continue;
5151                 /* Other err, exit */
5152                 if (IS_ERR(root))
5153                         return PTR_ERR(root);
5154
5155                 key.objectid = dback->owner;
5156                 key.type = BTRFS_EXTENT_DATA_KEY;
5157                 key.offset = dback->offset;
5158                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5159                 if (ret) {
5160                         btrfs_release_path(path);
5161                         if (ret < 0)
5162                                 return ret;
5163                         /* Didn't find it, we can carry on */
5164                         ret = 0;
5165                         continue;
5166                 }
5167
5168                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5169                                     struct btrfs_file_extent_item);
5170                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
5171                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
5172                 btrfs_release_path(path);
5173                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5174                 if (cache) {
5175                         struct extent_record *tmp;
5176                         tmp = container_of(cache, struct extent_record, cache);
5177
5178                         /*
5179                          * If we found an extent record for the bytenr for this
5180                          * particular backref then we can't add it to our
5181                          * current extent record.  We only want to add backrefs
5182                          * that don't have a corresponding extent item in the
5183                          * extent tree since they likely belong to this record
5184                          * and we need to fix it if it doesn't match bytenrs.
5185                          */
5186                         if  (tmp->found_rec)
5187                                 continue;
5188                 }
5189
5190                 dback->found_ref += 1;
5191                 dback->disk_bytenr = bytenr;
5192                 dback->bytes = bytes;
5193
5194                 /*
5195                  * Set this so the verify backref code knows not to trust the
5196                  * values in this backref.
5197                  */
5198                 back->broken = 1;
5199         }
5200
5201         return 0;
5202 }
5203
5204 /*
5205  * when an incorrect extent item is found, this will delete
5206  * all of the existing entries for it and recreate them
5207  * based on what the tree scan found.
5208  */
5209 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
5210                              struct btrfs_fs_info *info,
5211                              struct cache_tree *extent_cache,
5212                              struct extent_record *rec)
5213 {
5214         int ret;
5215         struct btrfs_path *path;
5216         struct list_head *cur = rec->backrefs.next;
5217         struct cache_extent *cache;
5218         struct extent_backref *back;
5219         int allocated = 0;
5220         u64 flags = 0;
5221
5222         /*
5223          * remember our flags for recreating the extent.
5224          * FIXME, if we have cleared extent tree, we can not
5225          * lookup extent info in extent tree.
5226          */
5227         if (!init_extent_tree) {
5228                 ret = btrfs_lookup_extent_info(NULL, info->extent_root,
5229                                         rec->start, rec->max_size,
5230                                         rec->metadata, NULL, &flags);
5231                 if (ret < 0)
5232                         flags = 0;
5233         } else {
5234                 flags = 0;
5235         }
5236
5237         path = btrfs_alloc_path();
5238         if (!path)
5239                 return -ENOMEM;
5240
5241         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
5242                 /*
5243                  * Sometimes the backrefs themselves are so broken they don't
5244                  * get attached to any meaningful rec, so first go back and
5245                  * check any of our backrefs that we couldn't find and throw
5246                  * them into the list if we find the backref so that
5247                  * verify_backrefs can figure out what to do.
5248                  */
5249                 ret = find_possible_backrefs(trans, info, path, extent_cache,
5250                                              rec);
5251                 if (ret < 0)
5252                         goto out;
5253         }
5254
5255         /* step one, make sure all of the backrefs agree */
5256         ret = verify_backrefs(trans, info, path, rec);
5257         if (ret < 0)
5258                 goto out;
5259
5260         /* step two, delete all the existing records */
5261         ret = delete_extent_records(trans, info->extent_root, path,
5262                                     rec->start, rec->max_size);
5263
5264         if (ret < 0)
5265                 goto out;
5266
5267         /* was this block corrupt?  If so, don't add references to it */
5268         cache = lookup_cache_extent(info->corrupt_blocks,
5269                                     rec->start, rec->max_size);
5270         if (cache) {
5271                 ret = 0;
5272                 goto out;
5273         }
5274
5275         /* step three, recreate all the refs we did find */
5276         while(cur != &rec->backrefs) {
5277                 back = list_entry(cur, struct extent_backref, list);
5278                 cur = cur->next;
5279
5280                 /*
5281                  * if we didn't find any references, don't create a
5282                  * new extent record
5283                  */
5284                 if (!back->found_ref)
5285                         continue;
5286
5287                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
5288                 allocated = 1;
5289
5290                 if (ret)
5291                         goto out;
5292         }
5293 out:
5294         btrfs_free_path(path);
5295         return ret;
5296 }
5297
5298 /* right now we only prune from the extent allocation tree */
5299 static int prune_one_block(struct btrfs_trans_handle *trans,
5300                            struct btrfs_fs_info *info,
5301                            struct btrfs_corrupt_block *corrupt)
5302 {
5303         int ret;
5304         struct btrfs_path path;
5305         struct extent_buffer *eb;
5306         u64 found;
5307         int slot;
5308         int nritems;
5309         int level = corrupt->level + 1;
5310
5311         btrfs_init_path(&path);
5312 again:
5313         /* we want to stop at the parent to our busted block */
5314         path.lowest_level = level;
5315
5316         ret = btrfs_search_slot(trans, info->extent_root,
5317                                 &corrupt->key, &path, -1, 1);
5318
5319         if (ret < 0)
5320                 goto out;
5321
5322         eb = path.nodes[level];
5323         if (!eb) {
5324                 ret = -ENOENT;
5325                 goto out;
5326         }
5327
5328         /*
5329          * hopefully the search gave us the block we want to prune,
5330          * lets try that first
5331          */
5332         slot = path.slots[level];
5333         found =  btrfs_node_blockptr(eb, slot);
5334         if (found == corrupt->cache.start)
5335                 goto del_ptr;
5336
5337         nritems = btrfs_header_nritems(eb);
5338
5339         /* the search failed, lets scan this node and hope we find it */
5340         for (slot = 0; slot < nritems; slot++) {
5341                 found =  btrfs_node_blockptr(eb, slot);
5342                 if (found == corrupt->cache.start)
5343                         goto del_ptr;
5344         }
5345         /*
5346          * we couldn't find the bad block.  TODO, search all the nodes for pointers
5347          * to this block
5348          */
5349         if (eb == info->extent_root->node) {
5350                 ret = -ENOENT;
5351                 goto out;
5352         } else {
5353                 level++;
5354                 btrfs_release_path(&path);
5355                 goto again;
5356         }
5357
5358 del_ptr:
5359         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
5360         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
5361
5362 out:
5363         btrfs_release_path(&path);
5364         return ret;
5365 }
5366
5367 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
5368                                 struct btrfs_fs_info *info)
5369 {
5370         struct cache_extent *cache;
5371         struct btrfs_corrupt_block *corrupt;
5372
5373         cache = search_cache_extent(info->corrupt_blocks, 0);
5374         while (1) {
5375                 if (!cache)
5376                         break;
5377                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5378                 prune_one_block(trans, info, corrupt);
5379                 cache = next_cache_extent(cache);
5380         }
5381         return 0;
5382 }
5383
5384 static void free_corrupt_block(struct cache_extent *cache)
5385 {
5386         struct btrfs_corrupt_block *corrupt;
5387
5388         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5389         free(corrupt);
5390 }
5391
5392 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
5393
5394 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
5395 {
5396         struct btrfs_block_group_cache *cache;
5397         u64 start, end;
5398         int ret;
5399
5400         while (1) {
5401                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
5402                                             &start, &end, EXTENT_DIRTY);
5403                 if (ret)
5404                         break;
5405                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
5406                                    GFP_NOFS);
5407         }
5408
5409         start = 0;
5410         while (1) {
5411                 cache = btrfs_lookup_first_block_group(fs_info, start);
5412                 if (!cache)
5413                         break;
5414                 if (cache->cached)
5415                         cache->cached = 0;
5416                 start = cache->key.objectid + cache->key.offset;
5417         }
5418 }
5419
5420 static int check_extent_refs(struct btrfs_trans_handle *trans,
5421                              struct btrfs_root *root,
5422                              struct cache_tree *extent_cache)
5423 {
5424         struct extent_record *rec;
5425         struct cache_extent *cache;
5426         int err = 0;
5427         int ret = 0;
5428         int fixed = 0;
5429         int had_dups = 0;
5430
5431         if (repair) {
5432                 /*
5433                  * if we're doing a repair, we have to make sure
5434                  * we don't allocate from the problem extents.
5435                  * In the worst case, this will be all the
5436                  * extents in the FS
5437                  */
5438                 cache = search_cache_extent(extent_cache, 0);
5439                 while(cache) {
5440                         rec = container_of(cache, struct extent_record, cache);
5441                         btrfs_pin_extent(root->fs_info,
5442                                          rec->start, rec->max_size);
5443                         cache = next_cache_extent(cache);
5444                 }
5445
5446                 /* pin down all the corrupted blocks too */
5447                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
5448                 while(cache) {
5449                         btrfs_pin_extent(root->fs_info,
5450                                          cache->start, cache->size);
5451                         cache = next_cache_extent(cache);
5452                 }
5453                 prune_corrupt_blocks(trans, root->fs_info);
5454                 reset_cached_block_groups(root->fs_info);
5455         }
5456
5457         /*
5458          * We need to delete any duplicate entries we find first otherwise we
5459          * could mess up the extent tree when we have backrefs that actually
5460          * belong to a different extent item and not the weird duplicate one.
5461          */
5462         while (repair && !list_empty(&duplicate_extents)) {
5463                 rec = list_entry(duplicate_extents.next, struct extent_record,
5464                                  list);
5465                 list_del_init(&rec->list);
5466
5467                 /* Sometimes we can find a backref before we find an actual
5468                  * extent, so we need to process it a little bit to see if there
5469                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
5470                  * if this is a backref screwup.  If we need to delete stuff
5471                  * process_duplicates() will return 0, otherwise it will return
5472                  * 1 and we
5473                  */
5474                 if (process_duplicates(root, extent_cache, rec))
5475                         continue;
5476                 ret = delete_duplicate_records(trans, root, rec);
5477                 if (ret < 0)
5478                         return ret;
5479                 /*
5480                  * delete_duplicate_records will return the number of entries
5481                  * deleted, so if it's greater than 0 then we know we actually
5482                  * did something and we need to remove.
5483                  */
5484                 if (ret)
5485                         had_dups = 1;
5486         }
5487
5488         if (had_dups)
5489                 return -EAGAIN;
5490
5491         while(1) {
5492                 fixed = 0;
5493                 cache = search_cache_extent(extent_cache, 0);
5494                 if (!cache)
5495                         break;
5496                 rec = container_of(cache, struct extent_record, cache);
5497                 if (rec->num_duplicates) {
5498                         fprintf(stderr, "extent item %llu has multiple extent "
5499                                 "items\n", (unsigned long long)rec->start);
5500                         err = 1;
5501                 }
5502
5503                 if (rec->refs != rec->extent_item_refs) {
5504                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
5505                                 (unsigned long long)rec->start,
5506                                 (unsigned long long)rec->nr);
5507                         fprintf(stderr, "extent item %llu, found %llu\n",
5508                                 (unsigned long long)rec->extent_item_refs,
5509                                 (unsigned long long)rec->refs);
5510                         if (!fixed && repair) {
5511                                 ret = fixup_extent_refs(trans, root->fs_info,
5512                                                         extent_cache, rec);
5513                                 if (ret)
5514                                         goto repair_abort;
5515                                 fixed = 1;
5516                         }
5517                         err = 1;
5518
5519                 }
5520                 if (all_backpointers_checked(rec, 1)) {
5521                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
5522                                 (unsigned long long)rec->start,
5523                                 (unsigned long long)rec->nr);
5524
5525                         if (!fixed && repair) {
5526                                 ret = fixup_extent_refs(trans, root->fs_info,
5527                                                         extent_cache, rec);
5528                                 if (ret)
5529                                         goto repair_abort;
5530                                 fixed = 1;
5531                         }
5532
5533                         err = 1;
5534                 }
5535                 if (!rec->owner_ref_checked) {
5536                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
5537                                 (unsigned long long)rec->start,
5538                                 (unsigned long long)rec->nr);
5539                         if (!fixed && repair) {
5540                                 ret = fixup_extent_refs(trans, root->fs_info,
5541                                                         extent_cache, rec);
5542                                 if (ret)
5543                                         goto repair_abort;
5544                                 fixed = 1;
5545                         }
5546                         err = 1;
5547                 }
5548
5549                 remove_cache_extent(extent_cache, cache);
5550                 free_all_extent_backrefs(rec);
5551                 free(rec);
5552         }
5553 repair_abort:
5554         if (repair) {
5555                 if (ret && ret != -EAGAIN) {
5556                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
5557                         exit(1);
5558                 } else if (!ret) {
5559                         btrfs_fix_block_accounting(trans, root);
5560                 }
5561                 if (err)
5562                         fprintf(stderr, "repaired damaged extent references\n");
5563                 return ret;
5564         }
5565         return err;
5566 }
5567
5568 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
5569 {
5570         u64 stripe_size;
5571
5572         if (type & BTRFS_BLOCK_GROUP_RAID0) {
5573                 stripe_size = length;
5574                 stripe_size /= num_stripes;
5575         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
5576                 stripe_size = length * 2;
5577                 stripe_size /= num_stripes;
5578         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
5579                 stripe_size = length;
5580                 stripe_size /= (num_stripes - 1);
5581         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
5582                 stripe_size = length;
5583                 stripe_size /= (num_stripes - 2);
5584         } else {
5585                 stripe_size = length;
5586         }
5587         return stripe_size;
5588 }
5589
5590 static int check_chunk_refs(struct chunk_record *chunk_rec,
5591                             struct block_group_tree *block_group_cache,
5592                             struct device_extent_tree *dev_extent_cache,
5593                             int silent)
5594 {
5595         struct cache_extent *block_group_item;
5596         struct block_group_record *block_group_rec;
5597         struct cache_extent *dev_extent_item;
5598         struct device_extent_record *dev_extent_rec;
5599         u64 devid;
5600         u64 offset;
5601         u64 length;
5602         int i;
5603         int ret = 0;
5604
5605         block_group_item = lookup_cache_extent(&block_group_cache->tree,
5606                                                chunk_rec->offset,
5607                                                chunk_rec->length);
5608         if (block_group_item) {
5609                 block_group_rec = container_of(block_group_item,
5610                                                struct block_group_record,
5611                                                cache);
5612                 if (chunk_rec->length != block_group_rec->offset ||
5613                     chunk_rec->offset != block_group_rec->objectid ||
5614                     chunk_rec->type_flags != block_group_rec->flags) {
5615                         if (!silent)
5616                                 fprintf(stderr,
5617                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
5618                                         chunk_rec->objectid,
5619                                         chunk_rec->type,
5620                                         chunk_rec->offset,
5621                                         chunk_rec->length,
5622                                         chunk_rec->offset,
5623                                         chunk_rec->type_flags,
5624                                         block_group_rec->objectid,
5625                                         block_group_rec->type,
5626                                         block_group_rec->offset,
5627                                         block_group_rec->offset,
5628                                         block_group_rec->objectid,
5629                                         block_group_rec->flags);
5630                         ret = -1;
5631                 } else {
5632                         list_del_init(&block_group_rec->list);
5633                         chunk_rec->bg_rec = block_group_rec;
5634                 }
5635         } else {
5636                 if (!silent)
5637                         fprintf(stderr,
5638                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
5639                                 chunk_rec->objectid,
5640                                 chunk_rec->type,
5641                                 chunk_rec->offset,
5642                                 chunk_rec->length,
5643                                 chunk_rec->offset,
5644                                 chunk_rec->type_flags);
5645                 ret = -1;
5646         }
5647
5648         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
5649                                     chunk_rec->num_stripes);
5650         for (i = 0; i < chunk_rec->num_stripes; ++i) {
5651                 devid = chunk_rec->stripes[i].devid;
5652                 offset = chunk_rec->stripes[i].offset;
5653                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
5654                                                        devid, offset, length);
5655                 if (dev_extent_item) {
5656                         dev_extent_rec = container_of(dev_extent_item,
5657                                                 struct device_extent_record,
5658                                                 cache);
5659                         if (dev_extent_rec->objectid != devid ||
5660                             dev_extent_rec->offset != offset ||
5661                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
5662                             dev_extent_rec->length != length) {
5663                                 if (!silent)
5664                                         fprintf(stderr,
5665                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
5666                                                 chunk_rec->objectid,
5667                                                 chunk_rec->type,
5668                                                 chunk_rec->offset,
5669                                                 chunk_rec->stripes[i].devid,
5670                                                 chunk_rec->stripes[i].offset,
5671                                                 dev_extent_rec->objectid,
5672                                                 dev_extent_rec->offset,
5673                                                 dev_extent_rec->length);
5674                                 ret = -1;
5675                         } else {
5676                                 list_move(&dev_extent_rec->chunk_list,
5677                                           &chunk_rec->dextents);
5678                         }
5679                 } else {
5680                         if (!silent)
5681                                 fprintf(stderr,
5682                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
5683                                         chunk_rec->objectid,
5684                                         chunk_rec->type,
5685                                         chunk_rec->offset,
5686                                         chunk_rec->stripes[i].devid,
5687                                         chunk_rec->stripes[i].offset);
5688                         ret = -1;
5689                 }
5690         }
5691         return ret;
5692 }
5693
5694 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
5695 int check_chunks(struct cache_tree *chunk_cache,
5696                  struct block_group_tree *block_group_cache,
5697                  struct device_extent_tree *dev_extent_cache,
5698                  struct list_head *good, struct list_head *bad, int silent)
5699 {
5700         struct cache_extent *chunk_item;
5701         struct chunk_record *chunk_rec;
5702         struct block_group_record *bg_rec;
5703         struct device_extent_record *dext_rec;
5704         int err;
5705         int ret = 0;
5706
5707         chunk_item = first_cache_extent(chunk_cache);
5708         while (chunk_item) {
5709                 chunk_rec = container_of(chunk_item, struct chunk_record,
5710                                          cache);
5711                 err = check_chunk_refs(chunk_rec, block_group_cache,
5712                                        dev_extent_cache, silent);
5713                 if (err) {
5714                         ret = err;
5715                         if (bad)
5716                                 list_add_tail(&chunk_rec->list, bad);
5717                 } else {
5718                         if (good)
5719                                 list_add_tail(&chunk_rec->list, good);
5720                 }
5721
5722                 chunk_item = next_cache_extent(chunk_item);
5723         }
5724
5725         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
5726                 if (!silent)
5727                         fprintf(stderr,
5728                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
5729                                 bg_rec->objectid,
5730                                 bg_rec->offset,
5731                                 bg_rec->flags);
5732                 if (!ret)
5733                         ret = 1;
5734         }
5735
5736         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
5737                             chunk_list) {
5738                 if (!silent)
5739                         fprintf(stderr,
5740                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
5741                                 dext_rec->objectid,
5742                                 dext_rec->offset,
5743                                 dext_rec->length);
5744                 if (!ret)
5745                         ret = 1;
5746         }
5747         return ret;
5748 }
5749
5750
5751 static int check_device_used(struct device_record *dev_rec,
5752                              struct device_extent_tree *dext_cache)
5753 {
5754         struct cache_extent *cache;
5755         struct device_extent_record *dev_extent_rec;
5756         u64 total_byte = 0;
5757
5758         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
5759         while (cache) {
5760                 dev_extent_rec = container_of(cache,
5761                                               struct device_extent_record,
5762                                               cache);
5763                 if (dev_extent_rec->objectid != dev_rec->devid)
5764                         break;
5765
5766                 list_del(&dev_extent_rec->device_list);
5767                 total_byte += dev_extent_rec->length;
5768                 cache = next_cache_extent(cache);
5769         }
5770
5771         if (total_byte != dev_rec->byte_used) {
5772                 fprintf(stderr,
5773                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
5774                         total_byte, dev_rec->byte_used, dev_rec->objectid,
5775                         dev_rec->type, dev_rec->offset);
5776                 return -1;
5777         } else {
5778                 return 0;
5779         }
5780 }
5781
5782 /* check btrfs_dev_item -> btrfs_dev_extent */
5783 static int check_devices(struct rb_root *dev_cache,
5784                          struct device_extent_tree *dev_extent_cache)
5785 {
5786         struct rb_node *dev_node;
5787         struct device_record *dev_rec;
5788         struct device_extent_record *dext_rec;
5789         int err;
5790         int ret = 0;
5791
5792         dev_node = rb_first(dev_cache);
5793         while (dev_node) {
5794                 dev_rec = container_of(dev_node, struct device_record, node);
5795                 err = check_device_used(dev_rec, dev_extent_cache);
5796                 if (err)
5797                         ret = err;
5798
5799                 dev_node = rb_next(dev_node);
5800         }
5801         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
5802                             device_list) {
5803                 fprintf(stderr,
5804                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
5805                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
5806                 if (!ret)
5807                         ret = 1;
5808         }
5809         return ret;
5810 }
5811
5812 static int check_chunks_and_extents(struct btrfs_root *root)
5813 {
5814         struct rb_root dev_cache;
5815         struct cache_tree chunk_cache;
5816         struct block_group_tree block_group_cache;
5817         struct device_extent_tree dev_extent_cache;
5818         struct cache_tree extent_cache;
5819         struct cache_tree seen;
5820         struct cache_tree pending;
5821         struct cache_tree reada;
5822         struct cache_tree nodes;
5823         struct cache_tree corrupt_blocks;
5824         struct btrfs_path path;
5825         struct btrfs_key key;
5826         struct btrfs_key found_key;
5827         int ret, err = 0;
5828         u64 last = 0;
5829         struct block_info *bits;
5830         int bits_nr;
5831         struct extent_buffer *leaf;
5832         struct btrfs_trans_handle *trans = NULL;
5833         int slot;
5834         struct btrfs_root_item ri;
5835         struct list_head dropping_trees;
5836
5837         dev_cache = RB_ROOT;
5838         cache_tree_init(&chunk_cache);
5839         block_group_tree_init(&block_group_cache);
5840         device_extent_tree_init(&dev_extent_cache);
5841
5842         cache_tree_init(&extent_cache);
5843         cache_tree_init(&seen);
5844         cache_tree_init(&pending);
5845         cache_tree_init(&nodes);
5846         cache_tree_init(&reada);
5847         cache_tree_init(&corrupt_blocks);
5848         INIT_LIST_HEAD(&dropping_trees);
5849
5850         if (repair) {
5851                 trans = btrfs_start_transaction(root, 1);
5852                 if (IS_ERR(trans)) {
5853                         fprintf(stderr, "Error starting transaction\n");
5854                         return PTR_ERR(trans);
5855                 }
5856                 root->fs_info->fsck_extent_cache = &extent_cache;
5857                 root->fs_info->free_extent_hook = free_extent_hook;
5858                 root->fs_info->corrupt_blocks = &corrupt_blocks;
5859         }
5860
5861         bits_nr = 1024;
5862         bits = malloc(bits_nr * sizeof(struct block_info));
5863         if (!bits) {
5864                 perror("malloc");
5865                 exit(1);
5866         }
5867
5868 again:
5869         add_root_to_pending(root->fs_info->tree_root->node,
5870                             &extent_cache, &pending, &seen, &nodes,
5871                             &root->fs_info->tree_root->root_key);
5872
5873         add_root_to_pending(root->fs_info->chunk_root->node,
5874                             &extent_cache, &pending, &seen, &nodes,
5875                             &root->fs_info->chunk_root->root_key);
5876
5877         btrfs_init_path(&path);
5878         key.offset = 0;
5879         key.objectid = 0;
5880         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
5881         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
5882                                         &key, &path, 0, 0);
5883         BUG_ON(ret < 0);
5884         while(1) {
5885                 leaf = path.nodes[0];
5886                 slot = path.slots[0];
5887                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
5888                         ret = btrfs_next_leaf(root, &path);
5889                         if (ret != 0)
5890                                 break;
5891                         leaf = path.nodes[0];
5892                         slot = path.slots[0];
5893                 }
5894                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
5895                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
5896                         unsigned long offset;
5897                         struct extent_buffer *buf;
5898
5899                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5900                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
5901                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
5902                                 buf = read_tree_block(root->fs_info->tree_root,
5903                                                       btrfs_root_bytenr(&ri),
5904                                                       btrfs_level_size(root,
5905                                                       btrfs_root_level(&ri)),
5906                                                       0);
5907                                 if (!buf) {
5908                                         ret = -EIO;
5909                                         goto out;
5910                                 }
5911                                 add_root_to_pending(buf, &extent_cache,
5912                                                     &pending, &seen, &nodes,
5913                                                     &found_key);
5914                                 free_extent_buffer(buf);
5915                         } else {
5916                                 struct dropping_root_item_record *dri_rec;
5917                                 dri_rec = malloc(sizeof(*dri_rec));
5918                                 if (!dri_rec) {
5919                                         perror("malloc");
5920                                         exit(1);
5921                                 }
5922                                 memcpy(&dri_rec->ri, &ri, sizeof(ri));
5923                                 memcpy(&dri_rec->found_key, &found_key,
5924                                        sizeof(found_key));
5925                                 list_add_tail(&dri_rec->list, &dropping_trees);
5926                         }
5927                 }
5928                 path.slots[0]++;
5929         }
5930         btrfs_release_path(&path);
5931         while (1) {
5932                 ret = run_next_block(trans, root, bits, bits_nr, &last,
5933                                      &pending, &seen, &reada, &nodes,
5934                                      &extent_cache, &chunk_cache, &dev_cache,
5935                                      &block_group_cache, &dev_extent_cache,
5936                                      NULL);
5937                 if (ret != 0)
5938                         break;
5939         }
5940
5941         while (!list_empty(&dropping_trees)) {
5942                 struct dropping_root_item_record *rec;
5943                 struct extent_buffer *buf;
5944                 rec = list_entry(dropping_trees.next,
5945                                  struct dropping_root_item_record, list);
5946                 last = 0;
5947                 if (!bits) {
5948                         perror("realloc");
5949                         exit(1);
5950                 }
5951                 buf = read_tree_block(root->fs_info->tree_root,
5952                                       btrfs_root_bytenr(&rec->ri),
5953                                       btrfs_level_size(root,
5954                                       btrfs_root_level(&rec->ri)), 0);
5955                 if (!buf) {
5956                         ret = -EIO;
5957                         goto out;
5958                 }
5959                 add_root_to_pending(buf, &extent_cache, &pending,
5960                                     &seen, &nodes, &rec->found_key);
5961                 while (1) {
5962                         ret = run_next_block(trans, root, bits, bits_nr, &last,
5963                                              &pending, &seen, &reada,
5964                                              &nodes, &extent_cache,
5965                                              &chunk_cache, &dev_cache,
5966                                              &block_group_cache,
5967                                              &dev_extent_cache,
5968                                              &rec->ri);
5969                         if (ret != 0)
5970                                 break;
5971                 }
5972                 free_extent_buffer(buf);
5973                 list_del(&rec->list);
5974                 free(rec);
5975         }
5976
5977         if (ret >= 0)
5978                 ret = check_extent_refs(trans, root, &extent_cache);
5979         if (ret == -EAGAIN) {
5980                 ret = btrfs_commit_transaction(trans, root);
5981                 if (ret)
5982                         goto out;
5983
5984                 trans = btrfs_start_transaction(root, 1);
5985                 if (IS_ERR(trans)) {
5986                         ret = PTR_ERR(trans);
5987                         goto out;
5988                 }
5989
5990                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
5991                 free_extent_cache_tree(&seen);
5992                 free_extent_cache_tree(&pending);
5993                 free_extent_cache_tree(&reada);
5994                 free_extent_cache_tree(&nodes);
5995                 free_extent_record_cache(root->fs_info, &extent_cache);
5996                 goto again;
5997         }
5998
5999         err = check_chunks(&chunk_cache, &block_group_cache,
6000                            &dev_extent_cache, NULL, NULL, 0);
6001         if (err && !ret)
6002                 ret = err;
6003
6004         err = check_devices(&dev_cache, &dev_extent_cache);
6005         if (err && !ret)
6006                 ret = err;
6007
6008         if (trans) {
6009                 err = btrfs_commit_transaction(trans, root);
6010                 if (!ret)
6011                         ret = err;
6012         }
6013 out:
6014         if (repair) {
6015                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6016                 root->fs_info->fsck_extent_cache = NULL;
6017                 root->fs_info->free_extent_hook = NULL;
6018                 root->fs_info->corrupt_blocks = NULL;
6019         }
6020         free(bits);
6021         free_chunk_cache_tree(&chunk_cache);
6022         free_device_cache_tree(&dev_cache);
6023         free_block_group_tree(&block_group_cache);
6024         free_device_extent_tree(&dev_extent_cache);
6025         free_extent_cache_tree(&seen);
6026         free_extent_cache_tree(&pending);
6027         free_extent_cache_tree(&reada);
6028         free_extent_cache_tree(&nodes);
6029         return ret;
6030 }
6031
6032 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
6033                            struct btrfs_root *root, int overwrite)
6034 {
6035         struct extent_buffer *c;
6036         struct extent_buffer *old = root->node;
6037         int level;
6038         int ret;
6039         struct btrfs_disk_key disk_key = {0,0,0};
6040
6041         level = 0;
6042
6043         if (overwrite) {
6044                 c = old;
6045                 extent_buffer_get(c);
6046                 goto init;
6047         }
6048         c = btrfs_alloc_free_block(trans, root,
6049                                    btrfs_level_size(root, 0),
6050                                    root->root_key.objectid,
6051                                    &disk_key, level, 0, 0);
6052         if (IS_ERR(c)) {
6053                 c = old;
6054                 extent_buffer_get(c);
6055                 overwrite = 1;
6056         }
6057 init:
6058         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
6059         btrfs_set_header_level(c, level);
6060         btrfs_set_header_bytenr(c, c->start);
6061         btrfs_set_header_generation(c, trans->transid);
6062         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
6063         btrfs_set_header_owner(c, root->root_key.objectid);
6064
6065         write_extent_buffer(c, root->fs_info->fsid,
6066                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
6067
6068         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
6069                             btrfs_header_chunk_tree_uuid(c),
6070                             BTRFS_UUID_SIZE);
6071
6072         btrfs_mark_buffer_dirty(c);
6073         /*
6074          * this case can happen in the following case:
6075          *
6076          * 1.overwrite previous root.
6077          *
6078          * 2.reinit reloc data root, this is because we skip pin
6079          * down reloc data tree before which means we can allocate
6080          * same block bytenr here.
6081          */
6082         if (old->start == c->start) {
6083                 btrfs_set_root_generation(&root->root_item,
6084                                           trans->transid);
6085                 root->root_item.level = btrfs_header_level(root->node);
6086                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
6087                                         &root->root_key, &root->root_item);
6088                 if (ret) {
6089                         free_extent_buffer(c);
6090                         return ret;
6091                 }
6092         }
6093         free_extent_buffer(old);
6094         root->node = c;
6095         add_root_to_dirty_list(root);
6096         return 0;
6097 }
6098
6099 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
6100                                 struct extent_buffer *eb, int tree_root)
6101 {
6102         struct extent_buffer *tmp;
6103         struct btrfs_root_item *ri;
6104         struct btrfs_key key;
6105         u64 bytenr;
6106         u32 leafsize;
6107         int level = btrfs_header_level(eb);
6108         int nritems;
6109         int ret;
6110         int i;
6111
6112         btrfs_pin_extent(fs_info, eb->start, eb->len);
6113
6114         leafsize = btrfs_super_leafsize(fs_info->super_copy);
6115         nritems = btrfs_header_nritems(eb);
6116         for (i = 0; i < nritems; i++) {
6117                 if (level == 0) {
6118                         btrfs_item_key_to_cpu(eb, &key, i);
6119                         if (key.type != BTRFS_ROOT_ITEM_KEY)
6120                                 continue;
6121                         /* Skip the extent root and reloc roots */
6122                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6123                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
6124                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
6125                                 continue;
6126                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
6127                         bytenr = btrfs_disk_root_bytenr(eb, ri);
6128
6129                         /*
6130                          * If at any point we start needing the real root we
6131                          * will have to build a stump root for the root we are
6132                          * in, but for now this doesn't actually use the root so
6133                          * just pass in extent_root.
6134                          */
6135                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6136                                               leafsize, 0);
6137                         if (!tmp) {
6138                                 fprintf(stderr, "Error reading root block\n");
6139                                 return -EIO;
6140                         }
6141                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
6142                         free_extent_buffer(tmp);
6143                         if (ret)
6144                                 return ret;
6145                 } else {
6146                         bytenr = btrfs_node_blockptr(eb, i);
6147
6148                         /* If we aren't the tree root don't read the block */
6149                         if (level == 1 && !tree_root) {
6150                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
6151                                 continue;
6152                         }
6153
6154                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6155                                               leafsize, 0);
6156                         if (!tmp) {
6157                                 fprintf(stderr, "Error reading tree block\n");
6158                                 return -EIO;
6159                         }
6160                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
6161                         free_extent_buffer(tmp);
6162                         if (ret)
6163                                 return ret;
6164                 }
6165         }
6166
6167         return 0;
6168 }
6169
6170 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
6171 {
6172         int ret;
6173
6174         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
6175         if (ret)
6176                 return ret;
6177
6178         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
6179 }
6180
6181 static int reset_block_groups(struct btrfs_fs_info *fs_info)
6182 {
6183         struct btrfs_block_group_cache *cache;
6184         struct btrfs_path *path;
6185         struct extent_buffer *leaf;
6186         struct btrfs_chunk *chunk;
6187         struct btrfs_key key;
6188         int ret;
6189         u64 start;
6190
6191         path = btrfs_alloc_path();
6192         if (!path)
6193                 return -ENOMEM;
6194
6195         key.objectid = 0;
6196         key.type = BTRFS_CHUNK_ITEM_KEY;
6197         key.offset = 0;
6198
6199         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
6200         if (ret < 0) {
6201                 btrfs_free_path(path);
6202                 return ret;
6203         }
6204
6205         /*
6206          * We do this in case the block groups were screwed up and had alloc
6207          * bits that aren't actually set on the chunks.  This happens with
6208          * restored images every time and could happen in real life I guess.
6209          */
6210         fs_info->avail_data_alloc_bits = 0;
6211         fs_info->avail_metadata_alloc_bits = 0;
6212         fs_info->avail_system_alloc_bits = 0;
6213
6214         /* First we need to create the in-memory block groups */
6215         while (1) {
6216                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6217                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
6218                         if (ret < 0) {
6219                                 btrfs_free_path(path);
6220                                 return ret;
6221                         }
6222                         if (ret) {
6223                                 ret = 0;
6224                                 break;
6225                         }
6226                 }
6227                 leaf = path->nodes[0];
6228                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6229                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
6230                         path->slots[0]++;
6231                         continue;
6232                 }
6233
6234                 chunk = btrfs_item_ptr(leaf, path->slots[0],
6235                                        struct btrfs_chunk);
6236                 btrfs_add_block_group(fs_info, 0,
6237                                       btrfs_chunk_type(leaf, chunk),
6238                                       key.objectid, key.offset,
6239                                       btrfs_chunk_length(leaf, chunk));
6240                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
6241                                  key.offset + btrfs_chunk_length(leaf, chunk),
6242                                  GFP_NOFS);
6243                 path->slots[0]++;
6244         }
6245         start = 0;
6246         while (1) {
6247                 cache = btrfs_lookup_first_block_group(fs_info, start);
6248                 if (!cache)
6249                         break;
6250                 cache->cached = 1;
6251                 start = cache->key.objectid + cache->key.offset;
6252         }
6253
6254         btrfs_free_path(path);
6255         return 0;
6256 }
6257
6258 static int reset_balance(struct btrfs_trans_handle *trans,
6259                          struct btrfs_fs_info *fs_info)
6260 {
6261         struct btrfs_root *root = fs_info->tree_root;
6262         struct btrfs_path *path;
6263         struct extent_buffer *leaf;
6264         struct btrfs_key key;
6265         int del_slot, del_nr = 0;
6266         int ret;
6267         int found = 0;
6268
6269         path = btrfs_alloc_path();
6270         if (!path)
6271                 return -ENOMEM;
6272
6273         key.objectid = BTRFS_BALANCE_OBJECTID;
6274         key.type = BTRFS_BALANCE_ITEM_KEY;
6275         key.offset = 0;
6276
6277         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6278         if (ret) {
6279                 if (ret > 0)
6280                         ret = 0;
6281                 if (!ret)
6282                         goto reinit_data_reloc;
6283                 else
6284                         goto out;
6285         }
6286
6287         ret = btrfs_del_item(trans, root, path);
6288         if (ret)
6289                 goto out;
6290         btrfs_release_path(path);
6291
6292         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
6293         key.type = BTRFS_ROOT_ITEM_KEY;
6294         key.offset = 0;
6295
6296         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6297         if (ret < 0)
6298                 goto out;
6299         while (1) {
6300                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6301                         if (!found)
6302                                 break;
6303
6304                         if (del_nr) {
6305                                 ret = btrfs_del_items(trans, root, path,
6306                                                       del_slot, del_nr);
6307                                 del_nr = 0;
6308                                 if (ret)
6309                                         goto out;
6310                         }
6311                         key.offset++;
6312                         btrfs_release_path(path);
6313
6314                         found = 0;
6315                         ret = btrfs_search_slot(trans, root, &key, path,
6316                                                 -1, 1);
6317                         if (ret < 0)
6318                                 goto out;
6319                         continue;
6320                 }
6321                 found = 1;
6322                 leaf = path->nodes[0];
6323                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6324                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
6325                         break;
6326                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6327                         path->slots[0]++;
6328                         continue;
6329                 }
6330                 if (!del_nr) {
6331                         del_slot = path->slots[0];
6332                         del_nr = 1;
6333                 } else {
6334                         del_nr++;
6335                 }
6336                 path->slots[0]++;
6337         }
6338
6339         if (del_nr) {
6340                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
6341                 if (ret)
6342                         goto out;
6343         }
6344         btrfs_release_path(path);
6345
6346 reinit_data_reloc:
6347         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
6348         key.type = BTRFS_ROOT_ITEM_KEY;
6349         key.offset = (u64)-1;
6350         root = btrfs_read_fs_root(fs_info, &key);
6351         if (IS_ERR(root)) {
6352                 fprintf(stderr, "Error reading data reloc tree\n");
6353                 return PTR_ERR(root);
6354         }
6355         root->track_dirty = 1;
6356         if (root->last_trans != trans->transid) {
6357                 root->last_trans = trans->transid;
6358                 root->commit_root = root->node;
6359                 extent_buffer_get(root->node);
6360         }
6361         ret = btrfs_fsck_reinit_root(trans, root, 0);
6362         if (ret)
6363                 goto out;
6364         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
6365 out:
6366         btrfs_free_path(path);
6367         return ret;
6368 }
6369
6370 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
6371                               struct btrfs_fs_info *fs_info)
6372 {
6373         u64 start = 0;
6374         int ret;
6375
6376         /*
6377          * The only reason we don't do this is because right now we're just
6378          * walking the trees we find and pinning down their bytes, we don't look
6379          * at any of the leaves.  In order to do mixed groups we'd have to check
6380          * the leaves of any fs roots and pin down the bytes for any file
6381          * extents we find.  Not hard but why do it if we don't have to?
6382          */
6383         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
6384                 fprintf(stderr, "We don't support re-initing the extent tree "
6385                         "for mixed block groups yet, please notify a btrfs "
6386                         "developer you want to do this so they can add this "
6387                         "functionality.\n");
6388                 return -EINVAL;
6389         }
6390
6391         /*
6392          * first we need to walk all of the trees except the extent tree and pin
6393          * down the bytes that are in use so we don't overwrite any existing
6394          * metadata.
6395          */
6396         ret = pin_metadata_blocks(fs_info);
6397         if (ret) {
6398                 fprintf(stderr, "error pinning down used bytes\n");
6399                 return ret;
6400         }
6401
6402         /*
6403          * Need to drop all the block groups since we're going to recreate all
6404          * of them again.
6405          */
6406         btrfs_free_block_groups(fs_info);
6407         ret = reset_block_groups(fs_info);
6408         if (ret) {
6409                 fprintf(stderr, "error resetting the block groups\n");
6410                 return ret;
6411         }
6412
6413         /* Ok we can allocate now, reinit the extent root */
6414         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
6415         if (ret) {
6416                 fprintf(stderr, "extent root initialization failed\n");
6417                 /*
6418                  * When the transaction code is updated we should end the
6419                  * transaction, but for now progs only knows about commit so
6420                  * just return an error.
6421                  */
6422                 return ret;
6423         }
6424
6425         /*
6426          * Now we have all the in-memory block groups setup so we can make
6427          * allocations properly, and the metadata we care about is safe since we
6428          * pinned all of it above.
6429          */
6430         while (1) {
6431                 struct btrfs_block_group_cache *cache;
6432
6433                 cache = btrfs_lookup_first_block_group(fs_info, start);
6434                 if (!cache)
6435                         break;
6436                 start = cache->key.objectid + cache->key.offset;
6437                 ret = btrfs_insert_item(trans, fs_info->extent_root,
6438                                         &cache->key, &cache->item,
6439                                         sizeof(cache->item));
6440                 if (ret) {
6441                         fprintf(stderr, "Error adding block group\n");
6442                         return ret;
6443                 }
6444                 btrfs_extent_post_op(trans, fs_info->extent_root);
6445         }
6446
6447         ret = reset_balance(trans, fs_info);
6448         if (ret)
6449                 fprintf(stderr, "error reseting the pending balance\n");
6450
6451         return ret;
6452 }
6453
6454 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
6455 {
6456         struct btrfs_path *path;
6457         struct btrfs_trans_handle *trans;
6458         struct btrfs_key key;
6459         int ret;
6460
6461         printf("Recowing metadata block %llu\n", eb->start);
6462         key.objectid = btrfs_header_owner(eb);
6463         key.type = BTRFS_ROOT_ITEM_KEY;
6464         key.offset = (u64)-1;
6465
6466         root = btrfs_read_fs_root(root->fs_info, &key);
6467         if (IS_ERR(root)) {
6468                 fprintf(stderr, "Couldn't find owner root %llu\n",
6469                         key.objectid);
6470                 return PTR_ERR(root);
6471         }
6472
6473         path = btrfs_alloc_path();
6474         if (!path)
6475                 return -ENOMEM;
6476
6477         trans = btrfs_start_transaction(root, 1);
6478         if (IS_ERR(trans)) {
6479                 btrfs_free_path(path);
6480                 return PTR_ERR(trans);
6481         }
6482
6483         path->lowest_level = btrfs_header_level(eb);
6484         if (path->lowest_level)
6485                 btrfs_node_key_to_cpu(eb, &key, 0);
6486         else
6487                 btrfs_item_key_to_cpu(eb, &key, 0);
6488
6489         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6490         btrfs_commit_transaction(trans, root);
6491         btrfs_free_path(path);
6492         return ret;
6493 }
6494
6495 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
6496 {
6497         struct btrfs_path *path;
6498         struct btrfs_trans_handle *trans;
6499         struct btrfs_key key;
6500         int ret;
6501
6502         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
6503                bad->key.type, bad->key.offset);
6504         key.objectid = bad->root_id;
6505         key.type = BTRFS_ROOT_ITEM_KEY;
6506         key.offset = (u64)-1;
6507
6508         root = btrfs_read_fs_root(root->fs_info, &key);
6509         if (IS_ERR(root)) {
6510                 fprintf(stderr, "Couldn't find owner root %llu\n",
6511                         key.objectid);
6512                 return PTR_ERR(root);
6513         }
6514
6515         path = btrfs_alloc_path();
6516         if (!path)
6517                 return -ENOMEM;
6518
6519         trans = btrfs_start_transaction(root, 1);
6520         if (IS_ERR(trans)) {
6521                 btrfs_free_path(path);
6522                 return PTR_ERR(trans);
6523         }
6524
6525         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
6526         if (ret) {
6527                 if (ret > 0)
6528                         ret = 0;
6529                 goto out;
6530         }
6531         ret = btrfs_del_item(trans, root, path);
6532 out:
6533         btrfs_commit_transaction(trans, root);
6534         btrfs_free_path(path);
6535         return ret;
6536 }
6537
6538 static struct option long_options[] = {
6539         { "super", 1, NULL, 's' },
6540         { "repair", 0, NULL, 0 },
6541         { "init-csum-tree", 0, NULL, 0 },
6542         { "init-extent-tree", 0, NULL, 0 },
6543         { "check-data-csum", 0, NULL, 0 },
6544         { "backup", 0, NULL, 0 },
6545         { "subvol-extents", no_argument, NULL, 'E' },
6546         { "qgroup-report", 0, NULL, 'Q' },
6547         { NULL, 0, NULL, 0}
6548 };
6549
6550 const char * const cmd_check_usage[] = {
6551         "btrfs check [options] <device>",
6552         "Check an unmounted btrfs filesystem.",
6553         "",
6554         "-s|--super <superblock>     use this superblock copy",
6555         "-b|--backup                 use the backup root copy",
6556         "--repair                    try to repair the filesystem",
6557         "--init-csum-tree            create a new CRC tree",
6558         "--init-extent-tree          create a new extent tree",
6559         "--check-data-csum           verify checkums of data blocks",
6560         "--qgroup-report             print a report on qgroup consistency",
6561         "--subvol-extents            print subvolume extents and sharing state",
6562         NULL
6563 };
6564
6565 int cmd_check(int argc, char **argv)
6566 {
6567         struct cache_tree root_cache;
6568         struct btrfs_root *root;
6569         struct btrfs_fs_info *info;
6570         u64 bytenr = 0;
6571         u64 subvolid = 0;
6572         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
6573         int ret;
6574         u64 num;
6575         int option_index = 0;
6576         int init_csum_tree = 0;
6577         int qgroup_report = 0;
6578         enum btrfs_open_ctree_flags ctree_flags =
6579                 OPEN_CTREE_PARTIAL | OPEN_CTREE_EXCLUSIVE;
6580
6581         while(1) {
6582                 int c;
6583                 c = getopt_long(argc, argv, "as:b", long_options,
6584                                 &option_index);
6585                 if (c < 0)
6586                         break;
6587                 switch(c) {
6588                         case 'a': /* ignored */ break;
6589                         case 'b':
6590                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
6591                                 break;
6592                         case 's':
6593                                 num = arg_strtou64(optarg);
6594                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
6595                                         fprintf(stderr,
6596                                                 "ERROR: super mirror should be less than: %d\n",
6597                                                 BTRFS_SUPER_MIRROR_MAX);
6598                                         exit(1);
6599                                 }
6600                                 bytenr = btrfs_sb_offset(((int)num));
6601                                 printf("using SB copy %llu, bytenr %llu\n", num,
6602                                        (unsigned long long)bytenr);
6603                                 break;
6604                         case 'Q':
6605                                 qgroup_report = 1;
6606                                 break;
6607                         case 'E':
6608                                 subvolid = arg_strtou64(optarg);
6609                                 break;
6610                         case '?':
6611                         case 'h':
6612                                 usage(cmd_check_usage);
6613                 }
6614                 if (option_index == 1) {
6615                         printf("enabling repair mode\n");
6616                         repair = 1;
6617                         ctree_flags |= OPEN_CTREE_WRITES;
6618                 } else if (option_index == 2) {
6619                         printf("Creating a new CRC tree\n");
6620                         init_csum_tree = 1;
6621                         repair = 1;
6622                         ctree_flags |= OPEN_CTREE_WRITES;
6623                 } else if (option_index == 3) {
6624                         init_extent_tree = 1;
6625                         ctree_flags |= (OPEN_CTREE_WRITES |
6626                                         OPEN_CTREE_NO_BLOCK_GROUPS);
6627                         repair = 1;
6628                 } else if (option_index == 4) {
6629                         check_data_csum = 1;
6630                 }
6631         }
6632         argc = argc - optind;
6633
6634         if (check_argc_exact(argc, 1))
6635                 usage(cmd_check_usage);
6636
6637         radix_tree_init();
6638         cache_tree_init(&root_cache);
6639
6640         if((ret = check_mounted(argv[optind])) < 0) {
6641                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
6642                 goto err_out;
6643         } else if(ret) {
6644                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
6645                 ret = -EBUSY;
6646                 goto err_out;
6647         }
6648
6649         info = open_ctree_fs_info(argv[optind], bytenr, 0, ctree_flags);
6650         if (!info) {
6651                 fprintf(stderr, "Couldn't open file system\n");
6652                 ret = -EIO;
6653                 goto err_out;
6654         }
6655
6656         root = info->fs_root;
6657         uuid_unparse(info->super_copy->fsid, uuidbuf);
6658         if (qgroup_report) {
6659                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
6660                        uuidbuf);
6661                 ret = qgroup_verify_all(info);
6662                 if (ret == 0)
6663                         print_qgroup_report(1);
6664                 goto close_out;
6665         }
6666         if (subvolid) {
6667                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
6668                        subvolid, argv[optind], uuidbuf);
6669                 ret = print_extent_state(info, subvolid);
6670                 goto close_out;
6671         }
6672         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
6673
6674         if (!extent_buffer_uptodate(info->tree_root->node) ||
6675             !extent_buffer_uptodate(info->dev_root->node) ||
6676             !extent_buffer_uptodate(info->chunk_root->node)) {
6677                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6678                 ret = -EIO;
6679                 goto close_out;
6680         }
6681
6682         if (init_extent_tree || init_csum_tree) {
6683                 struct btrfs_trans_handle *trans;
6684
6685                 trans = btrfs_start_transaction(info->extent_root, 0);
6686                 if (IS_ERR(trans)) {
6687                         fprintf(stderr, "Error starting transaction\n");
6688                         ret = PTR_ERR(trans);
6689                         goto close_out;
6690                 }
6691
6692                 if (init_extent_tree) {
6693                         printf("Creating a new extent tree\n");
6694                         ret = reinit_extent_tree(trans, info);
6695                         if (ret)
6696                                 goto close_out;
6697                 }
6698
6699                 if (init_csum_tree) {
6700                         fprintf(stderr, "Reinit crc root\n");
6701                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
6702                         if (ret) {
6703                                 fprintf(stderr, "crc root initialization failed\n");
6704                                 ret = -EIO;
6705                                 goto close_out;
6706                         }
6707                 }
6708                 /*
6709                  * Ok now we commit and run the normal fsck, which will add
6710                  * extent entries for all of the items it finds.
6711                  */
6712                 ret = btrfs_commit_transaction(trans, info->extent_root);
6713                 if (ret)
6714                         goto close_out;
6715         }
6716         if (!extent_buffer_uptodate(info->extent_root->node)) {
6717                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6718                 ret = -EIO;
6719                 goto close_out;
6720         }
6721
6722         fprintf(stderr, "checking extents\n");
6723         ret = check_chunks_and_extents(root);
6724         if (ret)
6725                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
6726
6727         fprintf(stderr, "checking free space cache\n");
6728         ret = check_space_cache(root);
6729         if (ret)
6730                 goto out;
6731
6732         /*
6733          * We used to have to have these hole extents in between our real
6734          * extents so if we don't have this flag set we need to make sure there
6735          * are no gaps in the file extents for inodes, otherwise we can just
6736          * ignore it when this happens.
6737          */
6738         no_holes = btrfs_fs_incompat(root->fs_info,
6739                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
6740         fprintf(stderr, "checking fs roots\n");
6741         ret = check_fs_roots(root, &root_cache);
6742         if (ret)
6743                 goto out;
6744
6745         fprintf(stderr, "checking csums\n");
6746         ret = check_csums(root);
6747         if (ret)
6748                 goto out;
6749
6750         fprintf(stderr, "checking root refs\n");
6751         ret = check_root_refs(root, &root_cache);
6752         if (ret)
6753                 goto out;
6754
6755         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
6756                 struct extent_buffer *eb;
6757
6758                 eb = list_first_entry(&root->fs_info->recow_ebs,
6759                                       struct extent_buffer, recow);
6760                 ret = recow_extent_buffer(root, eb);
6761                 if (ret)
6762                         break;
6763         }
6764
6765         while (!list_empty(&delete_items)) {
6766                 struct bad_item *bad;
6767
6768                 bad = list_first_entry(&delete_items, struct bad_item, list);
6769                 list_del_init(&bad->list);
6770                 if (repair)
6771                         ret = delete_bad_item(root, bad);
6772                 free(bad);
6773         }
6774
6775         if (info->quota_enabled) {
6776                 int err;
6777                 fprintf(stderr, "checking quota groups\n");
6778                 err = qgroup_verify_all(info);
6779                 if (err)
6780                         goto out;
6781         }
6782
6783         if (!list_empty(&root->fs_info->recow_ebs)) {
6784                 fprintf(stderr, "Transid errors in file system\n");
6785                 ret = 1;
6786         }
6787 out:
6788         print_qgroup_report(0);
6789         if (found_old_backref) { /*
6790                  * there was a disk format change when mixed
6791                  * backref was in testing tree. The old format
6792                  * existed about one week.
6793                  */
6794                 printf("\n * Found old mixed backref format. "
6795                        "The old format is not supported! *"
6796                        "\n * Please mount the FS in readonly mode, "
6797                        "backup data and re-format the FS. *\n\n");
6798                 ret = 1;
6799         }
6800         printf("found %llu bytes used err is %d\n",
6801                (unsigned long long)bytes_used, ret);
6802         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
6803         printf("total tree bytes: %llu\n",
6804                (unsigned long long)total_btree_bytes);
6805         printf("total fs tree bytes: %llu\n",
6806                (unsigned long long)total_fs_tree_bytes);
6807         printf("total extent tree bytes: %llu\n",
6808                (unsigned long long)total_extent_tree_bytes);
6809         printf("btree space waste bytes: %llu\n",
6810                (unsigned long long)btree_space_waste);
6811         printf("file data blocks allocated: %llu\n referenced %llu\n",
6812                 (unsigned long long)data_bytes_allocated,
6813                 (unsigned long long)data_bytes_referenced);
6814         printf("%s\n", BTRFS_BUILD_VERSION);
6815
6816         free_root_recs_tree(&root_cache);
6817 close_out:
6818         close_ctree(root);
6819 err_out:
6820         return ret;
6821 }