btrfs-progs: add root to dirty list when fixing bad keys
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "ctree.h"
31 #include "volumes.h"
32 #include "repair.h"
33 #include "disk-io.h"
34 #include "print-tree.h"
35 #include "transaction.h"
36 #include "version.h"
37 #include "utils.h"
38 #include "commands.h"
39 #include "free-space-cache.h"
40 #include "btrfsck.h"
41 #include "qgroup-verify.h"
42
43 static u64 bytes_used = 0;
44 static u64 total_csum_bytes = 0;
45 static u64 total_btree_bytes = 0;
46 static u64 total_fs_tree_bytes = 0;
47 static u64 total_extent_tree_bytes = 0;
48 static u64 btree_space_waste = 0;
49 static u64 data_bytes_allocated = 0;
50 static u64 data_bytes_referenced = 0;
51 static int found_old_backref = 0;
52 static LIST_HEAD(duplicate_extents);
53 static LIST_HEAD(delete_items);
54 static int repair = 0;
55 static int no_holes = 0;
56 static int init_extent_tree = 0;
57 static int check_data_csum = 0;
58
59 struct extent_backref {
60         struct list_head list;
61         unsigned int is_data:1;
62         unsigned int found_extent_tree:1;
63         unsigned int full_backref:1;
64         unsigned int found_ref:1;
65         unsigned int broken:1;
66 };
67
68 struct data_backref {
69         struct extent_backref node;
70         union {
71                 u64 parent;
72                 u64 root;
73         };
74         u64 owner;
75         u64 offset;
76         u64 disk_bytenr;
77         u64 bytes;
78         u64 ram_bytes;
79         u32 num_refs;
80         u32 found_ref;
81 };
82
83 struct tree_backref {
84         struct extent_backref node;
85         union {
86                 u64 parent;
87                 u64 root;
88         };
89 };
90
91 struct extent_record {
92         struct list_head backrefs;
93         struct list_head dups;
94         struct list_head list;
95         struct cache_extent cache;
96         struct btrfs_disk_key parent_key;
97         u64 start;
98         u64 max_size;
99         u64 nr;
100         u64 refs;
101         u64 extent_item_refs;
102         u64 generation;
103         u64 parent_generation;
104         u64 info_objectid;
105         u32 num_duplicates;
106         u8 info_level;
107         unsigned int found_rec:1;
108         unsigned int content_checked:1;
109         unsigned int owner_ref_checked:1;
110         unsigned int is_root:1;
111         unsigned int metadata:1;
112 };
113
114 struct inode_backref {
115         struct list_head list;
116         unsigned int found_dir_item:1;
117         unsigned int found_dir_index:1;
118         unsigned int found_inode_ref:1;
119         unsigned int filetype:8;
120         int errors;
121         unsigned int ref_type;
122         u64 dir;
123         u64 index;
124         u16 namelen;
125         char name[0];
126 };
127
128 struct dropping_root_item_record {
129         struct list_head list;
130         struct btrfs_root_item ri;
131         struct btrfs_key found_key;
132 };
133
134 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
135 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
136 #define REF_ERR_NO_INODE_REF            (1 << 2)
137 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
138 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
139 #define REF_ERR_DUP_INODE_REF           (1 << 5)
140 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
141 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
142 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
143 #define REF_ERR_NO_ROOT_REF             (1 << 9)
144 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
145 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
146 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
147
148 struct inode_record {
149         struct list_head backrefs;
150         unsigned int checked:1;
151         unsigned int merging:1;
152         unsigned int found_inode_item:1;
153         unsigned int found_dir_item:1;
154         unsigned int found_file_extent:1;
155         unsigned int found_csum_item:1;
156         unsigned int some_csum_missing:1;
157         unsigned int nodatasum:1;
158         int errors;
159
160         u64 ino;
161         u32 nlink;
162         u32 imode;
163         u64 isize;
164         u64 nbytes;
165
166         u32 found_link;
167         u64 found_size;
168         u64 extent_start;
169         u64 extent_end;
170         u64 first_extent_gap;
171
172         u32 refs;
173 };
174
175 #define I_ERR_NO_INODE_ITEM             (1 << 0)
176 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
177 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
178 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
179 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
180 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
181 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
182 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
183 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
184 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
185 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
186 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
187 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
188 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
189
190 struct root_backref {
191         struct list_head list;
192         unsigned int found_dir_item:1;
193         unsigned int found_dir_index:1;
194         unsigned int found_back_ref:1;
195         unsigned int found_forward_ref:1;
196         unsigned int reachable:1;
197         int errors;
198         u64 ref_root;
199         u64 dir;
200         u64 index;
201         u16 namelen;
202         char name[0];
203 };
204
205 struct root_record {
206         struct list_head backrefs;
207         struct cache_extent cache;
208         unsigned int found_root_item:1;
209         u64 objectid;
210         u32 found_ref;
211 };
212
213 struct ptr_node {
214         struct cache_extent cache;
215         void *data;
216 };
217
218 struct shared_node {
219         struct cache_extent cache;
220         struct cache_tree root_cache;
221         struct cache_tree inode_cache;
222         struct inode_record *current;
223         u32 refs;
224 };
225
226 struct block_info {
227         u64 start;
228         u32 size;
229 };
230
231 struct walk_control {
232         struct cache_tree shared;
233         struct shared_node *nodes[BTRFS_MAX_LEVEL];
234         int active_node;
235         int root_level;
236 };
237
238 struct bad_item {
239         struct btrfs_key key;
240         u64 root_id;
241         struct list_head list;
242 };
243
244 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
245
246 static void record_root_in_trans(struct btrfs_trans_handle *trans,
247                                  struct btrfs_root *root)
248 {
249         if (root->last_trans != trans->transid) {
250                 root->track_dirty = 1;
251                 root->last_trans = trans->transid;
252                 root->commit_root = root->node;
253                 extent_buffer_get(root->node);
254         }
255 }
256
257 static u8 imode_to_type(u32 imode)
258 {
259 #define S_SHIFT 12
260         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
261                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
262                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
263                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
264                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
265                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
266                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
267                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
268         };
269
270         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
271 #undef S_SHIFT
272 }
273
274 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
275 {
276         struct device_record *rec1;
277         struct device_record *rec2;
278
279         rec1 = rb_entry(node1, struct device_record, node);
280         rec2 = rb_entry(node2, struct device_record, node);
281         if (rec1->devid > rec2->devid)
282                 return -1;
283         else if (rec1->devid < rec2->devid)
284                 return 1;
285         else
286                 return 0;
287 }
288
289 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
290 {
291         struct inode_record *rec;
292         struct inode_backref *backref;
293         struct inode_backref *orig;
294         size_t size;
295
296         rec = malloc(sizeof(*rec));
297         memcpy(rec, orig_rec, sizeof(*rec));
298         rec->refs = 1;
299         INIT_LIST_HEAD(&rec->backrefs);
300
301         list_for_each_entry(orig, &orig_rec->backrefs, list) {
302                 size = sizeof(*orig) + orig->namelen + 1;
303                 backref = malloc(size);
304                 memcpy(backref, orig, size);
305                 list_add_tail(&backref->list, &rec->backrefs);
306         }
307         return rec;
308 }
309
310 static void print_inode_error(int errors)
311 {
312         if (errors & I_ERR_NO_INODE_ITEM)
313                 fprintf(stderr, ", no inode item");
314         if (errors & I_ERR_NO_ORPHAN_ITEM)
315                 fprintf(stderr, ", no orphan item");
316         if (errors & I_ERR_DUP_INODE_ITEM)
317                 fprintf(stderr, ", dup inode item");
318         if (errors & I_ERR_DUP_DIR_INDEX)
319                 fprintf(stderr, ", dup dir index");
320         if (errors & I_ERR_ODD_DIR_ITEM)
321                 fprintf(stderr, ", odd dir item");
322         if (errors & I_ERR_ODD_FILE_EXTENT)
323                 fprintf(stderr, ", odd file extent");
324         if (errors & I_ERR_BAD_FILE_EXTENT)
325                 fprintf(stderr, ", bad file extent");
326         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
327                 fprintf(stderr, ", file extent overlap");
328         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
329                 fprintf(stderr, ", file extent discount");
330         if (errors & I_ERR_DIR_ISIZE_WRONG)
331                 fprintf(stderr, ", dir isize wrong");
332         if (errors & I_ERR_FILE_NBYTES_WRONG)
333                 fprintf(stderr, ", nbytes wrong");
334         if (errors & I_ERR_ODD_CSUM_ITEM)
335                 fprintf(stderr, ", odd csum item");
336         if (errors & I_ERR_SOME_CSUM_MISSING)
337                 fprintf(stderr, ", some csum missing");
338         if (errors & I_ERR_LINK_COUNT_WRONG)
339                 fprintf(stderr, ", link count wrong");
340         fprintf(stderr, "\n");
341 }
342
343 static void print_ref_error(int errors)
344 {
345         if (errors & REF_ERR_NO_DIR_ITEM)
346                 fprintf(stderr, ", no dir item");
347         if (errors & REF_ERR_NO_DIR_INDEX)
348                 fprintf(stderr, ", no dir index");
349         if (errors & REF_ERR_NO_INODE_REF)
350                 fprintf(stderr, ", no inode ref");
351         if (errors & REF_ERR_DUP_DIR_ITEM)
352                 fprintf(stderr, ", dup dir item");
353         if (errors & REF_ERR_DUP_DIR_INDEX)
354                 fprintf(stderr, ", dup dir index");
355         if (errors & REF_ERR_DUP_INODE_REF)
356                 fprintf(stderr, ", dup inode ref");
357         if (errors & REF_ERR_INDEX_UNMATCH)
358                 fprintf(stderr, ", index unmatch");
359         if (errors & REF_ERR_FILETYPE_UNMATCH)
360                 fprintf(stderr, ", filetype unmatch");
361         if (errors & REF_ERR_NAME_TOO_LONG)
362                 fprintf(stderr, ", name too long");
363         if (errors & REF_ERR_NO_ROOT_REF)
364                 fprintf(stderr, ", no root ref");
365         if (errors & REF_ERR_NO_ROOT_BACKREF)
366                 fprintf(stderr, ", no root backref");
367         if (errors & REF_ERR_DUP_ROOT_REF)
368                 fprintf(stderr, ", dup root ref");
369         if (errors & REF_ERR_DUP_ROOT_BACKREF)
370                 fprintf(stderr, ", dup root backref");
371         fprintf(stderr, "\n");
372 }
373
374 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
375                                           u64 ino, int mod)
376 {
377         struct ptr_node *node;
378         struct cache_extent *cache;
379         struct inode_record *rec = NULL;
380         int ret;
381
382         cache = lookup_cache_extent(inode_cache, ino, 1);
383         if (cache) {
384                 node = container_of(cache, struct ptr_node, cache);
385                 rec = node->data;
386                 if (mod && rec->refs > 1) {
387                         node->data = clone_inode_rec(rec);
388                         rec->refs--;
389                         rec = node->data;
390                 }
391         } else if (mod) {
392                 rec = calloc(1, sizeof(*rec));
393                 rec->ino = ino;
394                 rec->extent_start = (u64)-1;
395                 rec->first_extent_gap = (u64)-1;
396                 rec->refs = 1;
397                 INIT_LIST_HEAD(&rec->backrefs);
398
399                 node = malloc(sizeof(*node));
400                 node->cache.start = ino;
401                 node->cache.size = 1;
402                 node->data = rec;
403
404                 if (ino == BTRFS_FREE_INO_OBJECTID)
405                         rec->found_link = 1;
406
407                 ret = insert_cache_extent(inode_cache, &node->cache);
408                 BUG_ON(ret);
409         }
410         return rec;
411 }
412
413 static void free_inode_rec(struct inode_record *rec)
414 {
415         struct inode_backref *backref;
416
417         if (--rec->refs > 0)
418                 return;
419
420         while (!list_empty(&rec->backrefs)) {
421                 backref = list_entry(rec->backrefs.next,
422                                      struct inode_backref, list);
423                 list_del(&backref->list);
424                 free(backref);
425         }
426         free(rec);
427 }
428
429 static int can_free_inode_rec(struct inode_record *rec)
430 {
431         if (!rec->errors && rec->checked && rec->found_inode_item &&
432             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
433                 return 1;
434         return 0;
435 }
436
437 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
438                                  struct inode_record *rec)
439 {
440         struct cache_extent *cache;
441         struct inode_backref *tmp, *backref;
442         struct ptr_node *node;
443         unsigned char filetype;
444
445         if (!rec->found_inode_item)
446                 return;
447
448         filetype = imode_to_type(rec->imode);
449         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
450                 if (backref->found_dir_item && backref->found_dir_index) {
451                         if (backref->filetype != filetype)
452                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
453                         if (!backref->errors && backref->found_inode_ref) {
454                                 list_del(&backref->list);
455                                 free(backref);
456                         }
457                 }
458         }
459
460         if (!rec->checked || rec->merging)
461                 return;
462
463         if (S_ISDIR(rec->imode)) {
464                 if (rec->found_size != rec->isize)
465                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
466                 if (rec->found_file_extent)
467                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
468         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
469                 if (rec->found_dir_item)
470                         rec->errors |= I_ERR_ODD_DIR_ITEM;
471                 if (rec->found_size != rec->nbytes)
472                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
473                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
474                         rec->first_extent_gap = 0;
475                 if (rec->nlink > 0 && !no_holes &&
476                     (rec->extent_end < rec->isize ||
477                      rec->first_extent_gap < rec->isize))
478                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
479         }
480
481         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
482                 if (rec->found_csum_item && rec->nodatasum)
483                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
484                 if (rec->some_csum_missing && !rec->nodatasum)
485                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
486         }
487
488         BUG_ON(rec->refs != 1);
489         if (can_free_inode_rec(rec)) {
490                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
491                 node = container_of(cache, struct ptr_node, cache);
492                 BUG_ON(node->data != rec);
493                 remove_cache_extent(inode_cache, &node->cache);
494                 free(node);
495                 free_inode_rec(rec);
496         }
497 }
498
499 static int check_orphan_item(struct btrfs_root *root, u64 ino)
500 {
501         struct btrfs_path path;
502         struct btrfs_key key;
503         int ret;
504
505         key.objectid = BTRFS_ORPHAN_OBJECTID;
506         key.type = BTRFS_ORPHAN_ITEM_KEY;
507         key.offset = ino;
508
509         btrfs_init_path(&path);
510         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
511         btrfs_release_path(&path);
512         if (ret > 0)
513                 ret = -ENOENT;
514         return ret;
515 }
516
517 static int process_inode_item(struct extent_buffer *eb,
518                               int slot, struct btrfs_key *key,
519                               struct shared_node *active_node)
520 {
521         struct inode_record *rec;
522         struct btrfs_inode_item *item;
523
524         rec = active_node->current;
525         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
526         if (rec->found_inode_item) {
527                 rec->errors |= I_ERR_DUP_INODE_ITEM;
528                 return 1;
529         }
530         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
531         rec->nlink = btrfs_inode_nlink(eb, item);
532         rec->isize = btrfs_inode_size(eb, item);
533         rec->nbytes = btrfs_inode_nbytes(eb, item);
534         rec->imode = btrfs_inode_mode(eb, item);
535         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
536                 rec->nodatasum = 1;
537         rec->found_inode_item = 1;
538         if (rec->nlink == 0)
539                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
540         maybe_free_inode_rec(&active_node->inode_cache, rec);
541         return 0;
542 }
543
544 static struct inode_backref *get_inode_backref(struct inode_record *rec,
545                                                 const char *name,
546                                                 int namelen, u64 dir)
547 {
548         struct inode_backref *backref;
549
550         list_for_each_entry(backref, &rec->backrefs, list) {
551                 if (backref->dir != dir || backref->namelen != namelen)
552                         continue;
553                 if (memcmp(name, backref->name, namelen))
554                         continue;
555                 return backref;
556         }
557
558         backref = malloc(sizeof(*backref) + namelen + 1);
559         memset(backref, 0, sizeof(*backref));
560         backref->dir = dir;
561         backref->namelen = namelen;
562         memcpy(backref->name, name, namelen);
563         backref->name[namelen] = '\0';
564         list_add_tail(&backref->list, &rec->backrefs);
565         return backref;
566 }
567
568 static int add_inode_backref(struct cache_tree *inode_cache,
569                              u64 ino, u64 dir, u64 index,
570                              const char *name, int namelen,
571                              int filetype, int itemtype, int errors)
572 {
573         struct inode_record *rec;
574         struct inode_backref *backref;
575
576         rec = get_inode_rec(inode_cache, ino, 1);
577         backref = get_inode_backref(rec, name, namelen, dir);
578         if (errors)
579                 backref->errors |= errors;
580         if (itemtype == BTRFS_DIR_INDEX_KEY) {
581                 if (backref->found_dir_index)
582                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
583                 if (backref->found_inode_ref && backref->index != index)
584                         backref->errors |= REF_ERR_INDEX_UNMATCH;
585                 if (backref->found_dir_item && backref->filetype != filetype)
586                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
587
588                 backref->index = index;
589                 backref->filetype = filetype;
590                 backref->found_dir_index = 1;
591         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
592                 rec->found_link++;
593                 if (backref->found_dir_item)
594                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
595                 if (backref->found_dir_index && backref->filetype != filetype)
596                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
597
598                 backref->filetype = filetype;
599                 backref->found_dir_item = 1;
600         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
601                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
602                 if (backref->found_inode_ref)
603                         backref->errors |= REF_ERR_DUP_INODE_REF;
604                 if (backref->found_dir_index && backref->index != index)
605                         backref->errors |= REF_ERR_INDEX_UNMATCH;
606
607                 backref->ref_type = itemtype;
608                 backref->index = index;
609                 backref->found_inode_ref = 1;
610         } else {
611                 BUG_ON(1);
612         }
613
614         maybe_free_inode_rec(inode_cache, rec);
615         return 0;
616 }
617
618 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
619                             struct cache_tree *dst_cache)
620 {
621         struct inode_backref *backref;
622         u32 dir_count = 0;
623
624         dst->merging = 1;
625         list_for_each_entry(backref, &src->backrefs, list) {
626                 if (backref->found_dir_index) {
627                         add_inode_backref(dst_cache, dst->ino, backref->dir,
628                                         backref->index, backref->name,
629                                         backref->namelen, backref->filetype,
630                                         BTRFS_DIR_INDEX_KEY, backref->errors);
631                 }
632                 if (backref->found_dir_item) {
633                         dir_count++;
634                         add_inode_backref(dst_cache, dst->ino,
635                                         backref->dir, 0, backref->name,
636                                         backref->namelen, backref->filetype,
637                                         BTRFS_DIR_ITEM_KEY, backref->errors);
638                 }
639                 if (backref->found_inode_ref) {
640                         add_inode_backref(dst_cache, dst->ino,
641                                         backref->dir, backref->index,
642                                         backref->name, backref->namelen, 0,
643                                         backref->ref_type, backref->errors);
644                 }
645         }
646
647         if (src->found_dir_item)
648                 dst->found_dir_item = 1;
649         if (src->found_file_extent)
650                 dst->found_file_extent = 1;
651         if (src->found_csum_item)
652                 dst->found_csum_item = 1;
653         if (src->some_csum_missing)
654                 dst->some_csum_missing = 1;
655         if (dst->first_extent_gap > src->first_extent_gap)
656                 dst->first_extent_gap = src->first_extent_gap;
657
658         BUG_ON(src->found_link < dir_count);
659         dst->found_link += src->found_link - dir_count;
660         dst->found_size += src->found_size;
661         if (src->extent_start != (u64)-1) {
662                 if (dst->extent_start == (u64)-1) {
663                         dst->extent_start = src->extent_start;
664                         dst->extent_end = src->extent_end;
665                 } else {
666                         if (dst->extent_end > src->extent_start)
667                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
668                         else if (dst->extent_end < src->extent_start &&
669                                  dst->extent_end < dst->first_extent_gap)
670                                 dst->first_extent_gap = dst->extent_end;
671                         if (dst->extent_end < src->extent_end)
672                                 dst->extent_end = src->extent_end;
673                 }
674         }
675
676         dst->errors |= src->errors;
677         if (src->found_inode_item) {
678                 if (!dst->found_inode_item) {
679                         dst->nlink = src->nlink;
680                         dst->isize = src->isize;
681                         dst->nbytes = src->nbytes;
682                         dst->imode = src->imode;
683                         dst->nodatasum = src->nodatasum;
684                         dst->found_inode_item = 1;
685                 } else {
686                         dst->errors |= I_ERR_DUP_INODE_ITEM;
687                 }
688         }
689         dst->merging = 0;
690
691         return 0;
692 }
693
694 static int splice_shared_node(struct shared_node *src_node,
695                               struct shared_node *dst_node)
696 {
697         struct cache_extent *cache;
698         struct ptr_node *node, *ins;
699         struct cache_tree *src, *dst;
700         struct inode_record *rec, *conflict;
701         u64 current_ino = 0;
702         int splice = 0;
703         int ret;
704
705         if (--src_node->refs == 0)
706                 splice = 1;
707         if (src_node->current)
708                 current_ino = src_node->current->ino;
709
710         src = &src_node->root_cache;
711         dst = &dst_node->root_cache;
712 again:
713         cache = search_cache_extent(src, 0);
714         while (cache) {
715                 node = container_of(cache, struct ptr_node, cache);
716                 rec = node->data;
717                 cache = next_cache_extent(cache);
718
719                 if (splice) {
720                         remove_cache_extent(src, &node->cache);
721                         ins = node;
722                 } else {
723                         ins = malloc(sizeof(*ins));
724                         ins->cache.start = node->cache.start;
725                         ins->cache.size = node->cache.size;
726                         ins->data = rec;
727                         rec->refs++;
728                 }
729                 ret = insert_cache_extent(dst, &ins->cache);
730                 if (ret == -EEXIST) {
731                         conflict = get_inode_rec(dst, rec->ino, 1);
732                         merge_inode_recs(rec, conflict, dst);
733                         if (rec->checked) {
734                                 conflict->checked = 1;
735                                 if (dst_node->current == conflict)
736                                         dst_node->current = NULL;
737                         }
738                         maybe_free_inode_rec(dst, conflict);
739                         free_inode_rec(rec);
740                         free(ins);
741                 } else {
742                         BUG_ON(ret);
743                 }
744         }
745
746         if (src == &src_node->root_cache) {
747                 src = &src_node->inode_cache;
748                 dst = &dst_node->inode_cache;
749                 goto again;
750         }
751
752         if (current_ino > 0 && (!dst_node->current ||
753             current_ino > dst_node->current->ino)) {
754                 if (dst_node->current) {
755                         dst_node->current->checked = 1;
756                         maybe_free_inode_rec(dst, dst_node->current);
757                 }
758                 dst_node->current = get_inode_rec(dst, current_ino, 1);
759         }
760         return 0;
761 }
762
763 static void free_inode_ptr(struct cache_extent *cache)
764 {
765         struct ptr_node *node;
766         struct inode_record *rec;
767
768         node = container_of(cache, struct ptr_node, cache);
769         rec = node->data;
770         free_inode_rec(rec);
771         free(node);
772 }
773
774 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
775
776 static struct shared_node *find_shared_node(struct cache_tree *shared,
777                                             u64 bytenr)
778 {
779         struct cache_extent *cache;
780         struct shared_node *node;
781
782         cache = lookup_cache_extent(shared, bytenr, 1);
783         if (cache) {
784                 node = container_of(cache, struct shared_node, cache);
785                 return node;
786         }
787         return NULL;
788 }
789
790 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
791 {
792         int ret;
793         struct shared_node *node;
794
795         node = calloc(1, sizeof(*node));
796         node->cache.start = bytenr;
797         node->cache.size = 1;
798         cache_tree_init(&node->root_cache);
799         cache_tree_init(&node->inode_cache);
800         node->refs = refs;
801
802         ret = insert_cache_extent(shared, &node->cache);
803         BUG_ON(ret);
804         return 0;
805 }
806
807 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
808                              struct walk_control *wc, int level)
809 {
810         struct shared_node *node;
811         struct shared_node *dest;
812
813         if (level == wc->active_node)
814                 return 0;
815
816         BUG_ON(wc->active_node <= level);
817         node = find_shared_node(&wc->shared, bytenr);
818         if (!node) {
819                 add_shared_node(&wc->shared, bytenr, refs);
820                 node = find_shared_node(&wc->shared, bytenr);
821                 wc->nodes[level] = node;
822                 wc->active_node = level;
823                 return 0;
824         }
825
826         if (wc->root_level == wc->active_node &&
827             btrfs_root_refs(&root->root_item) == 0) {
828                 if (--node->refs == 0) {
829                         free_inode_recs_tree(&node->root_cache);
830                         free_inode_recs_tree(&node->inode_cache);
831                         remove_cache_extent(&wc->shared, &node->cache);
832                         free(node);
833                 }
834                 return 1;
835         }
836
837         dest = wc->nodes[wc->active_node];
838         splice_shared_node(node, dest);
839         if (node->refs == 0) {
840                 remove_cache_extent(&wc->shared, &node->cache);
841                 free(node);
842         }
843         return 1;
844 }
845
846 static int leave_shared_node(struct btrfs_root *root,
847                              struct walk_control *wc, int level)
848 {
849         struct shared_node *node;
850         struct shared_node *dest;
851         int i;
852
853         if (level == wc->root_level)
854                 return 0;
855
856         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
857                 if (wc->nodes[i])
858                         break;
859         }
860         BUG_ON(i >= BTRFS_MAX_LEVEL);
861
862         node = wc->nodes[wc->active_node];
863         wc->nodes[wc->active_node] = NULL;
864         wc->active_node = i;
865
866         dest = wc->nodes[wc->active_node];
867         if (wc->active_node < wc->root_level ||
868             btrfs_root_refs(&root->root_item) > 0) {
869                 BUG_ON(node->refs <= 1);
870                 splice_shared_node(node, dest);
871         } else {
872                 BUG_ON(node->refs < 2);
873                 node->refs--;
874         }
875         return 0;
876 }
877
878 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
879                          u64 child_root_id)
880 {
881         struct btrfs_path path;
882         struct btrfs_key key;
883         struct extent_buffer *leaf;
884         int has_parent = 0;
885         int ret;
886
887         btrfs_init_path(&path);
888
889         key.objectid = parent_root_id;
890         key.type = BTRFS_ROOT_REF_KEY;
891         key.offset = child_root_id;
892         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
893                                 0, 0);
894         BUG_ON(ret < 0);
895         btrfs_release_path(&path);
896         if (!ret)
897                 return 1;
898
899         key.objectid = child_root_id;
900         key.type = BTRFS_ROOT_BACKREF_KEY;
901         key.offset = 0;
902         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
903                                 0, 0);
904         BUG_ON(ret <= 0);
905
906         while (1) {
907                 leaf = path.nodes[0];
908                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
909                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
910                         BUG_ON(ret < 0);
911
912                         if (ret > 0)
913                                 break;
914                         leaf = path.nodes[0];
915                 }
916
917                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
918                 if (key.objectid != child_root_id ||
919                     key.type != BTRFS_ROOT_BACKREF_KEY)
920                         break;
921
922                 has_parent = 1;
923
924                 if (key.offset == parent_root_id) {
925                         btrfs_release_path(&path);
926                         return 1;
927                 }
928
929                 path.slots[0]++;
930         }
931
932         btrfs_release_path(&path);
933         return has_parent? 0 : -1;
934 }
935
936 static int process_dir_item(struct btrfs_root *root,
937                             struct extent_buffer *eb,
938                             int slot, struct btrfs_key *key,
939                             struct shared_node *active_node)
940 {
941         u32 total;
942         u32 cur = 0;
943         u32 len;
944         u32 name_len;
945         u32 data_len;
946         int error;
947         int nritems = 0;
948         int filetype;
949         struct btrfs_dir_item *di;
950         struct inode_record *rec;
951         struct cache_tree *root_cache;
952         struct cache_tree *inode_cache;
953         struct btrfs_key location;
954         char namebuf[BTRFS_NAME_LEN];
955
956         root_cache = &active_node->root_cache;
957         inode_cache = &active_node->inode_cache;
958         rec = active_node->current;
959         rec->found_dir_item = 1;
960
961         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
962         total = btrfs_item_size_nr(eb, slot);
963         while (cur < total) {
964                 nritems++;
965                 btrfs_dir_item_key_to_cpu(eb, di, &location);
966                 name_len = btrfs_dir_name_len(eb, di);
967                 data_len = btrfs_dir_data_len(eb, di);
968                 filetype = btrfs_dir_type(eb, di);
969
970                 rec->found_size += name_len;
971                 if (name_len <= BTRFS_NAME_LEN) {
972                         len = name_len;
973                         error = 0;
974                 } else {
975                         len = BTRFS_NAME_LEN;
976                         error = REF_ERR_NAME_TOO_LONG;
977                 }
978                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
979
980                 if (location.type == BTRFS_INODE_ITEM_KEY) {
981                         add_inode_backref(inode_cache, location.objectid,
982                                           key->objectid, key->offset, namebuf,
983                                           len, filetype, key->type, error);
984                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
985                         add_inode_backref(root_cache, location.objectid,
986                                           key->objectid, key->offset,
987                                           namebuf, len, filetype,
988                                           key->type, error);
989                 } else {
990                         fprintf(stderr, "warning line %d\n", __LINE__);
991                 }
992
993                 len = sizeof(*di) + name_len + data_len;
994                 di = (struct btrfs_dir_item *)((char *)di + len);
995                 cur += len;
996         }
997         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
998                 rec->errors |= I_ERR_DUP_DIR_INDEX;
999
1000         return 0;
1001 }
1002
1003 static int process_inode_ref(struct extent_buffer *eb,
1004                              int slot, struct btrfs_key *key,
1005                              struct shared_node *active_node)
1006 {
1007         u32 total;
1008         u32 cur = 0;
1009         u32 len;
1010         u32 name_len;
1011         u64 index;
1012         int error;
1013         struct cache_tree *inode_cache;
1014         struct btrfs_inode_ref *ref;
1015         char namebuf[BTRFS_NAME_LEN];
1016
1017         inode_cache = &active_node->inode_cache;
1018
1019         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1020         total = btrfs_item_size_nr(eb, slot);
1021         while (cur < total) {
1022                 name_len = btrfs_inode_ref_name_len(eb, ref);
1023                 index = btrfs_inode_ref_index(eb, ref);
1024                 if (name_len <= BTRFS_NAME_LEN) {
1025                         len = name_len;
1026                         error = 0;
1027                 } else {
1028                         len = BTRFS_NAME_LEN;
1029                         error = REF_ERR_NAME_TOO_LONG;
1030                 }
1031                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1032                 add_inode_backref(inode_cache, key->objectid, key->offset,
1033                                   index, namebuf, len, 0, key->type, error);
1034
1035                 len = sizeof(*ref) + name_len;
1036                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1037                 cur += len;
1038         }
1039         return 0;
1040 }
1041
1042 static int process_inode_extref(struct extent_buffer *eb,
1043                                 int slot, struct btrfs_key *key,
1044                                 struct shared_node *active_node)
1045 {
1046         u32 total;
1047         u32 cur = 0;
1048         u32 len;
1049         u32 name_len;
1050         u64 index;
1051         u64 parent;
1052         int error;
1053         struct cache_tree *inode_cache;
1054         struct btrfs_inode_extref *extref;
1055         char namebuf[BTRFS_NAME_LEN];
1056
1057         inode_cache = &active_node->inode_cache;
1058
1059         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1060         total = btrfs_item_size_nr(eb, slot);
1061         while (cur < total) {
1062                 name_len = btrfs_inode_extref_name_len(eb, extref);
1063                 index = btrfs_inode_extref_index(eb, extref);
1064                 parent = btrfs_inode_extref_parent(eb, extref);
1065                 if (name_len <= BTRFS_NAME_LEN) {
1066                         len = name_len;
1067                         error = 0;
1068                 } else {
1069                         len = BTRFS_NAME_LEN;
1070                         error = REF_ERR_NAME_TOO_LONG;
1071                 }
1072                 read_extent_buffer(eb, namebuf,
1073                                    (unsigned long)(extref + 1), len);
1074                 add_inode_backref(inode_cache, key->objectid, parent,
1075                                   index, namebuf, len, 0, key->type, error);
1076
1077                 len = sizeof(*extref) + name_len;
1078                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1079                 cur += len;
1080         }
1081         return 0;
1082
1083 }
1084
1085 static u64 count_csum_range(struct btrfs_root *root, u64 start, u64 len)
1086 {
1087         struct btrfs_key key;
1088         struct btrfs_path path;
1089         struct extent_buffer *leaf;
1090         int ret ;
1091         size_t size;
1092         u64 found = 0;
1093         u64 csum_end;
1094         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1095
1096         btrfs_init_path(&path);
1097
1098         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1099         key.offset = start;
1100         key.type = BTRFS_EXTENT_CSUM_KEY;
1101
1102         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1103                                 &key, &path, 0, 0);
1104         BUG_ON(ret < 0);
1105         if (ret > 0 && path.slots[0] > 0) {
1106                 leaf = path.nodes[0];
1107                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1108                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1109                     key.type == BTRFS_EXTENT_CSUM_KEY)
1110                         path.slots[0]--;
1111         }
1112
1113         while (len > 0) {
1114                 leaf = path.nodes[0];
1115                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1116                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1117                         BUG_ON(ret < 0);
1118                         if (ret > 0)
1119                                 break;
1120                         leaf = path.nodes[0];
1121                 }
1122
1123                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1124                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1125                     key.type != BTRFS_EXTENT_CSUM_KEY)
1126                         break;
1127
1128                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1129                 if (key.offset >= start + len)
1130                         break;
1131
1132                 if (key.offset > start)
1133                         start = key.offset;
1134
1135                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1136                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1137                 if (csum_end > start) {
1138                         size = min(csum_end - start, len);
1139                         len -= size;
1140                         start += size;
1141                         found += size;
1142                 }
1143
1144                 path.slots[0]++;
1145         }
1146         btrfs_release_path(&path);
1147         return found;
1148 }
1149
1150 static int process_file_extent(struct btrfs_root *root,
1151                                 struct extent_buffer *eb,
1152                                 int slot, struct btrfs_key *key,
1153                                 struct shared_node *active_node)
1154 {
1155         struct inode_record *rec;
1156         struct btrfs_file_extent_item *fi;
1157         u64 num_bytes = 0;
1158         u64 disk_bytenr = 0;
1159         u64 extent_offset = 0;
1160         u64 mask = root->sectorsize - 1;
1161         int extent_type;
1162
1163         rec = active_node->current;
1164         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1165         rec->found_file_extent = 1;
1166
1167         if (rec->extent_start == (u64)-1) {
1168                 rec->extent_start = key->offset;
1169                 rec->extent_end = key->offset;
1170         }
1171
1172         if (rec->extent_end > key->offset)
1173                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174         else if (rec->extent_end < key->offset &&
1175                  rec->extent_end < rec->first_extent_gap)
1176                 rec->first_extent_gap = rec->extent_end;
1177
1178         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1179         extent_type = btrfs_file_extent_type(eb, fi);
1180
1181         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1182                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1183                 if (num_bytes == 0)
1184                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1185                 rec->found_size += num_bytes;
1186                 num_bytes = (num_bytes + mask) & ~mask;
1187         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1188                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1189                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1190                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1191                 extent_offset = btrfs_file_extent_offset(eb, fi);
1192                 if (num_bytes == 0 || (num_bytes & mask))
1193                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1194                 if (num_bytes + extent_offset >
1195                     btrfs_file_extent_ram_bytes(eb, fi))
1196                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1197                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1198                     (btrfs_file_extent_compression(eb, fi) ||
1199                      btrfs_file_extent_encryption(eb, fi) ||
1200                      btrfs_file_extent_other_encoding(eb, fi)))
1201                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1202                 if (disk_bytenr > 0)
1203                         rec->found_size += num_bytes;
1204         } else {
1205                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1206         }
1207         rec->extent_end = key->offset + num_bytes;
1208
1209         if (disk_bytenr > 0) {
1210                 u64 found;
1211                 if (btrfs_file_extent_compression(eb, fi))
1212                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1213                 else
1214                         disk_bytenr += extent_offset;
1215
1216                 found = count_csum_range(root, disk_bytenr, num_bytes);
1217                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1218                         if (found > 0)
1219                                 rec->found_csum_item = 1;
1220                         if (found < num_bytes)
1221                                 rec->some_csum_missing = 1;
1222                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1223                         if (found > 0)
1224                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1225                 }
1226         }
1227         return 0;
1228 }
1229
1230 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1231                             struct walk_control *wc)
1232 {
1233         struct btrfs_key key;
1234         u32 nritems;
1235         int i;
1236         int ret = 0;
1237         int error = 0;
1238         struct cache_tree *inode_cache;
1239         struct shared_node *active_node;
1240
1241         if (wc->root_level == wc->active_node &&
1242             btrfs_root_refs(&root->root_item) == 0)
1243                 return 0;
1244
1245         active_node = wc->nodes[wc->active_node];
1246         inode_cache = &active_node->inode_cache;
1247         nritems = btrfs_header_nritems(eb);
1248         for (i = 0; i < nritems; i++) {
1249                 btrfs_item_key_to_cpu(eb, &key, i);
1250
1251                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1252                         continue;
1253                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1254                         continue;
1255
1256                 if (active_node->current == NULL ||
1257                     active_node->current->ino < key.objectid) {
1258                         if (active_node->current) {
1259                                 active_node->current->checked = 1;
1260                                 maybe_free_inode_rec(inode_cache,
1261                                                      active_node->current);
1262                         }
1263                         active_node->current = get_inode_rec(inode_cache,
1264                                                              key.objectid, 1);
1265                 }
1266                 switch (key.type) {
1267                 case BTRFS_DIR_ITEM_KEY:
1268                 case BTRFS_DIR_INDEX_KEY:
1269                         ret = process_dir_item(root, eb, i, &key, active_node);
1270                         break;
1271                 case BTRFS_INODE_REF_KEY:
1272                         ret = process_inode_ref(eb, i, &key, active_node);
1273                         break;
1274                 case BTRFS_INODE_EXTREF_KEY:
1275                         ret = process_inode_extref(eb, i, &key, active_node);
1276                         break;
1277                 case BTRFS_INODE_ITEM_KEY:
1278                         ret = process_inode_item(eb, i, &key, active_node);
1279                         break;
1280                 case BTRFS_EXTENT_DATA_KEY:
1281                         ret = process_file_extent(root, eb, i, &key,
1282                                                   active_node);
1283                         break;
1284                 default:
1285                         break;
1286                 };
1287                 if (ret != 0)
1288                         error = 1;
1289         }
1290         return error;
1291 }
1292
1293 static void reada_walk_down(struct btrfs_root *root,
1294                             struct extent_buffer *node, int slot)
1295 {
1296         u64 bytenr;
1297         u64 ptr_gen;
1298         u32 nritems;
1299         u32 blocksize;
1300         int i;
1301         int level;
1302
1303         level = btrfs_header_level(node);
1304         if (level != 1)
1305                 return;
1306
1307         nritems = btrfs_header_nritems(node);
1308         blocksize = btrfs_level_size(root, level - 1);
1309         for (i = slot; i < nritems; i++) {
1310                 bytenr = btrfs_node_blockptr(node, i);
1311                 ptr_gen = btrfs_node_ptr_generation(node, i);
1312                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1313         }
1314 }
1315
1316 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1317                           struct walk_control *wc, int *level)
1318 {
1319         u64 bytenr;
1320         u64 ptr_gen;
1321         struct extent_buffer *next;
1322         struct extent_buffer *cur;
1323         u32 blocksize;
1324         int ret, err = 0;
1325         u64 refs;
1326
1327         WARN_ON(*level < 0);
1328         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1329         ret = btrfs_lookup_extent_info(NULL, root,
1330                                        path->nodes[*level]->start,
1331                                        *level, 1, &refs, NULL);
1332         if (ret < 0) {
1333                 err = ret;
1334                 goto out;
1335         }
1336
1337         if (refs > 1) {
1338                 ret = enter_shared_node(root, path->nodes[*level]->start,
1339                                         refs, wc, *level);
1340                 if (ret > 0) {
1341                         err = ret;
1342                         goto out;
1343                 }
1344         }
1345
1346         while (*level >= 0) {
1347                 WARN_ON(*level < 0);
1348                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1349                 cur = path->nodes[*level];
1350
1351                 if (btrfs_header_level(cur) != *level)
1352                         WARN_ON(1);
1353
1354                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1355                         break;
1356                 if (*level == 0) {
1357                         ret = process_one_leaf(root, cur, wc);
1358                         break;
1359                 }
1360                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1361                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1362                 blocksize = btrfs_level_size(root, *level - 1);
1363                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1364                                                1, &refs, NULL);
1365                 if (ret < 0)
1366                         refs = 0;
1367
1368                 if (refs > 1) {
1369                         ret = enter_shared_node(root, bytenr, refs,
1370                                                 wc, *level - 1);
1371                         if (ret > 0) {
1372                                 path->slots[*level]++;
1373                                 continue;
1374                         }
1375                 }
1376
1377                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1378                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1379                         free_extent_buffer(next);
1380                         reada_walk_down(root, cur, path->slots[*level]);
1381                         next = read_tree_block(root, bytenr, blocksize,
1382                                                ptr_gen);
1383                         if (!next) {
1384                                 err = -EIO;
1385                                 goto out;
1386                         }
1387                 }
1388
1389                 *level = *level - 1;
1390                 free_extent_buffer(path->nodes[*level]);
1391                 path->nodes[*level] = next;
1392                 path->slots[*level] = 0;
1393         }
1394 out:
1395         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1396         return err;
1397 }
1398
1399 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1400                         struct walk_control *wc, int *level)
1401 {
1402         int i;
1403         struct extent_buffer *leaf;
1404
1405         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1406                 leaf = path->nodes[i];
1407                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1408                         path->slots[i]++;
1409                         *level = i;
1410                         return 0;
1411                 } else {
1412                         free_extent_buffer(path->nodes[*level]);
1413                         path->nodes[*level] = NULL;
1414                         BUG_ON(*level > wc->active_node);
1415                         if (*level == wc->active_node)
1416                                 leave_shared_node(root, wc, *level);
1417                         *level = i + 1;
1418                 }
1419         }
1420         return 1;
1421 }
1422
1423 static int check_root_dir(struct inode_record *rec)
1424 {
1425         struct inode_backref *backref;
1426         int ret = -1;
1427
1428         if (!rec->found_inode_item || rec->errors)
1429                 goto out;
1430         if (rec->nlink != 1 || rec->found_link != 0)
1431                 goto out;
1432         if (list_empty(&rec->backrefs))
1433                 goto out;
1434         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1435         if (!backref->found_inode_ref)
1436                 goto out;
1437         if (backref->index != 0 || backref->namelen != 2 ||
1438             memcmp(backref->name, "..", 2))
1439                 goto out;
1440         if (backref->found_dir_index || backref->found_dir_item)
1441                 goto out;
1442         ret = 0;
1443 out:
1444         return ret;
1445 }
1446
1447 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1448                               struct btrfs_root *root, struct btrfs_path *path,
1449                               struct inode_record *rec)
1450 {
1451         struct btrfs_inode_item *ei;
1452         struct btrfs_key key;
1453         int ret;
1454
1455         key.objectid = rec->ino;
1456         key.type = BTRFS_INODE_ITEM_KEY;
1457         key.offset = (u64)-1;
1458
1459         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1460         if (ret < 0)
1461                 goto out;
1462         if (ret) {
1463                 if (!path->slots[0]) {
1464                         ret = -ENOENT;
1465                         goto out;
1466                 }
1467                 path->slots[0]--;
1468                 ret = 0;
1469         }
1470         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1471         if (key.objectid != rec->ino) {
1472                 ret = -ENOENT;
1473                 goto out;
1474         }
1475
1476         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1477                             struct btrfs_inode_item);
1478         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1479         btrfs_mark_buffer_dirty(path->nodes[0]);
1480         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1481         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1482                root->root_key.objectid);
1483 out:
1484         btrfs_release_path(path);
1485         return ret;
1486 }
1487
1488 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1489                                     struct btrfs_root *root,
1490                                     struct btrfs_path *path,
1491                                     struct inode_record *rec)
1492 {
1493         struct btrfs_key key;
1494         int ret;
1495
1496         key.objectid = BTRFS_ORPHAN_OBJECTID;
1497         key.type = BTRFS_ORPHAN_ITEM_KEY;
1498         key.offset = rec->ino;
1499
1500         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1501         btrfs_release_path(path);
1502         if (!ret)
1503                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1504         return ret;
1505 }
1506
1507 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
1508 {
1509         struct btrfs_trans_handle *trans;
1510         struct btrfs_path *path;
1511         int ret = 0;
1512
1513         /* So far we just fix dir isize wrong */
1514         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM)))
1515                 return 1;
1516
1517         path = btrfs_alloc_path();
1518         if (!path)
1519                 return -ENOMEM;
1520
1521         trans = btrfs_start_transaction(root, 1);
1522         if (IS_ERR(trans)) {
1523                 btrfs_free_path(path);
1524                 return PTR_ERR(trans);
1525         }
1526
1527         if (rec->errors & I_ERR_DIR_ISIZE_WRONG)
1528                 ret = repair_inode_isize(trans, root, path, rec);
1529         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
1530                 ret = repair_inode_orphan_item(trans, root, path, rec);
1531         btrfs_commit_transaction(trans, root);
1532         btrfs_free_path(path);
1533         return ret;
1534 }
1535
1536 static int check_inode_recs(struct btrfs_root *root,
1537                             struct cache_tree *inode_cache)
1538 {
1539         struct cache_extent *cache;
1540         struct ptr_node *node;
1541         struct inode_record *rec;
1542         struct inode_backref *backref;
1543         int ret;
1544         u64 error = 0;
1545         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1546
1547         if (btrfs_root_refs(&root->root_item) == 0) {
1548                 if (!cache_tree_empty(inode_cache))
1549                         fprintf(stderr, "warning line %d\n", __LINE__);
1550                 return 0;
1551         }
1552
1553         rec = get_inode_rec(inode_cache, root_dirid, 0);
1554         if (rec) {
1555                 ret = check_root_dir(rec);
1556                 if (ret) {
1557                         fprintf(stderr, "root %llu root dir %llu error\n",
1558                                 (unsigned long long)root->root_key.objectid,
1559                                 (unsigned long long)root_dirid);
1560                         error++;
1561                 }
1562         } else {
1563                 fprintf(stderr, "root %llu root dir %llu not found\n",
1564                         (unsigned long long)root->root_key.objectid,
1565                         (unsigned long long)root_dirid);
1566         }
1567
1568         while (1) {
1569                 cache = search_cache_extent(inode_cache, 0);
1570                 if (!cache)
1571                         break;
1572                 node = container_of(cache, struct ptr_node, cache);
1573                 rec = node->data;
1574                 remove_cache_extent(inode_cache, &node->cache);
1575                 free(node);
1576                 if (rec->ino == root_dirid ||
1577                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1578                         free_inode_rec(rec);
1579                         continue;
1580                 }
1581
1582                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1583                         ret = check_orphan_item(root, rec->ino);
1584                         if (ret == 0)
1585                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1586                         if (can_free_inode_rec(rec)) {
1587                                 free_inode_rec(rec);
1588                                 continue;
1589                         }
1590                 }
1591
1592                 if (repair) {
1593                         ret = try_repair_inode(root, rec);
1594                         if (ret == 0 && can_free_inode_rec(rec)) {
1595                                 free_inode_rec(rec);
1596                                 continue;
1597                         }
1598                         ret = 0;
1599                 }
1600
1601                 error++;
1602                 if (!rec->found_inode_item)
1603                         rec->errors |= I_ERR_NO_INODE_ITEM;
1604                 if (rec->found_link != rec->nlink)
1605                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1606                 fprintf(stderr, "root %llu inode %llu errors %x",
1607                         (unsigned long long) root->root_key.objectid,
1608                         (unsigned long long) rec->ino, rec->errors);
1609                 print_inode_error(rec->errors);
1610                 list_for_each_entry(backref, &rec->backrefs, list) {
1611                         if (!backref->found_dir_item)
1612                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1613                         if (!backref->found_dir_index)
1614                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1615                         if (!backref->found_inode_ref)
1616                                 backref->errors |= REF_ERR_NO_INODE_REF;
1617                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1618                                 " namelen %u name %s filetype %d errors %x",
1619                                 (unsigned long long)backref->dir,
1620                                 (unsigned long long)backref->index,
1621                                 backref->namelen, backref->name,
1622                                 backref->filetype, backref->errors);
1623                         print_ref_error(backref->errors);
1624                 }
1625                 free_inode_rec(rec);
1626         }
1627         return (error > 0) ? -1 : 0;
1628 }
1629
1630 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1631                                         u64 objectid)
1632 {
1633         struct cache_extent *cache;
1634         struct root_record *rec = NULL;
1635         int ret;
1636
1637         cache = lookup_cache_extent(root_cache, objectid, 1);
1638         if (cache) {
1639                 rec = container_of(cache, struct root_record, cache);
1640         } else {
1641                 rec = calloc(1, sizeof(*rec));
1642                 rec->objectid = objectid;
1643                 INIT_LIST_HEAD(&rec->backrefs);
1644                 rec->cache.start = objectid;
1645                 rec->cache.size = 1;
1646
1647                 ret = insert_cache_extent(root_cache, &rec->cache);
1648                 BUG_ON(ret);
1649         }
1650         return rec;
1651 }
1652
1653 static struct root_backref *get_root_backref(struct root_record *rec,
1654                                              u64 ref_root, u64 dir, u64 index,
1655                                              const char *name, int namelen)
1656 {
1657         struct root_backref *backref;
1658
1659         list_for_each_entry(backref, &rec->backrefs, list) {
1660                 if (backref->ref_root != ref_root || backref->dir != dir ||
1661                     backref->namelen != namelen)
1662                         continue;
1663                 if (memcmp(name, backref->name, namelen))
1664                         continue;
1665                 return backref;
1666         }
1667
1668         backref = malloc(sizeof(*backref) + namelen + 1);
1669         memset(backref, 0, sizeof(*backref));
1670         backref->ref_root = ref_root;
1671         backref->dir = dir;
1672         backref->index = index;
1673         backref->namelen = namelen;
1674         memcpy(backref->name, name, namelen);
1675         backref->name[namelen] = '\0';
1676         list_add_tail(&backref->list, &rec->backrefs);
1677         return backref;
1678 }
1679
1680 static void free_root_record(struct cache_extent *cache)
1681 {
1682         struct root_record *rec;
1683         struct root_backref *backref;
1684
1685         rec = container_of(cache, struct root_record, cache);
1686         while (!list_empty(&rec->backrefs)) {
1687                 backref = list_entry(rec->backrefs.next,
1688                                      struct root_backref, list);
1689                 list_del(&backref->list);
1690                 free(backref);
1691         }
1692
1693         kfree(rec);
1694 }
1695
1696 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
1697
1698 static int add_root_backref(struct cache_tree *root_cache,
1699                             u64 root_id, u64 ref_root, u64 dir, u64 index,
1700                             const char *name, int namelen,
1701                             int item_type, int errors)
1702 {
1703         struct root_record *rec;
1704         struct root_backref *backref;
1705
1706         rec = get_root_rec(root_cache, root_id);
1707         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
1708
1709         backref->errors |= errors;
1710
1711         if (item_type != BTRFS_DIR_ITEM_KEY) {
1712                 if (backref->found_dir_index || backref->found_back_ref ||
1713                     backref->found_forward_ref) {
1714                         if (backref->index != index)
1715                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
1716                 } else {
1717                         backref->index = index;
1718                 }
1719         }
1720
1721         if (item_type == BTRFS_DIR_ITEM_KEY) {
1722                 if (backref->found_forward_ref)
1723                         rec->found_ref++;
1724                 backref->found_dir_item = 1;
1725         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
1726                 backref->found_dir_index = 1;
1727         } else if (item_type == BTRFS_ROOT_REF_KEY) {
1728                 if (backref->found_forward_ref)
1729                         backref->errors |= REF_ERR_DUP_ROOT_REF;
1730                 else if (backref->found_dir_item)
1731                         rec->found_ref++;
1732                 backref->found_forward_ref = 1;
1733         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
1734                 if (backref->found_back_ref)
1735                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
1736                 backref->found_back_ref = 1;
1737         } else {
1738                 BUG_ON(1);
1739         }
1740
1741         if (backref->found_forward_ref && backref->found_dir_item)
1742                 backref->reachable = 1;
1743         return 0;
1744 }
1745
1746 static int merge_root_recs(struct btrfs_root *root,
1747                            struct cache_tree *src_cache,
1748                            struct cache_tree *dst_cache)
1749 {
1750         struct cache_extent *cache;
1751         struct ptr_node *node;
1752         struct inode_record *rec;
1753         struct inode_backref *backref;
1754
1755         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1756                 free_inode_recs_tree(src_cache);
1757                 return 0;
1758         }
1759
1760         while (1) {
1761                 cache = search_cache_extent(src_cache, 0);
1762                 if (!cache)
1763                         break;
1764                 node = container_of(cache, struct ptr_node, cache);
1765                 rec = node->data;
1766                 remove_cache_extent(src_cache, &node->cache);
1767                 free(node);
1768
1769                 if (!is_child_root(root, root->objectid, rec->ino))
1770                         goto skip;
1771
1772                 list_for_each_entry(backref, &rec->backrefs, list) {
1773                         BUG_ON(backref->found_inode_ref);
1774                         if (backref->found_dir_item)
1775                                 add_root_backref(dst_cache, rec->ino,
1776                                         root->root_key.objectid, backref->dir,
1777                                         backref->index, backref->name,
1778                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
1779                                         backref->errors);
1780                         if (backref->found_dir_index)
1781                                 add_root_backref(dst_cache, rec->ino,
1782                                         root->root_key.objectid, backref->dir,
1783                                         backref->index, backref->name,
1784                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
1785                                         backref->errors);
1786                 }
1787 skip:
1788                 free_inode_rec(rec);
1789         }
1790         return 0;
1791 }
1792
1793 static int check_root_refs(struct btrfs_root *root,
1794                            struct cache_tree *root_cache)
1795 {
1796         struct root_record *rec;
1797         struct root_record *ref_root;
1798         struct root_backref *backref;
1799         struct cache_extent *cache;
1800         int loop = 1;
1801         int ret;
1802         int error;
1803         int errors = 0;
1804
1805         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
1806         rec->found_ref = 1;
1807
1808         /* fixme: this can not detect circular references */
1809         while (loop) {
1810                 loop = 0;
1811                 cache = search_cache_extent(root_cache, 0);
1812                 while (1) {
1813                         if (!cache)
1814                                 break;
1815                         rec = container_of(cache, struct root_record, cache);
1816                         cache = next_cache_extent(cache);
1817
1818                         if (rec->found_ref == 0)
1819                                 continue;
1820
1821                         list_for_each_entry(backref, &rec->backrefs, list) {
1822                                 if (!backref->reachable)
1823                                         continue;
1824
1825                                 ref_root = get_root_rec(root_cache,
1826                                                         backref->ref_root);
1827                                 if (ref_root->found_ref > 0)
1828                                         continue;
1829
1830                                 backref->reachable = 0;
1831                                 rec->found_ref--;
1832                                 if (rec->found_ref == 0)
1833                                         loop = 1;
1834                         }
1835                 }
1836         }
1837
1838         cache = search_cache_extent(root_cache, 0);
1839         while (1) {
1840                 if (!cache)
1841                         break;
1842                 rec = container_of(cache, struct root_record, cache);
1843                 cache = next_cache_extent(cache);
1844
1845                 if (rec->found_ref == 0 &&
1846                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1847                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
1848                         ret = check_orphan_item(root->fs_info->tree_root,
1849                                                 rec->objectid);
1850                         if (ret == 0)
1851                                 continue;
1852
1853                         /*
1854                          * If we don't have a root item then we likely just have
1855                          * a dir item in a snapshot for this root but no actual
1856                          * ref key or anything so it's meaningless.
1857                          */
1858                         if (!rec->found_root_item)
1859                                 continue;
1860                         errors++;
1861                         fprintf(stderr, "fs tree %llu not referenced\n",
1862                                 (unsigned long long)rec->objectid);
1863                 }
1864
1865                 error = 0;
1866                 if (rec->found_ref > 0 && !rec->found_root_item)
1867                         error = 1;
1868                 list_for_each_entry(backref, &rec->backrefs, list) {
1869                         if (!backref->found_dir_item)
1870                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1871                         if (!backref->found_dir_index)
1872                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1873                         if (!backref->found_back_ref)
1874                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
1875                         if (!backref->found_forward_ref)
1876                                 backref->errors |= REF_ERR_NO_ROOT_REF;
1877                         if (backref->reachable && backref->errors)
1878                                 error = 1;
1879                 }
1880                 if (!error)
1881                         continue;
1882
1883                 errors++;
1884                 fprintf(stderr, "fs tree %llu refs %u %s\n",
1885                         (unsigned long long)rec->objectid, rec->found_ref,
1886                          rec->found_root_item ? "" : "not found");
1887
1888                 list_for_each_entry(backref, &rec->backrefs, list) {
1889                         if (!backref->reachable)
1890                                 continue;
1891                         if (!backref->errors && rec->found_root_item)
1892                                 continue;
1893                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
1894                                 " index %llu namelen %u name %s errors %x\n",
1895                                 (unsigned long long)backref->ref_root,
1896                                 (unsigned long long)backref->dir,
1897                                 (unsigned long long)backref->index,
1898                                 backref->namelen, backref->name,
1899                                 backref->errors);
1900                         print_ref_error(backref->errors);
1901                 }
1902         }
1903         return errors > 0 ? 1 : 0;
1904 }
1905
1906 static int process_root_ref(struct extent_buffer *eb, int slot,
1907                             struct btrfs_key *key,
1908                             struct cache_tree *root_cache)
1909 {
1910         u64 dirid;
1911         u64 index;
1912         u32 len;
1913         u32 name_len;
1914         struct btrfs_root_ref *ref;
1915         char namebuf[BTRFS_NAME_LEN];
1916         int error;
1917
1918         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
1919
1920         dirid = btrfs_root_ref_dirid(eb, ref);
1921         index = btrfs_root_ref_sequence(eb, ref);
1922         name_len = btrfs_root_ref_name_len(eb, ref);
1923
1924         if (name_len <= BTRFS_NAME_LEN) {
1925                 len = name_len;
1926                 error = 0;
1927         } else {
1928                 len = BTRFS_NAME_LEN;
1929                 error = REF_ERR_NAME_TOO_LONG;
1930         }
1931         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1932
1933         if (key->type == BTRFS_ROOT_REF_KEY) {
1934                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
1935                                  index, namebuf, len, key->type, error);
1936         } else {
1937                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
1938                                  index, namebuf, len, key->type, error);
1939         }
1940         return 0;
1941 }
1942
1943 static int check_fs_root(struct btrfs_root *root,
1944                          struct cache_tree *root_cache,
1945                          struct walk_control *wc)
1946 {
1947         int ret = 0;
1948         int wret;
1949         int level;
1950         struct btrfs_path path;
1951         struct shared_node root_node;
1952         struct root_record *rec;
1953         struct btrfs_root_item *root_item = &root->root_item;
1954
1955         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1956                 rec = get_root_rec(root_cache, root->root_key.objectid);
1957                 if (btrfs_root_refs(root_item) > 0)
1958                         rec->found_root_item = 1;
1959         }
1960
1961         btrfs_init_path(&path);
1962         memset(&root_node, 0, sizeof(root_node));
1963         cache_tree_init(&root_node.root_cache);
1964         cache_tree_init(&root_node.inode_cache);
1965
1966         level = btrfs_header_level(root->node);
1967         memset(wc->nodes, 0, sizeof(wc->nodes));
1968         wc->nodes[level] = &root_node;
1969         wc->active_node = level;
1970         wc->root_level = level;
1971
1972         if (btrfs_root_refs(root_item) > 0 ||
1973             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1974                 path.nodes[level] = root->node;
1975                 extent_buffer_get(root->node);
1976                 path.slots[level] = 0;
1977         } else {
1978                 struct btrfs_key key;
1979                 struct btrfs_disk_key found_key;
1980
1981                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1982                 level = root_item->drop_level;
1983                 path.lowest_level = level;
1984                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1985                 BUG_ON(wret < 0);
1986                 btrfs_node_key(path.nodes[level], &found_key,
1987                                 path.slots[level]);
1988                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1989                                         sizeof(found_key)));
1990         }
1991
1992         while (1) {
1993                 wret = walk_down_tree(root, &path, wc, &level);
1994                 if (wret < 0)
1995                         ret = wret;
1996                 if (wret != 0)
1997                         break;
1998
1999                 wret = walk_up_tree(root, &path, wc, &level);
2000                 if (wret < 0)
2001                         ret = wret;
2002                 if (wret != 0)
2003                         break;
2004         }
2005         btrfs_release_path(&path);
2006
2007         merge_root_recs(root, &root_node.root_cache, root_cache);
2008
2009         if (root_node.current) {
2010                 root_node.current->checked = 1;
2011                 maybe_free_inode_rec(&root_node.inode_cache,
2012                                 root_node.current);
2013         }
2014
2015         ret = check_inode_recs(root, &root_node.inode_cache);
2016         return ret;
2017 }
2018
2019 static int fs_root_objectid(u64 objectid)
2020 {
2021         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
2022             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2023                 return 1;
2024         return is_fstree(objectid);
2025 }
2026
2027 static int check_fs_roots(struct btrfs_root *root,
2028                           struct cache_tree *root_cache)
2029 {
2030         struct btrfs_path path;
2031         struct btrfs_key key;
2032         struct walk_control wc;
2033         struct extent_buffer *leaf;
2034         struct btrfs_root *tmp_root;
2035         struct btrfs_root *tree_root = root->fs_info->tree_root;
2036         int ret;
2037         int err = 0;
2038
2039         /*
2040          * Just in case we made any changes to the extent tree that weren't
2041          * reflected into the free space cache yet.
2042          */
2043         if (repair)
2044                 reset_cached_block_groups(root->fs_info);
2045         memset(&wc, 0, sizeof(wc));
2046         cache_tree_init(&wc.shared);
2047         btrfs_init_path(&path);
2048
2049         key.offset = 0;
2050         key.objectid = 0;
2051         key.type = BTRFS_ROOT_ITEM_KEY;
2052         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
2053         BUG_ON(ret < 0);
2054         while (1) {
2055                 leaf = path.nodes[0];
2056                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2057                         ret = btrfs_next_leaf(tree_root, &path);
2058                         if (ret != 0)
2059                                 break;
2060                         leaf = path.nodes[0];
2061                 }
2062                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2063                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
2064                     fs_root_objectid(key.objectid)) {
2065                         key.offset = (u64)-1;
2066                         tmp_root = btrfs_read_fs_root(root->fs_info, &key);
2067                         if (IS_ERR(tmp_root)) {
2068                                 err = 1;
2069                                 goto next;
2070                         }
2071                         ret = check_fs_root(tmp_root, root_cache, &wc);
2072                         if (ret)
2073                                 err = 1;
2074                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
2075                            key.type == BTRFS_ROOT_BACKREF_KEY) {
2076                         process_root_ref(leaf, path.slots[0], &key,
2077                                          root_cache);
2078                 }
2079 next:
2080                 path.slots[0]++;
2081         }
2082         btrfs_release_path(&path);
2083
2084         if (!cache_tree_empty(&wc.shared))
2085                 fprintf(stderr, "warning line %d\n", __LINE__);
2086
2087         return err;
2088 }
2089
2090 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
2091 {
2092         struct list_head *cur = rec->backrefs.next;
2093         struct extent_backref *back;
2094         struct tree_backref *tback;
2095         struct data_backref *dback;
2096         u64 found = 0;
2097         int err = 0;
2098
2099         while(cur != &rec->backrefs) {
2100                 back = list_entry(cur, struct extent_backref, list);
2101                 cur = cur->next;
2102                 if (!back->found_extent_tree) {
2103                         err = 1;
2104                         if (!print_errs)
2105                                 goto out;
2106                         if (back->is_data) {
2107                                 dback = (struct data_backref *)back;
2108                                 fprintf(stderr, "Backref %llu %s %llu"
2109                                         " owner %llu offset %llu num_refs %lu"
2110                                         " not found in extent tree\n",
2111                                         (unsigned long long)rec->start,
2112                                         back->full_backref ?
2113                                         "parent" : "root",
2114                                         back->full_backref ?
2115                                         (unsigned long long)dback->parent:
2116                                         (unsigned long long)dback->root,
2117                                         (unsigned long long)dback->owner,
2118                                         (unsigned long long)dback->offset,
2119                                         (unsigned long)dback->num_refs);
2120                         } else {
2121                                 tback = (struct tree_backref *)back;
2122                                 fprintf(stderr, "Backref %llu parent %llu"
2123                                         " root %llu not found in extent tree\n",
2124                                         (unsigned long long)rec->start,
2125                                         (unsigned long long)tback->parent,
2126                                         (unsigned long long)tback->root);
2127                         }
2128                 }
2129                 if (!back->is_data && !back->found_ref) {
2130                         err = 1;
2131                         if (!print_errs)
2132                                 goto out;
2133                         tback = (struct tree_backref *)back;
2134                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
2135                                 (unsigned long long)rec->start,
2136                                 back->full_backref ? "parent" : "root",
2137                                 back->full_backref ?
2138                                 (unsigned long long)tback->parent :
2139                                 (unsigned long long)tback->root, back);
2140                 }
2141                 if (back->is_data) {
2142                         dback = (struct data_backref *)back;
2143                         if (dback->found_ref != dback->num_refs) {
2144                                 err = 1;
2145                                 if (!print_errs)
2146                                         goto out;
2147                                 fprintf(stderr, "Incorrect local backref count"
2148                                         " on %llu %s %llu owner %llu"
2149                                         " offset %llu found %u wanted %u back %p\n",
2150                                         (unsigned long long)rec->start,
2151                                         back->full_backref ?
2152                                         "parent" : "root",
2153                                         back->full_backref ?
2154                                         (unsigned long long)dback->parent:
2155                                         (unsigned long long)dback->root,
2156                                         (unsigned long long)dback->owner,
2157                                         (unsigned long long)dback->offset,
2158                                         dback->found_ref, dback->num_refs, back);
2159                         }
2160                         if (dback->disk_bytenr != rec->start) {
2161                                 err = 1;
2162                                 if (!print_errs)
2163                                         goto out;
2164                                 fprintf(stderr, "Backref disk bytenr does not"
2165                                         " match extent record, bytenr=%llu, "
2166                                         "ref bytenr=%llu\n",
2167                                         (unsigned long long)rec->start,
2168                                         (unsigned long long)dback->disk_bytenr);
2169                         }
2170
2171                         if (dback->bytes != rec->nr) {
2172                                 err = 1;
2173                                 if (!print_errs)
2174                                         goto out;
2175                                 fprintf(stderr, "Backref bytes do not match "
2176                                         "extent backref, bytenr=%llu, ref "
2177                                         "bytes=%llu, backref bytes=%llu\n",
2178                                         (unsigned long long)rec->start,
2179                                         (unsigned long long)rec->nr,
2180                                         (unsigned long long)dback->bytes);
2181                         }
2182                 }
2183                 if (!back->is_data) {
2184                         found += 1;
2185                 } else {
2186                         dback = (struct data_backref *)back;
2187                         found += dback->found_ref;
2188                 }
2189         }
2190         if (found != rec->refs) {
2191                 err = 1;
2192                 if (!print_errs)
2193                         goto out;
2194                 fprintf(stderr, "Incorrect global backref count "
2195                         "on %llu found %llu wanted %llu\n",
2196                         (unsigned long long)rec->start,
2197                         (unsigned long long)found,
2198                         (unsigned long long)rec->refs);
2199         }
2200 out:
2201         return err;
2202 }
2203
2204 static int free_all_extent_backrefs(struct extent_record *rec)
2205 {
2206         struct extent_backref *back;
2207         struct list_head *cur;
2208         while (!list_empty(&rec->backrefs)) {
2209                 cur = rec->backrefs.next;
2210                 back = list_entry(cur, struct extent_backref, list);
2211                 list_del(cur);
2212                 free(back);
2213         }
2214         return 0;
2215 }
2216
2217 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
2218                                      struct cache_tree *extent_cache)
2219 {
2220         struct cache_extent *cache;
2221         struct extent_record *rec;
2222
2223         while (1) {
2224                 cache = first_cache_extent(extent_cache);
2225                 if (!cache)
2226                         break;
2227                 rec = container_of(cache, struct extent_record, cache);
2228                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2229                 remove_cache_extent(extent_cache, cache);
2230                 free_all_extent_backrefs(rec);
2231                 free(rec);
2232         }
2233 }
2234
2235 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2236                                  struct extent_record *rec)
2237 {
2238         if (rec->content_checked && rec->owner_ref_checked &&
2239             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2240             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2241                 remove_cache_extent(extent_cache, &rec->cache);
2242                 free_all_extent_backrefs(rec);
2243                 list_del_init(&rec->list);
2244                 free(rec);
2245         }
2246         return 0;
2247 }
2248
2249 static int check_owner_ref(struct btrfs_root *root,
2250                             struct extent_record *rec,
2251                             struct extent_buffer *buf)
2252 {
2253         struct extent_backref *node;
2254         struct tree_backref *back;
2255         struct btrfs_root *ref_root;
2256         struct btrfs_key key;
2257         struct btrfs_path path;
2258         struct extent_buffer *parent;
2259         int level;
2260         int found = 0;
2261         int ret;
2262
2263         list_for_each_entry(node, &rec->backrefs, list) {
2264                 if (node->is_data)
2265                         continue;
2266                 if (!node->found_ref)
2267                         continue;
2268                 if (node->full_backref)
2269                         continue;
2270                 back = (struct tree_backref *)node;
2271                 if (btrfs_header_owner(buf) == back->root)
2272                         return 0;
2273         }
2274         BUG_ON(rec->is_root);
2275
2276         /* try to find the block by search corresponding fs tree */
2277         key.objectid = btrfs_header_owner(buf);
2278         key.type = BTRFS_ROOT_ITEM_KEY;
2279         key.offset = (u64)-1;
2280
2281         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2282         if (IS_ERR(ref_root))
2283                 return 1;
2284
2285         level = btrfs_header_level(buf);
2286         if (level == 0)
2287                 btrfs_item_key_to_cpu(buf, &key, 0);
2288         else
2289                 btrfs_node_key_to_cpu(buf, &key, 0);
2290
2291         btrfs_init_path(&path);
2292         path.lowest_level = level + 1;
2293         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2294         if (ret < 0)
2295                 return 0;
2296
2297         parent = path.nodes[level + 1];
2298         if (parent && buf->start == btrfs_node_blockptr(parent,
2299                                                         path.slots[level + 1]))
2300                 found = 1;
2301
2302         btrfs_release_path(&path);
2303         return found ? 0 : 1;
2304 }
2305
2306 static int is_extent_tree_record(struct extent_record *rec)
2307 {
2308         struct list_head *cur = rec->backrefs.next;
2309         struct extent_backref *node;
2310         struct tree_backref *back;
2311         int is_extent = 0;
2312
2313         while(cur != &rec->backrefs) {
2314                 node = list_entry(cur, struct extent_backref, list);
2315                 cur = cur->next;
2316                 if (node->is_data)
2317                         return 0;
2318                 back = (struct tree_backref *)node;
2319                 if (node->full_backref)
2320                         return 0;
2321                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2322                         is_extent = 1;
2323         }
2324         return is_extent;
2325 }
2326
2327
2328 static int record_bad_block_io(struct btrfs_fs_info *info,
2329                                struct cache_tree *extent_cache,
2330                                u64 start, u64 len)
2331 {
2332         struct extent_record *rec;
2333         struct cache_extent *cache;
2334         struct btrfs_key key;
2335
2336         cache = lookup_cache_extent(extent_cache, start, len);
2337         if (!cache)
2338                 return 0;
2339
2340         rec = container_of(cache, struct extent_record, cache);
2341         if (!is_extent_tree_record(rec))
2342                 return 0;
2343
2344         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2345         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2346 }
2347
2348 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
2349                        struct extent_buffer *buf, int slot)
2350 {
2351         if (btrfs_header_level(buf)) {
2352                 struct btrfs_key_ptr ptr1, ptr2;
2353
2354                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
2355                                    sizeof(struct btrfs_key_ptr));
2356                 read_extent_buffer(buf, &ptr2,
2357                                    btrfs_node_key_ptr_offset(slot + 1),
2358                                    sizeof(struct btrfs_key_ptr));
2359                 write_extent_buffer(buf, &ptr1,
2360                                     btrfs_node_key_ptr_offset(slot + 1),
2361                                     sizeof(struct btrfs_key_ptr));
2362                 write_extent_buffer(buf, &ptr2,
2363                                     btrfs_node_key_ptr_offset(slot),
2364                                     sizeof(struct btrfs_key_ptr));
2365                 if (slot == 0) {
2366                         struct btrfs_disk_key key;
2367                         btrfs_node_key(buf, &key, 0);
2368                         btrfs_fixup_low_keys(root, path, &key,
2369                                              btrfs_header_level(buf) + 1);
2370                 }
2371         } else {
2372                 struct btrfs_item *item1, *item2;
2373                 struct btrfs_key k1, k2;
2374                 char *item1_data, *item2_data;
2375                 u32 item1_offset, item2_offset, item1_size, item2_size;
2376
2377                 item1 = btrfs_item_nr(slot);
2378                 item2 = btrfs_item_nr(slot + 1);
2379                 btrfs_item_key_to_cpu(buf, &k1, slot);
2380                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
2381                 item1_offset = btrfs_item_offset(buf, item1);
2382                 item2_offset = btrfs_item_offset(buf, item2);
2383                 item1_size = btrfs_item_size(buf, item1);
2384                 item2_size = btrfs_item_size(buf, item2);
2385
2386                 item1_data = malloc(item1_size);
2387                 if (!item1_data)
2388                         return -ENOMEM;
2389                 item2_data = malloc(item2_size);
2390                 if (!item2_data) {
2391                         free(item1_data);
2392                         return -ENOMEM;
2393                 }
2394
2395                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
2396                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
2397
2398                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
2399                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
2400                 free(item1_data);
2401                 free(item2_data);
2402
2403                 btrfs_set_item_offset(buf, item1, item2_offset);
2404                 btrfs_set_item_offset(buf, item2, item1_offset);
2405                 btrfs_set_item_size(buf, item1, item2_size);
2406                 btrfs_set_item_size(buf, item2, item1_size);
2407
2408                 path->slots[0] = slot;
2409                 btrfs_set_item_key_unsafe(root, path, &k2);
2410                 path->slots[0] = slot + 1;
2411                 btrfs_set_item_key_unsafe(root, path, &k1);
2412         }
2413         return 0;
2414 }
2415
2416 /*
2417  * Attempt to fix basic block failures.  Currently we only handle bad key
2418  * orders, we will cycle through the keys and swap them if necessary.
2419  */
2420 static int try_to_fix_bad_block(struct btrfs_trans_handle *trans,
2421                                 struct btrfs_root *root,
2422                                 struct extent_buffer *buf,
2423                                 struct btrfs_disk_key *parent_key,
2424                                 enum btrfs_tree_block_status status)
2425 {
2426         struct btrfs_path *path;
2427         struct btrfs_key k1, k2;
2428         int i;
2429         int level;
2430         int ret;
2431
2432         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
2433                 return -EIO;
2434
2435         k1.objectid = btrfs_header_owner(buf);
2436         k1.type = BTRFS_ROOT_ITEM_KEY;
2437         k1.offset = (u64)-1;
2438
2439         root = btrfs_read_fs_root(root->fs_info, &k1);
2440         if (IS_ERR(root))
2441                 return -EIO;
2442
2443         record_root_in_trans(trans, root);
2444
2445         path = btrfs_alloc_path();
2446         if (!path)
2447                 return -EIO;
2448
2449         level = btrfs_header_level(buf);
2450         path->lowest_level = level;
2451         path->skip_check_block = 1;
2452         if (level)
2453                 btrfs_node_key_to_cpu(buf, &k1, 0);
2454         else
2455                 btrfs_item_key_to_cpu(buf, &k1, 0);
2456
2457         ret = btrfs_search_slot(trans, root, &k1, path, 0, 1);
2458         if (ret) {
2459                 btrfs_free_path(path);
2460                 return -EIO;
2461         }
2462
2463         buf = path->nodes[level];
2464         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
2465                 if (level) {
2466                         btrfs_node_key_to_cpu(buf, &k1, i);
2467                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
2468                 } else {
2469                         btrfs_item_key_to_cpu(buf, &k1, i);
2470                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
2471                 }
2472                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
2473                         continue;
2474                 ret = swap_values(root, path, buf, i);
2475                 if (ret)
2476                         break;
2477                 btrfs_mark_buffer_dirty(buf);
2478                 i = 0;
2479         }
2480
2481         btrfs_free_path(path);
2482         return ret;
2483 }
2484
2485 static int check_block(struct btrfs_trans_handle *trans,
2486                        struct btrfs_root *root,
2487                        struct cache_tree *extent_cache,
2488                        struct extent_buffer *buf, u64 flags)
2489 {
2490         struct extent_record *rec;
2491         struct cache_extent *cache;
2492         struct btrfs_key key;
2493         enum btrfs_tree_block_status status;
2494         int ret = 0;
2495         int level;
2496
2497         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
2498         if (!cache)
2499                 return 1;
2500         rec = container_of(cache, struct extent_record, cache);
2501         rec->generation = btrfs_header_generation(buf);
2502
2503         level = btrfs_header_level(buf);
2504         if (btrfs_header_nritems(buf) > 0) {
2505
2506                 if (level == 0)
2507                         btrfs_item_key_to_cpu(buf, &key, 0);
2508                 else
2509                         btrfs_node_key_to_cpu(buf, &key, 0);
2510
2511                 rec->info_objectid = key.objectid;
2512         }
2513         rec->info_level = level;
2514
2515         if (btrfs_is_leaf(buf))
2516                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
2517         else
2518                 status = btrfs_check_node(root, &rec->parent_key, buf);
2519
2520         if (status != BTRFS_TREE_BLOCK_CLEAN) {
2521                 if (repair)
2522                         status = try_to_fix_bad_block(trans, root, buf,
2523                                                       &rec->parent_key,
2524                                                       status);
2525                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2526                         ret = -EIO;
2527                         fprintf(stderr, "bad block %llu\n",
2528                                 (unsigned long long)buf->start);
2529                 } else {
2530                         /*
2531                          * Signal to callers we need to start the scan over
2532                          * again since we'll have cow'ed blocks.
2533                          */
2534                         ret = -EAGAIN;
2535                 }
2536         } else {
2537                 rec->content_checked = 1;
2538                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2539                         rec->owner_ref_checked = 1;
2540                 else {
2541                         ret = check_owner_ref(root, rec, buf);
2542                         if (!ret)
2543                                 rec->owner_ref_checked = 1;
2544                 }
2545         }
2546         if (!ret)
2547                 maybe_free_extent_rec(extent_cache, rec);
2548         return ret;
2549 }
2550
2551 static struct tree_backref *find_tree_backref(struct extent_record *rec,
2552                                                 u64 parent, u64 root)
2553 {
2554         struct list_head *cur = rec->backrefs.next;
2555         struct extent_backref *node;
2556         struct tree_backref *back;
2557
2558         while(cur != &rec->backrefs) {
2559                 node = list_entry(cur, struct extent_backref, list);
2560                 cur = cur->next;
2561                 if (node->is_data)
2562                         continue;
2563                 back = (struct tree_backref *)node;
2564                 if (parent > 0) {
2565                         if (!node->full_backref)
2566                                 continue;
2567                         if (parent == back->parent)
2568                                 return back;
2569                 } else {
2570                         if (node->full_backref)
2571                                 continue;
2572                         if (back->root == root)
2573                                 return back;
2574                 }
2575         }
2576         return NULL;
2577 }
2578
2579 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
2580                                                 u64 parent, u64 root)
2581 {
2582         struct tree_backref *ref = malloc(sizeof(*ref));
2583         memset(&ref->node, 0, sizeof(ref->node));
2584         if (parent > 0) {
2585                 ref->parent = parent;
2586                 ref->node.full_backref = 1;
2587         } else {
2588                 ref->root = root;
2589                 ref->node.full_backref = 0;
2590         }
2591         list_add_tail(&ref->node.list, &rec->backrefs);
2592
2593         return ref;
2594 }
2595
2596 static struct data_backref *find_data_backref(struct extent_record *rec,
2597                                                 u64 parent, u64 root,
2598                                                 u64 owner, u64 offset,
2599                                                 int found_ref,
2600                                                 u64 disk_bytenr, u64 bytes)
2601 {
2602         struct list_head *cur = rec->backrefs.next;
2603         struct extent_backref *node;
2604         struct data_backref *back;
2605
2606         while(cur != &rec->backrefs) {
2607                 node = list_entry(cur, struct extent_backref, list);
2608                 cur = cur->next;
2609                 if (!node->is_data)
2610                         continue;
2611                 back = (struct data_backref *)node;
2612                 if (parent > 0) {
2613                         if (!node->full_backref)
2614                                 continue;
2615                         if (parent == back->parent)
2616                                 return back;
2617                 } else {
2618                         if (node->full_backref)
2619                                 continue;
2620                         if (back->root == root && back->owner == owner &&
2621                             back->offset == offset) {
2622                                 if (found_ref && node->found_ref &&
2623                                     (back->bytes != bytes ||
2624                                     back->disk_bytenr != disk_bytenr))
2625                                         continue;
2626                                 return back;
2627                         }
2628                 }
2629         }
2630         return NULL;
2631 }
2632
2633 static struct data_backref *alloc_data_backref(struct extent_record *rec,
2634                                                 u64 parent, u64 root,
2635                                                 u64 owner, u64 offset,
2636                                                 u64 max_size)
2637 {
2638         struct data_backref *ref = malloc(sizeof(*ref));
2639         memset(&ref->node, 0, sizeof(ref->node));
2640         ref->node.is_data = 1;
2641
2642         if (parent > 0) {
2643                 ref->parent = parent;
2644                 ref->owner = 0;
2645                 ref->offset = 0;
2646                 ref->node.full_backref = 1;
2647         } else {
2648                 ref->root = root;
2649                 ref->owner = owner;
2650                 ref->offset = offset;
2651                 ref->node.full_backref = 0;
2652         }
2653         ref->bytes = max_size;
2654         ref->found_ref = 0;
2655         ref->num_refs = 0;
2656         list_add_tail(&ref->node.list, &rec->backrefs);
2657         if (max_size > rec->max_size)
2658                 rec->max_size = max_size;
2659         return ref;
2660 }
2661
2662 static int add_extent_rec(struct cache_tree *extent_cache,
2663                           struct btrfs_key *parent_key, u64 parent_gen,
2664                           u64 start, u64 nr, u64 extent_item_refs,
2665                           int is_root, int inc_ref, int set_checked,
2666                           int metadata, int extent_rec, u64 max_size)
2667 {
2668         struct extent_record *rec;
2669         struct cache_extent *cache;
2670         int ret = 0;
2671         int dup = 0;
2672
2673         cache = lookup_cache_extent(extent_cache, start, nr);
2674         if (cache) {
2675                 rec = container_of(cache, struct extent_record, cache);
2676                 if (inc_ref)
2677                         rec->refs++;
2678                 if (rec->nr == 1)
2679                         rec->nr = max(nr, max_size);
2680
2681                 /*
2682                  * We need to make sure to reset nr to whatever the extent
2683                  * record says was the real size, this way we can compare it to
2684                  * the backrefs.
2685                  */
2686                 if (extent_rec) {
2687                         if (start != rec->start || rec->found_rec) {
2688                                 struct extent_record *tmp;
2689
2690                                 dup = 1;
2691                                 if (list_empty(&rec->list))
2692                                         list_add_tail(&rec->list,
2693                                                       &duplicate_extents);
2694
2695                                 /*
2696                                  * We have to do this song and dance in case we
2697                                  * find an extent record that falls inside of
2698                                  * our current extent record but does not have
2699                                  * the same objectid.
2700                                  */
2701                                 tmp = malloc(sizeof(*tmp));
2702                                 if (!tmp)
2703                                         return -ENOMEM;
2704                                 tmp->start = start;
2705                                 tmp->max_size = max_size;
2706                                 tmp->nr = nr;
2707                                 tmp->found_rec = 1;
2708                                 tmp->metadata = metadata;
2709                                 tmp->extent_item_refs = extent_item_refs;
2710                                 INIT_LIST_HEAD(&tmp->list);
2711                                 list_add_tail(&tmp->list, &rec->dups);
2712                                 rec->num_duplicates++;
2713                         } else {
2714                                 rec->nr = nr;
2715                                 rec->found_rec = 1;
2716                         }
2717                 }
2718
2719                 if (extent_item_refs && !dup) {
2720                         if (rec->extent_item_refs) {
2721                                 fprintf(stderr, "block %llu rec "
2722                                         "extent_item_refs %llu, passed %llu\n",
2723                                         (unsigned long long)start,
2724                                         (unsigned long long)
2725                                                         rec->extent_item_refs,
2726                                         (unsigned long long)extent_item_refs);
2727                         }
2728                         rec->extent_item_refs = extent_item_refs;
2729                 }
2730                 if (is_root)
2731                         rec->is_root = 1;
2732                 if (set_checked) {
2733                         rec->content_checked = 1;
2734                         rec->owner_ref_checked = 1;
2735                 }
2736
2737                 if (parent_key)
2738                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2739                 if (parent_gen)
2740                         rec->parent_generation = parent_gen;
2741
2742                 if (rec->max_size < max_size)
2743                         rec->max_size = max_size;
2744
2745                 maybe_free_extent_rec(extent_cache, rec);
2746                 return ret;
2747         }
2748         rec = malloc(sizeof(*rec));
2749         rec->start = start;
2750         rec->max_size = max_size;
2751         rec->nr = max(nr, max_size);
2752         rec->found_rec = !!extent_rec;
2753         rec->content_checked = 0;
2754         rec->owner_ref_checked = 0;
2755         rec->num_duplicates = 0;
2756         rec->metadata = metadata;
2757         INIT_LIST_HEAD(&rec->backrefs);
2758         INIT_LIST_HEAD(&rec->dups);
2759         INIT_LIST_HEAD(&rec->list);
2760
2761         if (is_root)
2762                 rec->is_root = 1;
2763         else
2764                 rec->is_root = 0;
2765
2766         if (inc_ref)
2767                 rec->refs = 1;
2768         else
2769                 rec->refs = 0;
2770
2771         if (extent_item_refs)
2772                 rec->extent_item_refs = extent_item_refs;
2773         else
2774                 rec->extent_item_refs = 0;
2775
2776         if (parent_key)
2777                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2778         else
2779                 memset(&rec->parent_key, 0, sizeof(*parent_key));
2780
2781         if (parent_gen)
2782                 rec->parent_generation = parent_gen;
2783         else
2784                 rec->parent_generation = 0;
2785
2786         rec->cache.start = start;
2787         rec->cache.size = nr;
2788         ret = insert_cache_extent(extent_cache, &rec->cache);
2789         BUG_ON(ret);
2790         bytes_used += nr;
2791         if (set_checked) {
2792                 rec->content_checked = 1;
2793                 rec->owner_ref_checked = 1;
2794         }
2795         return ret;
2796 }
2797
2798 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
2799                             u64 parent, u64 root, int found_ref)
2800 {
2801         struct extent_record *rec;
2802         struct tree_backref *back;
2803         struct cache_extent *cache;
2804
2805         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2806         if (!cache) {
2807                 add_extent_rec(extent_cache, NULL, 0, bytenr,
2808                                1, 0, 0, 0, 0, 1, 0, 0);
2809                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2810                 if (!cache)
2811                         abort();
2812         }
2813
2814         rec = container_of(cache, struct extent_record, cache);
2815         if (rec->start != bytenr) {
2816                 abort();
2817         }
2818
2819         back = find_tree_backref(rec, parent, root);
2820         if (!back)
2821                 back = alloc_tree_backref(rec, parent, root);
2822
2823         if (found_ref) {
2824                 if (back->node.found_ref) {
2825                         fprintf(stderr, "Extent back ref already exists "
2826                                 "for %llu parent %llu root %llu \n",
2827                                 (unsigned long long)bytenr,
2828                                 (unsigned long long)parent,
2829                                 (unsigned long long)root);
2830                 }
2831                 back->node.found_ref = 1;
2832         } else {
2833                 if (back->node.found_extent_tree) {
2834                         fprintf(stderr, "Extent back ref already exists "
2835                                 "for %llu parent %llu root %llu \n",
2836                                 (unsigned long long)bytenr,
2837                                 (unsigned long long)parent,
2838                                 (unsigned long long)root);
2839                 }
2840                 back->node.found_extent_tree = 1;
2841         }
2842         maybe_free_extent_rec(extent_cache, rec);
2843         return 0;
2844 }
2845
2846 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
2847                             u64 parent, u64 root, u64 owner, u64 offset,
2848                             u32 num_refs, int found_ref, u64 max_size)
2849 {
2850         struct extent_record *rec;
2851         struct data_backref *back;
2852         struct cache_extent *cache;
2853
2854         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2855         if (!cache) {
2856                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
2857                                0, 0, max_size);
2858                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2859                 if (!cache)
2860                         abort();
2861         }
2862
2863         rec = container_of(cache, struct extent_record, cache);
2864         if (rec->max_size < max_size)
2865                 rec->max_size = max_size;
2866
2867         /*
2868          * If found_ref is set then max_size is the real size and must match the
2869          * existing refs.  So if we have already found a ref then we need to
2870          * make sure that this ref matches the existing one, otherwise we need
2871          * to add a new backref so we can notice that the backrefs don't match
2872          * and we need to figure out who is telling the truth.  This is to
2873          * account for that awful fsync bug I introduced where we'd end up with
2874          * a btrfs_file_extent_item that would have its length include multiple
2875          * prealloc extents or point inside of a prealloc extent.
2876          */
2877         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
2878                                  bytenr, max_size);
2879         if (!back)
2880                 back = alloc_data_backref(rec, parent, root, owner, offset,
2881                                           max_size);
2882
2883         if (found_ref) {
2884                 BUG_ON(num_refs != 1);
2885                 if (back->node.found_ref)
2886                         BUG_ON(back->bytes != max_size);
2887                 back->node.found_ref = 1;
2888                 back->found_ref += 1;
2889                 back->bytes = max_size;
2890                 back->disk_bytenr = bytenr;
2891                 rec->refs += 1;
2892                 rec->content_checked = 1;
2893                 rec->owner_ref_checked = 1;
2894         } else {
2895                 if (back->node.found_extent_tree) {
2896                         fprintf(stderr, "Extent back ref already exists "
2897                                 "for %llu parent %llu root %llu "
2898                                 "owner %llu offset %llu num_refs %lu\n",
2899                                 (unsigned long long)bytenr,
2900                                 (unsigned long long)parent,
2901                                 (unsigned long long)root,
2902                                 (unsigned long long)owner,
2903                                 (unsigned long long)offset,
2904                                 (unsigned long)num_refs);
2905                 }
2906                 back->num_refs = num_refs;
2907                 back->node.found_extent_tree = 1;
2908         }
2909         maybe_free_extent_rec(extent_cache, rec);
2910         return 0;
2911 }
2912
2913 static int add_pending(struct cache_tree *pending,
2914                        struct cache_tree *seen, u64 bytenr, u32 size)
2915 {
2916         int ret;
2917         ret = add_cache_extent(seen, bytenr, size);
2918         if (ret)
2919                 return ret;
2920         add_cache_extent(pending, bytenr, size);
2921         return 0;
2922 }
2923
2924 static int pick_next_pending(struct cache_tree *pending,
2925                         struct cache_tree *reada,
2926                         struct cache_tree *nodes,
2927                         u64 last, struct block_info *bits, int bits_nr,
2928                         int *reada_bits)
2929 {
2930         unsigned long node_start = last;
2931         struct cache_extent *cache;
2932         int ret;
2933
2934         cache = search_cache_extent(reada, 0);
2935         if (cache) {
2936                 bits[0].start = cache->start;
2937                 bits[0].size = cache->size;
2938                 *reada_bits = 1;
2939                 return 1;
2940         }
2941         *reada_bits = 0;
2942         if (node_start > 32768)
2943                 node_start -= 32768;
2944
2945         cache = search_cache_extent(nodes, node_start);
2946         if (!cache)
2947                 cache = search_cache_extent(nodes, 0);
2948
2949         if (!cache) {
2950                  cache = search_cache_extent(pending, 0);
2951                  if (!cache)
2952                          return 0;
2953                  ret = 0;
2954                  do {
2955                          bits[ret].start = cache->start;
2956                          bits[ret].size = cache->size;
2957                          cache = next_cache_extent(cache);
2958                          ret++;
2959                  } while (cache && ret < bits_nr);
2960                  return ret;
2961         }
2962
2963         ret = 0;
2964         do {
2965                 bits[ret].start = cache->start;
2966                 bits[ret].size = cache->size;
2967                 cache = next_cache_extent(cache);
2968                 ret++;
2969         } while (cache && ret < bits_nr);
2970
2971         if (bits_nr - ret > 8) {
2972                 u64 lookup = bits[0].start + bits[0].size;
2973                 struct cache_extent *next;
2974                 next = search_cache_extent(pending, lookup);
2975                 while(next) {
2976                         if (next->start - lookup > 32768)
2977                                 break;
2978                         bits[ret].start = next->start;
2979                         bits[ret].size = next->size;
2980                         lookup = next->start + next->size;
2981                         ret++;
2982                         if (ret == bits_nr)
2983                                 break;
2984                         next = next_cache_extent(next);
2985                         if (!next)
2986                                 break;
2987                 }
2988         }
2989         return ret;
2990 }
2991
2992 static void free_chunk_record(struct cache_extent *cache)
2993 {
2994         struct chunk_record *rec;
2995
2996         rec = container_of(cache, struct chunk_record, cache);
2997         free(rec);
2998 }
2999
3000 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
3001 {
3002         cache_tree_free_extents(chunk_cache, free_chunk_record);
3003 }
3004
3005 static void free_device_record(struct rb_node *node)
3006 {
3007         struct device_record *rec;
3008
3009         rec = container_of(node, struct device_record, node);
3010         free(rec);
3011 }
3012
3013 FREE_RB_BASED_TREE(device_cache, free_device_record);
3014
3015 int insert_block_group_record(struct block_group_tree *tree,
3016                               struct block_group_record *bg_rec)
3017 {
3018         int ret;
3019
3020         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
3021         if (ret)
3022                 return ret;
3023
3024         list_add_tail(&bg_rec->list, &tree->block_groups);
3025         return 0;
3026 }
3027
3028 static void free_block_group_record(struct cache_extent *cache)
3029 {
3030         struct block_group_record *rec;
3031
3032         rec = container_of(cache, struct block_group_record, cache);
3033         free(rec);
3034 }
3035
3036 void free_block_group_tree(struct block_group_tree *tree)
3037 {
3038         cache_tree_free_extents(&tree->tree, free_block_group_record);
3039 }
3040
3041 int insert_device_extent_record(struct device_extent_tree *tree,
3042                                 struct device_extent_record *de_rec)
3043 {
3044         int ret;
3045
3046         /*
3047          * Device extent is a bit different from the other extents, because
3048          * the extents which belong to the different devices may have the
3049          * same start and size, so we need use the special extent cache
3050          * search/insert functions.
3051          */
3052         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
3053         if (ret)
3054                 return ret;
3055
3056         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
3057         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
3058         return 0;
3059 }
3060
3061 static void free_device_extent_record(struct cache_extent *cache)
3062 {
3063         struct device_extent_record *rec;
3064
3065         rec = container_of(cache, struct device_extent_record, cache);
3066         free(rec);
3067 }
3068
3069 void free_device_extent_tree(struct device_extent_tree *tree)
3070 {
3071         cache_tree_free_extents(&tree->tree, free_device_extent_record);
3072 }
3073
3074 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3075 static int process_extent_ref_v0(struct cache_tree *extent_cache,
3076                                  struct extent_buffer *leaf, int slot)
3077 {
3078         struct btrfs_extent_ref_v0 *ref0;
3079         struct btrfs_key key;
3080
3081         btrfs_item_key_to_cpu(leaf, &key, slot);
3082         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
3083         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
3084                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
3085         } else {
3086                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
3087                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
3088         }
3089         return 0;
3090 }
3091 #endif
3092
3093 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
3094                                             struct btrfs_key *key,
3095                                             int slot)
3096 {
3097         struct btrfs_chunk *ptr;
3098         struct chunk_record *rec;
3099         int num_stripes, i;
3100
3101         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3102         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
3103
3104         rec = malloc(btrfs_chunk_record_size(num_stripes));
3105         if (!rec) {
3106                 fprintf(stderr, "memory allocation failed\n");
3107                 exit(-1);
3108         }
3109
3110         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
3111
3112         INIT_LIST_HEAD(&rec->list);
3113         INIT_LIST_HEAD(&rec->dextents);
3114         rec->bg_rec = NULL;
3115
3116         rec->cache.start = key->offset;
3117         rec->cache.size = btrfs_chunk_length(leaf, ptr);
3118
3119         rec->generation = btrfs_header_generation(leaf);
3120
3121         rec->objectid = key->objectid;
3122         rec->type = key->type;
3123         rec->offset = key->offset;
3124
3125         rec->length = rec->cache.size;
3126         rec->owner = btrfs_chunk_owner(leaf, ptr);
3127         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
3128         rec->type_flags = btrfs_chunk_type(leaf, ptr);
3129         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
3130         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
3131         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
3132         rec->num_stripes = num_stripes;
3133         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
3134
3135         for (i = 0; i < rec->num_stripes; ++i) {
3136                 rec->stripes[i].devid =
3137                         btrfs_stripe_devid_nr(leaf, ptr, i);
3138                 rec->stripes[i].offset =
3139                         btrfs_stripe_offset_nr(leaf, ptr, i);
3140                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
3141                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
3142                                 BTRFS_UUID_SIZE);
3143         }
3144
3145         return rec;
3146 }
3147
3148 static int process_chunk_item(struct cache_tree *chunk_cache,
3149                               struct btrfs_key *key, struct extent_buffer *eb,
3150                               int slot)
3151 {
3152         struct chunk_record *rec;
3153         int ret = 0;
3154
3155         rec = btrfs_new_chunk_record(eb, key, slot);
3156         ret = insert_cache_extent(chunk_cache, &rec->cache);
3157         if (ret) {
3158                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
3159                         rec->offset, rec->length);
3160                 free(rec);
3161         }
3162
3163         return ret;
3164 }
3165
3166 static int process_device_item(struct rb_root *dev_cache,
3167                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
3168 {
3169         struct btrfs_dev_item *ptr;
3170         struct device_record *rec;
3171         int ret = 0;
3172
3173         ptr = btrfs_item_ptr(eb,
3174                 slot, struct btrfs_dev_item);
3175
3176         rec = malloc(sizeof(*rec));
3177         if (!rec) {
3178                 fprintf(stderr, "memory allocation failed\n");
3179                 return -ENOMEM;
3180         }
3181
3182         rec->devid = key->offset;
3183         rec->generation = btrfs_header_generation(eb);
3184
3185         rec->objectid = key->objectid;
3186         rec->type = key->type;
3187         rec->offset = key->offset;
3188
3189         rec->devid = btrfs_device_id(eb, ptr);
3190         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
3191         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
3192
3193         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
3194         if (ret) {
3195                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
3196                 free(rec);
3197         }
3198
3199         return ret;
3200 }
3201
3202 struct block_group_record *
3203 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
3204                              int slot)
3205 {
3206         struct btrfs_block_group_item *ptr;
3207         struct block_group_record *rec;
3208
3209         rec = malloc(sizeof(*rec));
3210         if (!rec) {
3211                 fprintf(stderr, "memory allocation failed\n");
3212                 exit(-1);
3213         }
3214         memset(rec, 0, sizeof(*rec));
3215
3216         rec->cache.start = key->objectid;
3217         rec->cache.size = key->offset;
3218
3219         rec->generation = btrfs_header_generation(leaf);
3220
3221         rec->objectid = key->objectid;
3222         rec->type = key->type;
3223         rec->offset = key->offset;
3224
3225         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
3226         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
3227
3228         INIT_LIST_HEAD(&rec->list);
3229
3230         return rec;
3231 }
3232
3233 static int process_block_group_item(struct block_group_tree *block_group_cache,
3234                                     struct btrfs_key *key,
3235                                     struct extent_buffer *eb, int slot)
3236 {
3237         struct block_group_record *rec;
3238         int ret = 0;
3239
3240         rec = btrfs_new_block_group_record(eb, key, slot);
3241         ret = insert_block_group_record(block_group_cache, rec);
3242         if (ret) {
3243                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
3244                         rec->objectid, rec->offset);
3245                 free(rec);
3246         }
3247
3248         return ret;
3249 }
3250
3251 struct device_extent_record *
3252 btrfs_new_device_extent_record(struct extent_buffer *leaf,
3253                                struct btrfs_key *key, int slot)
3254 {
3255         struct device_extent_record *rec;
3256         struct btrfs_dev_extent *ptr;
3257
3258         rec = malloc(sizeof(*rec));
3259         if (!rec) {
3260                 fprintf(stderr, "memory allocation failed\n");
3261                 exit(-1);
3262         }
3263         memset(rec, 0, sizeof(*rec));
3264
3265         rec->cache.objectid = key->objectid;
3266         rec->cache.start = key->offset;
3267
3268         rec->generation = btrfs_header_generation(leaf);
3269
3270         rec->objectid = key->objectid;
3271         rec->type = key->type;
3272         rec->offset = key->offset;
3273
3274         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
3275         rec->chunk_objecteid =
3276                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
3277         rec->chunk_offset =
3278                 btrfs_dev_extent_chunk_offset(leaf, ptr);
3279         rec->length = btrfs_dev_extent_length(leaf, ptr);
3280         rec->cache.size = rec->length;
3281
3282         INIT_LIST_HEAD(&rec->chunk_list);
3283         INIT_LIST_HEAD(&rec->device_list);
3284
3285         return rec;
3286 }
3287
3288 static int
3289 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
3290                            struct btrfs_key *key, struct extent_buffer *eb,
3291                            int slot)
3292 {
3293         struct device_extent_record *rec;
3294         int ret;
3295
3296         rec = btrfs_new_device_extent_record(eb, key, slot);
3297         ret = insert_device_extent_record(dev_extent_cache, rec);
3298         if (ret) {
3299                 fprintf(stderr,
3300                         "Device extent[%llu, %llu, %llu] existed.\n",
3301                         rec->objectid, rec->offset, rec->length);
3302                 free(rec);
3303         }
3304
3305         return ret;
3306 }
3307
3308 static int process_extent_item(struct btrfs_root *root,
3309                                struct cache_tree *extent_cache,
3310                                struct extent_buffer *eb, int slot)
3311 {
3312         struct btrfs_extent_item *ei;
3313         struct btrfs_extent_inline_ref *iref;
3314         struct btrfs_extent_data_ref *dref;
3315         struct btrfs_shared_data_ref *sref;
3316         struct btrfs_key key;
3317         unsigned long end;
3318         unsigned long ptr;
3319         int type;
3320         u32 item_size = btrfs_item_size_nr(eb, slot);
3321         u64 refs = 0;
3322         u64 offset;
3323         u64 num_bytes;
3324         int metadata = 0;
3325
3326         btrfs_item_key_to_cpu(eb, &key, slot);
3327
3328         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3329                 metadata = 1;
3330                 num_bytes = root->leafsize;
3331         } else {
3332                 num_bytes = key.offset;
3333         }
3334
3335         if (item_size < sizeof(*ei)) {
3336 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3337                 struct btrfs_extent_item_v0 *ei0;
3338                 BUG_ON(item_size != sizeof(*ei0));
3339                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
3340                 refs = btrfs_extent_refs_v0(eb, ei0);
3341 #else
3342                 BUG();
3343 #endif
3344                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
3345                                       num_bytes, refs, 0, 0, 0, metadata, 1,
3346                                       num_bytes);
3347         }
3348
3349         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
3350         refs = btrfs_extent_refs(eb, ei);
3351
3352         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
3353                        refs, 0, 0, 0, metadata, 1, num_bytes);
3354
3355         ptr = (unsigned long)(ei + 1);
3356         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
3357             key.type == BTRFS_EXTENT_ITEM_KEY)
3358                 ptr += sizeof(struct btrfs_tree_block_info);
3359
3360         end = (unsigned long)ei + item_size;
3361         while (ptr < end) {
3362                 iref = (struct btrfs_extent_inline_ref *)ptr;
3363                 type = btrfs_extent_inline_ref_type(eb, iref);
3364                 offset = btrfs_extent_inline_ref_offset(eb, iref);
3365                 switch (type) {
3366                 case BTRFS_TREE_BLOCK_REF_KEY:
3367                         add_tree_backref(extent_cache, key.objectid,
3368                                          0, offset, 0);
3369                         break;
3370                 case BTRFS_SHARED_BLOCK_REF_KEY:
3371                         add_tree_backref(extent_cache, key.objectid,
3372                                          offset, 0, 0);
3373                         break;
3374                 case BTRFS_EXTENT_DATA_REF_KEY:
3375                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3376                         add_data_backref(extent_cache, key.objectid, 0,
3377                                         btrfs_extent_data_ref_root(eb, dref),
3378                                         btrfs_extent_data_ref_objectid(eb,
3379                                                                        dref),
3380                                         btrfs_extent_data_ref_offset(eb, dref),
3381                                         btrfs_extent_data_ref_count(eb, dref),
3382                                         0, num_bytes);
3383                         break;
3384                 case BTRFS_SHARED_DATA_REF_KEY:
3385                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
3386                         add_data_backref(extent_cache, key.objectid, offset,
3387                                         0, 0, 0,
3388                                         btrfs_shared_data_ref_count(eb, sref),
3389                                         0, num_bytes);
3390                         break;
3391                 default:
3392                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
3393                                 key.objectid, key.type, num_bytes);
3394                         goto out;
3395                 }
3396                 ptr += btrfs_extent_inline_ref_size(type);
3397         }
3398         WARN_ON(ptr > end);
3399 out:
3400         return 0;
3401 }
3402
3403 static int check_cache_range(struct btrfs_root *root,
3404                              struct btrfs_block_group_cache *cache,
3405                              u64 offset, u64 bytes)
3406 {
3407         struct btrfs_free_space *entry;
3408         u64 *logical;
3409         u64 bytenr;
3410         int stripe_len;
3411         int i, nr, ret;
3412
3413         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3414                 bytenr = btrfs_sb_offset(i);
3415                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
3416                                        cache->key.objectid, bytenr, 0,
3417                                        &logical, &nr, &stripe_len);
3418                 if (ret)
3419                         return ret;
3420
3421                 while (nr--) {
3422                         if (logical[nr] + stripe_len <= offset)
3423                                 continue;
3424                         if (offset + bytes <= logical[nr])
3425                                 continue;
3426                         if (logical[nr] == offset) {
3427                                 if (stripe_len >= bytes) {
3428                                         kfree(logical);
3429                                         return 0;
3430                                 }
3431                                 bytes -= stripe_len;
3432                                 offset += stripe_len;
3433                         } else if (logical[nr] < offset) {
3434                                 if (logical[nr] + stripe_len >=
3435                                     offset + bytes) {
3436                                         kfree(logical);
3437                                         return 0;
3438                                 }
3439                                 bytes = (offset + bytes) -
3440                                         (logical[nr] + stripe_len);
3441                                 offset = logical[nr] + stripe_len;
3442                         } else {
3443                                 /*
3444                                  * Could be tricky, the super may land in the
3445                                  * middle of the area we're checking.  First
3446                                  * check the easiest case, it's at the end.
3447                                  */
3448                                 if (logical[nr] + stripe_len >=
3449                                     bytes + offset) {
3450                                         bytes = logical[nr] - offset;
3451                                         continue;
3452                                 }
3453
3454                                 /* Check the left side */
3455                                 ret = check_cache_range(root, cache,
3456                                                         offset,
3457                                                         logical[nr] - offset);
3458                                 if (ret) {
3459                                         kfree(logical);
3460                                         return ret;
3461                                 }
3462
3463                                 /* Now we continue with the right side */
3464                                 bytes = (offset + bytes) -
3465                                         (logical[nr] + stripe_len);
3466                                 offset = logical[nr] + stripe_len;
3467                         }
3468                 }
3469
3470                 kfree(logical);
3471         }
3472
3473         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
3474         if (!entry) {
3475                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
3476                         offset, offset+bytes);
3477                 return -EINVAL;
3478         }
3479
3480         if (entry->offset != offset) {
3481                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
3482                         entry->offset);
3483                 return -EINVAL;
3484         }
3485
3486         if (entry->bytes != bytes) {
3487                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
3488                         bytes, entry->bytes, offset);
3489                 return -EINVAL;
3490         }
3491
3492         unlink_free_space(cache->free_space_ctl, entry);
3493         free(entry);
3494         return 0;
3495 }
3496
3497 static int verify_space_cache(struct btrfs_root *root,
3498                               struct btrfs_block_group_cache *cache)
3499 {
3500         struct btrfs_path *path;
3501         struct extent_buffer *leaf;
3502         struct btrfs_key key;
3503         u64 last;
3504         int ret = 0;
3505
3506         path = btrfs_alloc_path();
3507         if (!path)
3508                 return -ENOMEM;
3509
3510         root = root->fs_info->extent_root;
3511
3512         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
3513
3514         key.objectid = last;
3515         key.offset = 0;
3516         key.type = BTRFS_EXTENT_ITEM_KEY;
3517
3518         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3519         if (ret < 0)
3520                 goto out;
3521         ret = 0;
3522         while (1) {
3523                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3524                         ret = btrfs_next_leaf(root, path);
3525                         if (ret < 0)
3526                                 goto out;
3527                         if (ret > 0) {
3528                                 ret = 0;
3529                                 break;
3530                         }
3531                 }
3532                 leaf = path->nodes[0];
3533                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3534                 if (key.objectid >= cache->key.offset + cache->key.objectid)
3535                         break;
3536                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3537                     key.type != BTRFS_METADATA_ITEM_KEY) {
3538                         path->slots[0]++;
3539                         continue;
3540                 }
3541
3542                 if (last == key.objectid) {
3543                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
3544                                 last = key.objectid + key.offset;
3545                         else
3546                                 last = key.objectid + root->leafsize;
3547                         path->slots[0]++;
3548                         continue;
3549                 }
3550
3551                 ret = check_cache_range(root, cache, last,
3552                                         key.objectid - last);
3553                 if (ret)
3554                         break;
3555                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3556                         last = key.objectid + key.offset;
3557                 else
3558                         last = key.objectid + root->leafsize;
3559                 path->slots[0]++;
3560         }
3561
3562         if (last < cache->key.objectid + cache->key.offset)
3563                 ret = check_cache_range(root, cache, last,
3564                                         cache->key.objectid +
3565                                         cache->key.offset - last);
3566
3567 out:
3568         btrfs_free_path(path);
3569
3570         if (!ret &&
3571             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
3572                 fprintf(stderr, "There are still entries left in the space "
3573                         "cache\n");
3574                 ret = -EINVAL;
3575         }
3576
3577         return ret;
3578 }
3579
3580 static int check_space_cache(struct btrfs_root *root)
3581 {
3582         struct btrfs_block_group_cache *cache;
3583         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
3584         int ret;
3585         int error = 0;
3586
3587         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
3588             btrfs_super_generation(root->fs_info->super_copy) !=
3589             btrfs_super_cache_generation(root->fs_info->super_copy)) {
3590                 printf("cache and super generation don't match, space cache "
3591                        "will be invalidated\n");
3592                 return 0;
3593         }
3594
3595         while (1) {
3596                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
3597                 if (!cache)
3598                         break;
3599
3600                 start = cache->key.objectid + cache->key.offset;
3601                 if (!cache->free_space_ctl) {
3602                         if (btrfs_init_free_space_ctl(cache,
3603                                                       root->sectorsize)) {
3604                                 ret = -ENOMEM;
3605                                 break;
3606                         }
3607                 } else {
3608                         btrfs_remove_free_space_cache(cache);
3609                 }
3610
3611                 ret = load_free_space_cache(root->fs_info, cache);
3612                 if (!ret)
3613                         continue;
3614
3615                 ret = verify_space_cache(root, cache);
3616                 if (ret) {
3617                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
3618                                 cache->key.objectid);
3619                         error++;
3620                 }
3621         }
3622
3623         return error ? -EINVAL : 0;
3624 }
3625
3626 static int read_extent_data(struct btrfs_root *root, char *data,
3627                         u64 logical, u64 *len, int mirror)
3628 {
3629         u64 offset = 0;
3630         struct btrfs_multi_bio *multi = NULL;
3631         struct btrfs_fs_info *info = root->fs_info;
3632         struct btrfs_device *device;
3633         int ret = 0;
3634         u64 max_len = *len;
3635
3636         ret = btrfs_map_block(&info->mapping_tree, READ, logical, len,
3637                               &multi, mirror, NULL);
3638         if (ret) {
3639                 fprintf(stderr, "Couldn't map the block %llu\n",
3640                                 logical + offset);
3641                 goto err;
3642         }
3643         device = multi->stripes[0].dev;
3644
3645         if (device->fd == 0)
3646                 goto err;
3647         if (*len > max_len)
3648                 *len = max_len;
3649
3650         ret = pread64(device->fd, data, *len, multi->stripes[0].physical);
3651         if (ret != *len)
3652                 ret = -EIO;
3653         else
3654                 ret = 0;
3655 err:
3656         kfree(multi);
3657         return ret;
3658 }
3659
3660 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
3661                         u64 num_bytes, unsigned long leaf_offset,
3662                         struct extent_buffer *eb) {
3663
3664         u64 offset = 0;
3665         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3666         char *data;
3667         unsigned long csum_offset;
3668         u32 csum;
3669         u32 csum_expected;
3670         u64 read_len;
3671         u64 data_checked = 0;
3672         u64 tmp;
3673         int ret = 0;
3674         int mirror;
3675         int num_copies;
3676
3677         if (num_bytes % root->sectorsize)
3678                 return -EINVAL;
3679
3680         data = malloc(num_bytes);
3681         if (!data)
3682                 return -ENOMEM;
3683
3684         while (offset < num_bytes) {
3685                 mirror = 0;
3686 again:
3687                 read_len = num_bytes - offset;
3688                 /* read as much space once a time */
3689                 ret = read_extent_data(root, data + offset,
3690                                 bytenr + offset, &read_len, mirror);
3691                 if (ret)
3692                         goto out;
3693                 data_checked = 0;
3694                 /* verify every 4k data's checksum */
3695                 while (data_checked < read_len) {
3696                         csum = ~(u32)0;
3697                         tmp = offset + data_checked;
3698
3699                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
3700                                                csum, root->sectorsize);
3701                         btrfs_csum_final(csum, (char *)&csum);
3702
3703                         csum_offset = leaf_offset +
3704                                  tmp / root->sectorsize * csum_size;
3705                         read_extent_buffer(eb, (char *)&csum_expected,
3706                                            csum_offset, csum_size);
3707                         /* try another mirror */
3708                         if (csum != csum_expected) {
3709                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
3710                                                 mirror, bytenr + tmp,
3711                                                 csum, csum_expected);
3712                                 num_copies = btrfs_num_copies(
3713                                                 &root->fs_info->mapping_tree,
3714                                                 bytenr, num_bytes);
3715                                 if (mirror < num_copies - 1) {
3716                                         mirror += 1;
3717                                         goto again;
3718                                 }
3719                         }
3720                         data_checked += root->sectorsize;
3721                 }
3722                 offset += read_len;
3723         }
3724 out:
3725         free(data);
3726         return ret;
3727 }
3728
3729 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
3730                                u64 num_bytes)
3731 {
3732         struct btrfs_path *path;
3733         struct extent_buffer *leaf;
3734         struct btrfs_key key;
3735         int ret;
3736
3737         path = btrfs_alloc_path();
3738         if (!path) {
3739                 fprintf(stderr, "Error allocing path\n");
3740                 return -ENOMEM;
3741         }
3742
3743         key.objectid = bytenr;
3744         key.type = BTRFS_EXTENT_ITEM_KEY;
3745         key.offset = 0;
3746
3747
3748 again:
3749         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
3750                                 0, 0);
3751         if (ret < 0) {
3752                 fprintf(stderr, "Error looking up extent record %d\n", ret);
3753                 btrfs_free_path(path);
3754                 return ret;
3755         } else if (ret) {
3756                 if (path->slots[0])
3757                         path->slots[0]--;
3758                 else
3759                         btrfs_prev_leaf(root, path);
3760         }
3761
3762         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3763
3764         /*
3765          * Block group items come before extent items if they have the same
3766          * bytenr, so walk back one more just in case.  Dear future traveler,
3767          * first congrats on mastering time travel.  Now if it's not too much
3768          * trouble could you go back to 2006 and tell Chris to make the
3769          * BLOCK_GROUP_ITEM_KEY lower than the EXTENT_ITEM_KEY please?
3770          */
3771         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3772                 if (path->slots[0])
3773                         path->slots[0]--;
3774                 else
3775                         btrfs_prev_leaf(root, path);
3776         }
3777
3778         while (num_bytes) {
3779                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3780                         ret = btrfs_next_leaf(root, path);
3781                         if (ret < 0) {
3782                                 fprintf(stderr, "Error going to next leaf "
3783                                         "%d\n", ret);
3784                                 btrfs_free_path(path);
3785                                 return ret;
3786                         } else if (ret) {
3787                                 break;
3788                         }
3789                 }
3790                 leaf = path->nodes[0];
3791                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3792                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
3793                         path->slots[0]++;
3794                         continue;
3795                 }
3796                 if (key.objectid + key.offset < bytenr) {
3797                         path->slots[0]++;
3798                         continue;
3799                 }
3800                 if (key.objectid > bytenr + num_bytes)
3801                         break;
3802
3803                 if (key.objectid == bytenr) {
3804                         if (key.offset >= num_bytes) {
3805                                 num_bytes = 0;
3806                                 break;
3807                         }
3808                         num_bytes -= key.offset;
3809                         bytenr += key.offset;
3810                 } else if (key.objectid < bytenr) {
3811                         if (key.objectid + key.offset >= bytenr + num_bytes) {
3812                                 num_bytes = 0;
3813                                 break;
3814                         }
3815                         num_bytes = (bytenr + num_bytes) -
3816                                 (key.objectid + key.offset);
3817                         bytenr = key.objectid + key.offset;
3818                 } else {
3819                         if (key.objectid + key.offset < bytenr + num_bytes) {
3820                                 u64 new_start = key.objectid + key.offset;
3821                                 u64 new_bytes = bytenr + num_bytes - new_start;
3822
3823                                 /*
3824                                  * Weird case, the extent is in the middle of
3825                                  * our range, we'll have to search one side
3826                                  * and then the other.  Not sure if this happens
3827                                  * in real life, but no harm in coding it up
3828                                  * anyway just in case.
3829                                  */
3830                                 btrfs_release_path(path);
3831                                 ret = check_extent_exists(root, new_start,
3832                                                           new_bytes);
3833                                 if (ret) {
3834                                         fprintf(stderr, "Right section didn't "
3835                                                 "have a record\n");
3836                                         break;
3837                                 }
3838                                 num_bytes = key.objectid - bytenr;
3839                                 goto again;
3840                         }
3841                         num_bytes = key.objectid - bytenr;
3842                 }
3843                 path->slots[0]++;
3844         }
3845         ret = 0;
3846
3847         if (num_bytes) {
3848                 fprintf(stderr, "There are no extents for csum range "
3849                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
3850                 ret = 1;
3851         }
3852
3853         btrfs_free_path(path);
3854         return ret;
3855 }
3856
3857 static int check_csums(struct btrfs_root *root)
3858 {
3859         struct btrfs_path *path;
3860         struct extent_buffer *leaf;
3861         struct btrfs_key key;
3862         u64 offset = 0, num_bytes = 0;
3863         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3864         int errors = 0;
3865         int ret;
3866         u64 data_len;
3867         unsigned long leaf_offset;
3868
3869         root = root->fs_info->csum_root;
3870
3871         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3872         key.type = BTRFS_EXTENT_CSUM_KEY;
3873         key.offset = 0;
3874
3875         path = btrfs_alloc_path();
3876         if (!path)
3877                 return -ENOMEM;
3878
3879         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3880         if (ret < 0) {
3881                 fprintf(stderr, "Error searching csum tree %d\n", ret);
3882                 btrfs_free_path(path);
3883                 return ret;
3884         }
3885
3886         if (ret > 0 && path->slots[0])
3887                 path->slots[0]--;
3888         ret = 0;
3889
3890         while (1) {
3891                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3892                         ret = btrfs_next_leaf(root, path);
3893                         if (ret < 0) {
3894                                 fprintf(stderr, "Error going to next leaf "
3895                                         "%d\n", ret);
3896                                 break;
3897                         }
3898                         if (ret)
3899                                 break;
3900                 }
3901                 leaf = path->nodes[0];
3902
3903                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3904                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
3905                         path->slots[0]++;
3906                         continue;
3907                 }
3908
3909                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
3910                               csum_size) * root->sectorsize;
3911                 if (!check_data_csum)
3912                         goto skip_csum_check;
3913                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
3914                 ret = check_extent_csums(root, key.offset, data_len,
3915                                          leaf_offset, leaf);
3916                 if (ret)
3917                         break;
3918 skip_csum_check:
3919                 if (!num_bytes) {
3920                         offset = key.offset;
3921                 } else if (key.offset != offset + num_bytes) {
3922                         ret = check_extent_exists(root, offset, num_bytes);
3923                         if (ret) {
3924                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
3925                                         "there is no extent record\n",
3926                                         offset, offset+num_bytes);
3927                                 errors++;
3928                         }
3929                         offset = key.offset;
3930                         num_bytes = 0;
3931                 }
3932                 num_bytes += data_len;
3933                 path->slots[0]++;
3934         }
3935
3936         btrfs_free_path(path);
3937         return errors;
3938 }
3939
3940 static int is_dropped_key(struct btrfs_key *key,
3941                           struct btrfs_key *drop_key) {
3942         if (key->objectid < drop_key->objectid)
3943                 return 1;
3944         else if (key->objectid == drop_key->objectid) {
3945                 if (key->type < drop_key->type)
3946                         return 1;
3947                 else if (key->type == drop_key->type) {
3948                         if (key->offset < drop_key->offset)
3949                                 return 1;
3950                 }
3951         }
3952         return 0;
3953 }
3954
3955 static int run_next_block(struct btrfs_trans_handle *trans,
3956                           struct btrfs_root *root,
3957                           struct block_info *bits,
3958                           int bits_nr,
3959                           u64 *last,
3960                           struct cache_tree *pending,
3961                           struct cache_tree *seen,
3962                           struct cache_tree *reada,
3963                           struct cache_tree *nodes,
3964                           struct cache_tree *extent_cache,
3965                           struct cache_tree *chunk_cache,
3966                           struct rb_root *dev_cache,
3967                           struct block_group_tree *block_group_cache,
3968                           struct device_extent_tree *dev_extent_cache,
3969                           struct btrfs_root_item *ri)
3970 {
3971         struct extent_buffer *buf;
3972         u64 bytenr;
3973         u32 size;
3974         u64 parent;
3975         u64 owner;
3976         u64 flags;
3977         u64 ptr;
3978         u64 gen = 0;
3979         int ret = 0;
3980         int i;
3981         int nritems;
3982         struct btrfs_key key;
3983         struct cache_extent *cache;
3984         int reada_bits;
3985
3986         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
3987                                     bits_nr, &reada_bits);
3988         if (nritems == 0)
3989                 return 1;
3990
3991         if (!reada_bits) {
3992                 for(i = 0; i < nritems; i++) {
3993                         ret = add_cache_extent(reada, bits[i].start,
3994                                                bits[i].size);
3995                         if (ret == -EEXIST)
3996                                 continue;
3997
3998                         /* fixme, get the parent transid */
3999                         readahead_tree_block(root, bits[i].start,
4000                                              bits[i].size, 0);
4001                 }
4002         }
4003         *last = bits[0].start;
4004         bytenr = bits[0].start;
4005         size = bits[0].size;
4006
4007         cache = lookup_cache_extent(pending, bytenr, size);
4008         if (cache) {
4009                 remove_cache_extent(pending, cache);
4010                 free(cache);
4011         }
4012         cache = lookup_cache_extent(reada, bytenr, size);
4013         if (cache) {
4014                 remove_cache_extent(reada, cache);
4015                 free(cache);
4016         }
4017         cache = lookup_cache_extent(nodes, bytenr, size);
4018         if (cache) {
4019                 remove_cache_extent(nodes, cache);
4020                 free(cache);
4021         }
4022         cache = lookup_cache_extent(extent_cache, bytenr, size);
4023         if (cache) {
4024                 struct extent_record *rec;
4025
4026                 rec = container_of(cache, struct extent_record, cache);
4027                 gen = rec->parent_generation;
4028         }
4029
4030         /* fixme, get the real parent transid */
4031         buf = read_tree_block(root, bytenr, size, gen);
4032         if (!extent_buffer_uptodate(buf)) {
4033                 record_bad_block_io(root->fs_info,
4034                                     extent_cache, bytenr, size);
4035                 goto out;
4036         }
4037
4038         nritems = btrfs_header_nritems(buf);
4039
4040         /*
4041          * FIXME, this only works only if we don't have any full
4042          * backref mode.
4043          */
4044         if (!init_extent_tree) {
4045                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
4046                                        btrfs_header_level(buf), 1, NULL,
4047                                        &flags);
4048                 if (ret < 0)
4049                         flags = 0;
4050         } else {
4051                 flags = 0;
4052         }
4053
4054         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4055                 parent = bytenr;
4056                 owner = 0;
4057         } else {
4058                 parent = 0;
4059                 owner = btrfs_header_owner(buf);
4060         }
4061
4062         ret = check_block(trans, root, extent_cache, buf, flags);
4063         if (ret)
4064                 goto out;
4065
4066         if (btrfs_is_leaf(buf)) {
4067                 btree_space_waste += btrfs_leaf_free_space(root, buf);
4068                 for (i = 0; i < nritems; i++) {
4069                         struct btrfs_file_extent_item *fi;
4070                         btrfs_item_key_to_cpu(buf, &key, i);
4071                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
4072                                 process_extent_item(root, extent_cache, buf,
4073                                                     i);
4074                                 continue;
4075                         }
4076                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4077                                 process_extent_item(root, extent_cache, buf,
4078                                                     i);
4079                                 continue;
4080                         }
4081                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
4082                                 total_csum_bytes +=
4083                                         btrfs_item_size_nr(buf, i);
4084                                 continue;
4085                         }
4086                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
4087                                 process_chunk_item(chunk_cache, &key, buf, i);
4088                                 continue;
4089                         }
4090                         if (key.type == BTRFS_DEV_ITEM_KEY) {
4091                                 process_device_item(dev_cache, &key, buf, i);
4092                                 continue;
4093                         }
4094                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
4095                                 process_block_group_item(block_group_cache,
4096                                         &key, buf, i);
4097                                 continue;
4098                         }
4099                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
4100                                 process_device_extent_item(dev_extent_cache,
4101                                         &key, buf, i);
4102                                 continue;
4103
4104                         }
4105                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
4106 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4107                                 process_extent_ref_v0(extent_cache, buf, i);
4108 #else
4109                                 BUG();
4110 #endif
4111                                 continue;
4112                         }
4113
4114                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
4115                                 add_tree_backref(extent_cache, key.objectid, 0,
4116                                                  key.offset, 0);
4117                                 continue;
4118                         }
4119                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
4120                                 add_tree_backref(extent_cache, key.objectid,
4121                                                  key.offset, 0, 0);
4122                                 continue;
4123                         }
4124                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
4125                                 struct btrfs_extent_data_ref *ref;
4126                                 ref = btrfs_item_ptr(buf, i,
4127                                                 struct btrfs_extent_data_ref);
4128                                 add_data_backref(extent_cache,
4129                                         key.objectid, 0,
4130                                         btrfs_extent_data_ref_root(buf, ref),
4131                                         btrfs_extent_data_ref_objectid(buf,
4132                                                                        ref),
4133                                         btrfs_extent_data_ref_offset(buf, ref),
4134                                         btrfs_extent_data_ref_count(buf, ref),
4135                                         0, root->sectorsize);
4136                                 continue;
4137                         }
4138                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
4139                                 struct btrfs_shared_data_ref *ref;
4140                                 ref = btrfs_item_ptr(buf, i,
4141                                                 struct btrfs_shared_data_ref);
4142                                 add_data_backref(extent_cache,
4143                                         key.objectid, key.offset, 0, 0, 0,
4144                                         btrfs_shared_data_ref_count(buf, ref),
4145                                         0, root->sectorsize);
4146                                 continue;
4147                         }
4148                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
4149                                 struct bad_item *bad;
4150
4151                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
4152                                         continue;
4153                                 if (!owner)
4154                                         continue;
4155                                 bad = malloc(sizeof(struct bad_item));
4156                                 if (!bad)
4157                                         continue;
4158                                 INIT_LIST_HEAD(&bad->list);
4159                                 memcpy(&bad->key, &key,
4160                                        sizeof(struct btrfs_key));
4161                                 bad->root_id = owner;
4162                                 list_add_tail(&bad->list, &delete_items);
4163                                 continue;
4164                         }
4165                         if (key.type != BTRFS_EXTENT_DATA_KEY)
4166                                 continue;
4167                         fi = btrfs_item_ptr(buf, i,
4168                                             struct btrfs_file_extent_item);
4169                         if (btrfs_file_extent_type(buf, fi) ==
4170                             BTRFS_FILE_EXTENT_INLINE)
4171                                 continue;
4172                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
4173                                 continue;
4174
4175                         data_bytes_allocated +=
4176                                 btrfs_file_extent_disk_num_bytes(buf, fi);
4177                         if (data_bytes_allocated < root->sectorsize) {
4178                                 abort();
4179                         }
4180                         data_bytes_referenced +=
4181                                 btrfs_file_extent_num_bytes(buf, fi);
4182                         add_data_backref(extent_cache,
4183                                 btrfs_file_extent_disk_bytenr(buf, fi),
4184                                 parent, owner, key.objectid, key.offset -
4185                                 btrfs_file_extent_offset(buf, fi), 1, 1,
4186                                 btrfs_file_extent_disk_num_bytes(buf, fi));
4187                 }
4188         } else {
4189                 int level;
4190                 struct btrfs_key first_key;
4191
4192                 first_key.objectid = 0;
4193
4194                 if (nritems > 0)
4195                         btrfs_item_key_to_cpu(buf, &first_key, 0);
4196                 level = btrfs_header_level(buf);
4197                 for (i = 0; i < nritems; i++) {
4198                         ptr = btrfs_node_blockptr(buf, i);
4199                         size = btrfs_level_size(root, level - 1);
4200                         btrfs_node_key_to_cpu(buf, &key, i);
4201                         if (ri != NULL) {
4202                                 struct btrfs_key drop_key;
4203                                 btrfs_disk_key_to_cpu(&drop_key,
4204                                                       &ri->drop_progress);
4205                                 if ((level == ri->drop_level)
4206                                     && is_dropped_key(&key, &drop_key)) {
4207                                         continue;
4208                                 }
4209                         }
4210                         ret = add_extent_rec(extent_cache, &key,
4211                                              btrfs_node_ptr_generation(buf, i),
4212                                              ptr, size, 0, 0, 1, 0, 1, 0,
4213                                              size);
4214                         BUG_ON(ret);
4215
4216                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
4217
4218                         if (level > 1) {
4219                                 add_pending(nodes, seen, ptr, size);
4220                         } else {
4221                                 add_pending(pending, seen, ptr, size);
4222                         }
4223                 }
4224                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
4225                                       nritems) * sizeof(struct btrfs_key_ptr);
4226         }
4227         total_btree_bytes += buf->len;
4228         if (fs_root_objectid(btrfs_header_owner(buf)))
4229                 total_fs_tree_bytes += buf->len;
4230         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
4231                 total_extent_tree_bytes += buf->len;
4232         if (!found_old_backref &&
4233             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
4234             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
4235             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
4236                 found_old_backref = 1;
4237 out:
4238         free_extent_buffer(buf);
4239         return ret;
4240 }
4241
4242 static int add_root_to_pending(struct extent_buffer *buf,
4243                                struct cache_tree *extent_cache,
4244                                struct cache_tree *pending,
4245                                struct cache_tree *seen,
4246                                struct cache_tree *nodes,
4247                                struct btrfs_key *root_key)
4248 {
4249         if (btrfs_header_level(buf) > 0)
4250                 add_pending(nodes, seen, buf->start, buf->len);
4251         else
4252                 add_pending(pending, seen, buf->start, buf->len);
4253         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
4254                        0, 1, 1, 0, 1, 0, buf->len);
4255
4256         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
4257             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
4258                 add_tree_backref(extent_cache, buf->start, buf->start,
4259                                  0, 1);
4260         else
4261                 add_tree_backref(extent_cache, buf->start, 0,
4262                                  root_key->objectid, 1);
4263         return 0;
4264 }
4265
4266 /* as we fix the tree, we might be deleting blocks that
4267  * we're tracking for repair.  This hook makes sure we
4268  * remove any backrefs for blocks as we are fixing them.
4269  */
4270 static int free_extent_hook(struct btrfs_trans_handle *trans,
4271                             struct btrfs_root *root,
4272                             u64 bytenr, u64 num_bytes, u64 parent,
4273                             u64 root_objectid, u64 owner, u64 offset,
4274                             int refs_to_drop)
4275 {
4276         struct extent_record *rec;
4277         struct cache_extent *cache;
4278         int is_data;
4279         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
4280
4281         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
4282         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
4283         if (!cache)
4284                 return 0;
4285
4286         rec = container_of(cache, struct extent_record, cache);
4287         if (is_data) {
4288                 struct data_backref *back;
4289                 back = find_data_backref(rec, parent, root_objectid, owner,
4290                                          offset, 1, bytenr, num_bytes);
4291                 if (!back)
4292                         goto out;
4293                 if (back->node.found_ref) {
4294                         back->found_ref -= refs_to_drop;
4295                         if (rec->refs)
4296                                 rec->refs -= refs_to_drop;
4297                 }
4298                 if (back->node.found_extent_tree) {
4299                         back->num_refs -= refs_to_drop;
4300                         if (rec->extent_item_refs)
4301                                 rec->extent_item_refs -= refs_to_drop;
4302                 }
4303                 if (back->found_ref == 0)
4304                         back->node.found_ref = 0;
4305                 if (back->num_refs == 0)
4306                         back->node.found_extent_tree = 0;
4307
4308                 if (!back->node.found_extent_tree && back->node.found_ref) {
4309                         list_del(&back->node.list);
4310                         free(back);
4311                 }
4312         } else {
4313                 struct tree_backref *back;
4314                 back = find_tree_backref(rec, parent, root_objectid);
4315                 if (!back)
4316                         goto out;
4317                 if (back->node.found_ref) {
4318                         if (rec->refs)
4319                                 rec->refs--;
4320                         back->node.found_ref = 0;
4321                 }
4322                 if (back->node.found_extent_tree) {
4323                         if (rec->extent_item_refs)
4324                                 rec->extent_item_refs--;
4325                         back->node.found_extent_tree = 0;
4326                 }
4327                 if (!back->node.found_extent_tree && back->node.found_ref) {
4328                         list_del(&back->node.list);
4329                         free(back);
4330                 }
4331         }
4332         maybe_free_extent_rec(extent_cache, rec);
4333 out:
4334         return 0;
4335 }
4336
4337 static int delete_extent_records(struct btrfs_trans_handle *trans,
4338                                  struct btrfs_root *root,
4339                                  struct btrfs_path *path,
4340                                  u64 bytenr, u64 new_len)
4341 {
4342         struct btrfs_key key;
4343         struct btrfs_key found_key;
4344         struct extent_buffer *leaf;
4345         int ret;
4346         int slot;
4347
4348
4349         key.objectid = bytenr;
4350         key.type = (u8)-1;
4351         key.offset = (u64)-1;
4352
4353         while(1) {
4354                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
4355                                         &key, path, 0, 1);
4356                 if (ret < 0)
4357                         break;
4358
4359                 if (ret > 0) {
4360                         ret = 0;
4361                         if (path->slots[0] == 0)
4362                                 break;
4363                         path->slots[0]--;
4364                 }
4365                 ret = 0;
4366
4367                 leaf = path->nodes[0];
4368                 slot = path->slots[0];
4369
4370                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4371                 if (found_key.objectid != bytenr)
4372                         break;
4373
4374                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
4375                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
4376                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4377                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
4378                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
4379                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
4380                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
4381                         btrfs_release_path(path);
4382                         if (found_key.type == 0) {
4383                                 if (found_key.offset == 0)
4384                                         break;
4385                                 key.offset = found_key.offset - 1;
4386                                 key.type = found_key.type;
4387                         }
4388                         key.type = found_key.type - 1;
4389                         key.offset = (u64)-1;
4390                         continue;
4391                 }
4392
4393                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
4394                         found_key.objectid, found_key.type, found_key.offset);
4395
4396                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
4397                 if (ret)
4398                         break;
4399                 btrfs_release_path(path);
4400
4401                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4402                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
4403                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
4404                                 found_key.offset : root->leafsize;
4405
4406                         ret = btrfs_update_block_group(trans, root, bytenr,
4407                                                        bytes, 0, 0);
4408                         if (ret)
4409                                 break;
4410                 }
4411         }
4412
4413         btrfs_release_path(path);
4414         return ret;
4415 }
4416
4417 /*
4418  * for a single backref, this will allocate a new extent
4419  * and add the backref to it.
4420  */
4421 static int record_extent(struct btrfs_trans_handle *trans,
4422                          struct btrfs_fs_info *info,
4423                          struct btrfs_path *path,
4424                          struct extent_record *rec,
4425                          struct extent_backref *back,
4426                          int allocated, u64 flags)
4427 {
4428         int ret;
4429         struct btrfs_root *extent_root = info->extent_root;
4430         struct extent_buffer *leaf;
4431         struct btrfs_key ins_key;
4432         struct btrfs_extent_item *ei;
4433         struct tree_backref *tback;
4434         struct data_backref *dback;
4435         struct btrfs_tree_block_info *bi;
4436
4437         if (!back->is_data)
4438                 rec->max_size = max_t(u64, rec->max_size,
4439                                     info->extent_root->leafsize);
4440
4441         if (!allocated) {
4442                 u32 item_size = sizeof(*ei);
4443
4444                 if (!back->is_data)
4445                         item_size += sizeof(*bi);
4446
4447                 ins_key.objectid = rec->start;
4448                 ins_key.offset = rec->max_size;
4449                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
4450
4451                 ret = btrfs_insert_empty_item(trans, extent_root, path,
4452                                         &ins_key, item_size);
4453                 if (ret)
4454                         goto fail;
4455
4456                 leaf = path->nodes[0];
4457                 ei = btrfs_item_ptr(leaf, path->slots[0],
4458                                     struct btrfs_extent_item);
4459
4460                 btrfs_set_extent_refs(leaf, ei, 0);
4461                 btrfs_set_extent_generation(leaf, ei, rec->generation);
4462
4463                 if (back->is_data) {
4464                         btrfs_set_extent_flags(leaf, ei,
4465                                                BTRFS_EXTENT_FLAG_DATA);
4466                 } else {
4467                         struct btrfs_disk_key copy_key;;
4468
4469                         tback = (struct tree_backref *)back;
4470                         bi = (struct btrfs_tree_block_info *)(ei + 1);
4471                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
4472                                              sizeof(*bi));
4473
4474                         btrfs_set_disk_key_objectid(&copy_key,
4475                                                     rec->info_objectid);
4476                         btrfs_set_disk_key_type(&copy_key, 0);
4477                         btrfs_set_disk_key_offset(&copy_key, 0);
4478
4479                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
4480                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
4481
4482                         btrfs_set_extent_flags(leaf, ei,
4483                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
4484                 }
4485
4486                 btrfs_mark_buffer_dirty(leaf);
4487                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
4488                                                rec->max_size, 1, 0);
4489                 if (ret)
4490                         goto fail;
4491                 btrfs_release_path(path);
4492         }
4493
4494         if (back->is_data) {
4495                 u64 parent;
4496                 int i;
4497
4498                 dback = (struct data_backref *)back;
4499                 if (back->full_backref)
4500                         parent = dback->parent;
4501                 else
4502                         parent = 0;
4503
4504                 for (i = 0; i < dback->found_ref; i++) {
4505                         /* if parent != 0, we're doing a full backref
4506                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
4507                          * just makes the backref allocator create a data
4508                          * backref
4509                          */
4510                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
4511                                                    rec->start, rec->max_size,
4512                                                    parent,
4513                                                    dback->root,
4514                                                    parent ?
4515                                                    BTRFS_FIRST_FREE_OBJECTID :
4516                                                    dback->owner,
4517                                                    dback->offset);
4518                         if (ret)
4519                                 break;
4520                 }
4521                 fprintf(stderr, "adding new data backref"
4522                                 " on %llu %s %llu owner %llu"
4523                                 " offset %llu found %d\n",
4524                                 (unsigned long long)rec->start,
4525                                 back->full_backref ?
4526                                 "parent" : "root",
4527                                 back->full_backref ?
4528                                 (unsigned long long)parent :
4529                                 (unsigned long long)dback->root,
4530                                 (unsigned long long)dback->owner,
4531                                 (unsigned long long)dback->offset,
4532                                 dback->found_ref);
4533         } else {
4534                 u64 parent;
4535
4536                 tback = (struct tree_backref *)back;
4537                 if (back->full_backref)
4538                         parent = tback->parent;
4539                 else
4540                         parent = 0;
4541
4542                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
4543                                            rec->start, rec->max_size,
4544                                            parent, tback->root, 0, 0);
4545                 fprintf(stderr, "adding new tree backref on "
4546                         "start %llu len %llu parent %llu root %llu\n",
4547                         rec->start, rec->max_size, tback->parent, tback->root);
4548         }
4549         if (ret)
4550                 goto fail;
4551 fail:
4552         btrfs_release_path(path);
4553         return ret;
4554 }
4555
4556 struct extent_entry {
4557         u64 bytenr;
4558         u64 bytes;
4559         int count;
4560         int broken;
4561         struct list_head list;
4562 };
4563
4564 static struct extent_entry *find_entry(struct list_head *entries,
4565                                        u64 bytenr, u64 bytes)
4566 {
4567         struct extent_entry *entry = NULL;
4568
4569         list_for_each_entry(entry, entries, list) {
4570                 if (entry->bytenr == bytenr && entry->bytes == bytes)
4571                         return entry;
4572         }
4573
4574         return NULL;
4575 }
4576
4577 static struct extent_entry *find_most_right_entry(struct list_head *entries)
4578 {
4579         struct extent_entry *entry, *best = NULL, *prev = NULL;
4580
4581         list_for_each_entry(entry, entries, list) {
4582                 if (!prev) {
4583                         prev = entry;
4584                         continue;
4585                 }
4586
4587                 /*
4588                  * If there are as many broken entries as entries then we know
4589                  * not to trust this particular entry.
4590                  */
4591                 if (entry->broken == entry->count)
4592                         continue;
4593
4594                 /*
4595                  * If our current entry == best then we can't be sure our best
4596                  * is really the best, so we need to keep searching.
4597                  */
4598                 if (best && best->count == entry->count) {
4599                         prev = entry;
4600                         best = NULL;
4601                         continue;
4602                 }
4603
4604                 /* Prev == entry, not good enough, have to keep searching */
4605                 if (!prev->broken && prev->count == entry->count)
4606                         continue;
4607
4608                 if (!best)
4609                         best = (prev->count > entry->count) ? prev : entry;
4610                 else if (best->count < entry->count)
4611                         best = entry;
4612                 prev = entry;
4613         }
4614
4615         return best;
4616 }
4617
4618 static int repair_ref(struct btrfs_trans_handle *trans,
4619                       struct btrfs_fs_info *info, struct btrfs_path *path,
4620                       struct data_backref *dback, struct extent_entry *entry)
4621 {
4622         struct btrfs_root *root;
4623         struct btrfs_file_extent_item *fi;
4624         struct extent_buffer *leaf;
4625         struct btrfs_key key;
4626         u64 bytenr, bytes;
4627         int ret;
4628
4629         key.objectid = dback->root;
4630         key.type = BTRFS_ROOT_ITEM_KEY;
4631         key.offset = (u64)-1;
4632         root = btrfs_read_fs_root(info, &key);
4633         if (IS_ERR(root)) {
4634                 fprintf(stderr, "Couldn't find root for our ref\n");
4635                 return -EINVAL;
4636         }
4637
4638         /*
4639          * The backref points to the original offset of the extent if it was
4640          * split, so we need to search down to the offset we have and then walk
4641          * forward until we find the backref we're looking for.
4642          */
4643         key.objectid = dback->owner;
4644         key.type = BTRFS_EXTENT_DATA_KEY;
4645         key.offset = dback->offset;
4646         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4647         if (ret < 0) {
4648                 fprintf(stderr, "Error looking up ref %d\n", ret);
4649                 return ret;
4650         }
4651
4652         while (1) {
4653                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4654                         ret = btrfs_next_leaf(root, path);
4655                         if (ret) {
4656                                 fprintf(stderr, "Couldn't find our ref, next\n");
4657                                 return -EINVAL;
4658                         }
4659                 }
4660                 leaf = path->nodes[0];
4661                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4662                 if (key.objectid != dback->owner ||
4663                     key.type != BTRFS_EXTENT_DATA_KEY) {
4664                         fprintf(stderr, "Couldn't find our ref, search\n");
4665                         return -EINVAL;
4666                 }
4667                 fi = btrfs_item_ptr(leaf, path->slots[0],
4668                                     struct btrfs_file_extent_item);
4669                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4670                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4671
4672                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
4673                         break;
4674                 path->slots[0]++;
4675         }
4676
4677         btrfs_release_path(path);
4678
4679         /*
4680          * Have to make sure that this root gets updated when we commit the
4681          * transaction
4682          */
4683         record_root_in_trans(trans, root);
4684
4685         /*
4686          * Ok we have the key of the file extent we want to fix, now we can cow
4687          * down to the thing and fix it.
4688          */
4689         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4690         if (ret < 0) {
4691                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
4692                         key.objectid, key.type, key.offset, ret);
4693                 return ret;
4694         }
4695         if (ret > 0) {
4696                 fprintf(stderr, "Well that's odd, we just found this key "
4697                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
4698                         key.offset);
4699                 return -EINVAL;
4700         }
4701         leaf = path->nodes[0];
4702         fi = btrfs_item_ptr(leaf, path->slots[0],
4703                             struct btrfs_file_extent_item);
4704
4705         if (btrfs_file_extent_compression(leaf, fi) &&
4706             dback->disk_bytenr != entry->bytenr) {
4707                 fprintf(stderr, "Ref doesn't match the record start and is "
4708                         "compressed, please take a btrfs-image of this file "
4709                         "system and send it to a btrfs developer so they can "
4710                         "complete this functionality for bytenr %Lu\n",
4711                         dback->disk_bytenr);
4712                 return -EINVAL;
4713         }
4714
4715         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
4716                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4717         } else if (dback->disk_bytenr > entry->bytenr) {
4718                 u64 off_diff, offset;
4719
4720                 off_diff = dback->disk_bytenr - entry->bytenr;
4721                 offset = btrfs_file_extent_offset(leaf, fi);
4722                 if (dback->disk_bytenr + offset +
4723                     btrfs_file_extent_num_bytes(leaf, fi) >
4724                     entry->bytenr + entry->bytes) {
4725                         fprintf(stderr, "Ref is past the entry end, please "
4726                                 "take a btrfs-image of this file system and "
4727                                 "send it to a btrfs developer, ref %Lu\n",
4728                                 dback->disk_bytenr);
4729                         return -EINVAL;
4730                 }
4731                 offset += off_diff;
4732                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4733                 btrfs_set_file_extent_offset(leaf, fi, offset);
4734         } else if (dback->disk_bytenr < entry->bytenr) {
4735                 u64 offset;
4736
4737                 offset = btrfs_file_extent_offset(leaf, fi);
4738                 if (dback->disk_bytenr + offset < entry->bytenr) {
4739                         fprintf(stderr, "Ref is before the entry start, please"
4740                                 " take a btrfs-image of this file system and "
4741                                 "send it to a btrfs developer, ref %Lu\n",
4742                                 dback->disk_bytenr);
4743                         return -EINVAL;
4744                 }
4745
4746                 offset += dback->disk_bytenr;
4747                 offset -= entry->bytenr;
4748                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4749                 btrfs_set_file_extent_offset(leaf, fi, offset);
4750         }
4751
4752         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
4753
4754         /*
4755          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
4756          * only do this if we aren't using compression, otherwise it's a
4757          * trickier case.
4758          */
4759         if (!btrfs_file_extent_compression(leaf, fi))
4760                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
4761         else
4762                 printf("ram bytes may be wrong?\n");
4763         btrfs_mark_buffer_dirty(leaf);
4764         btrfs_release_path(path);
4765         return 0;
4766 }
4767
4768 static int verify_backrefs(struct btrfs_trans_handle *trans,
4769                            struct btrfs_fs_info *info, struct btrfs_path *path,
4770                            struct extent_record *rec)
4771 {
4772         struct extent_backref *back;
4773         struct data_backref *dback;
4774         struct extent_entry *entry, *best = NULL;
4775         LIST_HEAD(entries);
4776         int nr_entries = 0;
4777         int broken_entries = 0;
4778         int ret = 0;
4779         short mismatch = 0;
4780
4781         /*
4782          * Metadata is easy and the backrefs should always agree on bytenr and
4783          * size, if not we've got bigger issues.
4784          */
4785         if (rec->metadata)
4786                 return 0;
4787
4788         list_for_each_entry(back, &rec->backrefs, list) {
4789                 dback = (struct data_backref *)back;
4790                 /*
4791                  * We only pay attention to backrefs that we found a real
4792                  * backref for.
4793                  */
4794                 if (dback->found_ref == 0)
4795                         continue;
4796                 if (back->full_backref)
4797                         continue;
4798
4799                 /*
4800                  * For now we only catch when the bytes don't match, not the
4801                  * bytenr.  We can easily do this at the same time, but I want
4802                  * to have a fs image to test on before we just add repair
4803                  * functionality willy-nilly so we know we won't screw up the
4804                  * repair.
4805                  */
4806
4807                 entry = find_entry(&entries, dback->disk_bytenr,
4808                                    dback->bytes);
4809                 if (!entry) {
4810                         entry = malloc(sizeof(struct extent_entry));
4811                         if (!entry) {
4812                                 ret = -ENOMEM;
4813                                 goto out;
4814                         }
4815                         memset(entry, 0, sizeof(*entry));
4816                         entry->bytenr = dback->disk_bytenr;
4817                         entry->bytes = dback->bytes;
4818                         list_add_tail(&entry->list, &entries);
4819                         nr_entries++;
4820                 }
4821
4822                 /*
4823                  * If we only have on entry we may think the entries agree when
4824                  * in reality they don't so we have to do some extra checking.
4825                  */
4826                 if (dback->disk_bytenr != rec->start ||
4827                     dback->bytes != rec->nr || back->broken)
4828                         mismatch = 1;
4829
4830                 if (back->broken) {
4831                         entry->broken++;
4832                         broken_entries++;
4833                 }
4834
4835                 entry->count++;
4836         }
4837
4838         /* Yay all the backrefs agree, carry on good sir */
4839         if (nr_entries <= 1 && !mismatch)
4840                 goto out;
4841
4842         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
4843                 "%Lu\n", rec->start);
4844
4845         /*
4846          * First we want to see if the backrefs can agree amongst themselves who
4847          * is right, so figure out which one of the entries has the highest
4848          * count.
4849          */
4850         best = find_most_right_entry(&entries);
4851
4852         /*
4853          * Ok so we may have an even split between what the backrefs think, so
4854          * this is where we use the extent ref to see what it thinks.
4855          */
4856         if (!best) {
4857                 entry = find_entry(&entries, rec->start, rec->nr);
4858                 if (!entry && (!broken_entries || !rec->found_rec)) {
4859                         fprintf(stderr, "Backrefs don't agree with each other "
4860                                 "and extent record doesn't agree with anybody,"
4861                                 " so we can't fix bytenr %Lu bytes %Lu\n",
4862                                 rec->start, rec->nr);
4863                         ret = -EINVAL;
4864                         goto out;
4865                 } else if (!entry) {
4866                         /*
4867                          * Ok our backrefs were broken, we'll assume this is the
4868                          * correct value and add an entry for this range.
4869                          */
4870                         entry = malloc(sizeof(struct extent_entry));
4871                         if (!entry) {
4872                                 ret = -ENOMEM;
4873                                 goto out;
4874                         }
4875                         memset(entry, 0, sizeof(*entry));
4876                         entry->bytenr = rec->start;
4877                         entry->bytes = rec->nr;
4878                         list_add_tail(&entry->list, &entries);
4879                         nr_entries++;
4880                 }
4881                 entry->count++;
4882                 best = find_most_right_entry(&entries);
4883                 if (!best) {
4884                         fprintf(stderr, "Backrefs and extent record evenly "
4885                                 "split on who is right, this is going to "
4886                                 "require user input to fix bytenr %Lu bytes "
4887                                 "%Lu\n", rec->start, rec->nr);
4888                         ret = -EINVAL;
4889                         goto out;
4890                 }
4891         }
4892
4893         /*
4894          * I don't think this can happen currently as we'll abort() if we catch
4895          * this case higher up, but in case somebody removes that we still can't
4896          * deal with it properly here yet, so just bail out of that's the case.
4897          */
4898         if (best->bytenr != rec->start) {
4899                 fprintf(stderr, "Extent start and backref starts don't match, "
4900                         "please use btrfs-image on this file system and send "
4901                         "it to a btrfs developer so they can make fsck fix "
4902                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
4903                         rec->start, rec->nr);
4904                 ret = -EINVAL;
4905                 goto out;
4906         }
4907
4908         /*
4909          * Ok great we all agreed on an extent record, let's go find the real
4910          * references and fix up the ones that don't match.
4911          */
4912         list_for_each_entry(back, &rec->backrefs, list) {
4913                 dback = (struct data_backref *)back;
4914
4915                 /*
4916                  * Still ignoring backrefs that don't have a real ref attached
4917                  * to them.
4918                  */
4919                 if (dback->found_ref == 0)
4920                         continue;
4921                 if (back->full_backref)
4922                         continue;
4923
4924                 if (dback->bytes == best->bytes &&
4925                     dback->disk_bytenr == best->bytenr)
4926                         continue;
4927
4928                 ret = repair_ref(trans, info, path, dback, best);
4929                 if (ret)
4930                         goto out;
4931         }
4932
4933         /*
4934          * Ok we messed with the actual refs, which means we need to drop our
4935          * entire cache and go back and rescan.  I know this is a huge pain and
4936          * adds a lot of extra work, but it's the only way to be safe.  Once all
4937          * the backrefs agree we may not need to do anything to the extent
4938          * record itself.
4939          */
4940         ret = -EAGAIN;
4941 out:
4942         while (!list_empty(&entries)) {
4943                 entry = list_entry(entries.next, struct extent_entry, list);
4944                 list_del_init(&entry->list);
4945                 free(entry);
4946         }
4947         return ret;
4948 }
4949
4950 static int process_duplicates(struct btrfs_root *root,
4951                               struct cache_tree *extent_cache,
4952                               struct extent_record *rec)
4953 {
4954         struct extent_record *good, *tmp;
4955         struct cache_extent *cache;
4956         int ret;
4957
4958         /*
4959          * If we found a extent record for this extent then return, or if we
4960          * have more than one duplicate we are likely going to need to delete
4961          * something.
4962          */
4963         if (rec->found_rec || rec->num_duplicates > 1)
4964                 return 0;
4965
4966         /* Shouldn't happen but just in case */
4967         BUG_ON(!rec->num_duplicates);
4968
4969         /*
4970          * So this happens if we end up with a backref that doesn't match the
4971          * actual extent entry.  So either the backref is bad or the extent
4972          * entry is bad.  Either way we want to have the extent_record actually
4973          * reflect what we found in the extent_tree, so we need to take the
4974          * duplicate out and use that as the extent_record since the only way we
4975          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
4976          */
4977         remove_cache_extent(extent_cache, &rec->cache);
4978
4979         good = list_entry(rec->dups.next, struct extent_record, list);
4980         list_del_init(&good->list);
4981         INIT_LIST_HEAD(&good->backrefs);
4982         INIT_LIST_HEAD(&good->dups);
4983         good->cache.start = good->start;
4984         good->cache.size = good->nr;
4985         good->content_checked = 0;
4986         good->owner_ref_checked = 0;
4987         good->num_duplicates = 0;
4988         good->refs = rec->refs;
4989         list_splice_init(&rec->backrefs, &good->backrefs);
4990         while (1) {
4991                 cache = lookup_cache_extent(extent_cache, good->start,
4992                                             good->nr);
4993                 if (!cache)
4994                         break;
4995                 tmp = container_of(cache, struct extent_record, cache);
4996
4997                 /*
4998                  * If we find another overlapping extent and it's found_rec is
4999                  * set then it's a duplicate and we need to try and delete
5000                  * something.
5001                  */
5002                 if (tmp->found_rec || tmp->num_duplicates > 0) {
5003                         if (list_empty(&good->list))
5004                                 list_add_tail(&good->list,
5005                                               &duplicate_extents);
5006                         good->num_duplicates += tmp->num_duplicates + 1;
5007                         list_splice_init(&tmp->dups, &good->dups);
5008                         list_del_init(&tmp->list);
5009                         list_add_tail(&tmp->list, &good->dups);
5010                         remove_cache_extent(extent_cache, &tmp->cache);
5011                         continue;
5012                 }
5013
5014                 /*
5015                  * Ok we have another non extent item backed extent rec, so lets
5016                  * just add it to this extent and carry on like we did above.
5017                  */
5018                 good->refs += tmp->refs;
5019                 list_splice_init(&tmp->backrefs, &good->backrefs);
5020                 remove_cache_extent(extent_cache, &tmp->cache);
5021                 free(tmp);
5022         }
5023         ret = insert_cache_extent(extent_cache, &good->cache);
5024         BUG_ON(ret);
5025         free(rec);
5026         return good->num_duplicates ? 0 : 1;
5027 }
5028
5029 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
5030                                     struct btrfs_root *root,
5031                                     struct extent_record *rec)
5032 {
5033         LIST_HEAD(delete_list);
5034         struct btrfs_path *path;
5035         struct extent_record *tmp, *good, *n;
5036         int nr_del = 0;
5037         int ret = 0;
5038         struct btrfs_key key;
5039
5040         path = btrfs_alloc_path();
5041         if (!path) {
5042                 ret = -ENOMEM;
5043                 goto out;
5044         }
5045
5046         good = rec;
5047         /* Find the record that covers all of the duplicates. */
5048         list_for_each_entry(tmp, &rec->dups, list) {
5049                 if (good->start < tmp->start)
5050                         continue;
5051                 if (good->nr > tmp->nr)
5052                         continue;
5053
5054                 if (tmp->start + tmp->nr < good->start + good->nr) {
5055                         fprintf(stderr, "Ok we have overlapping extents that "
5056                                 "aren't completely covered by eachother, this "
5057                                 "is going to require more careful thought.  "
5058                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
5059                                 tmp->start, tmp->nr, good->start, good->nr);
5060                         abort();
5061                 }
5062                 good = tmp;
5063         }
5064
5065         if (good != rec)
5066                 list_add_tail(&rec->list, &delete_list);
5067
5068         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
5069                 if (tmp == good)
5070                         continue;
5071                 list_move_tail(&tmp->list, &delete_list);
5072         }
5073
5074         root = root->fs_info->extent_root;
5075         list_for_each_entry(tmp, &delete_list, list) {
5076                 if (tmp->found_rec == 0)
5077                         continue;
5078                 key.objectid = tmp->start;
5079                 key.type = BTRFS_EXTENT_ITEM_KEY;
5080                 key.offset = tmp->nr;
5081
5082                 /* Shouldn't happen but just in case */
5083                 if (tmp->metadata) {
5084                         fprintf(stderr, "Well this shouldn't happen, extent "
5085                                 "record overlaps but is metadata? "
5086                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
5087                         abort();
5088                 }
5089
5090                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5091                 if (ret) {
5092                         if (ret > 0)
5093                                 ret = -EINVAL;
5094                         goto out;
5095                 }
5096                 ret = btrfs_del_item(trans, root, path);
5097                 if (ret)
5098                         goto out;
5099                 btrfs_release_path(path);
5100                 nr_del++;
5101         }
5102
5103 out:
5104         while (!list_empty(&delete_list)) {
5105                 tmp = list_entry(delete_list.next, struct extent_record, list);
5106                 list_del_init(&tmp->list);
5107                 if (tmp == rec)
5108                         continue;
5109                 free(tmp);
5110         }
5111
5112         while (!list_empty(&rec->dups)) {
5113                 tmp = list_entry(rec->dups.next, struct extent_record, list);
5114                 list_del_init(&tmp->list);
5115                 free(tmp);
5116         }
5117
5118         btrfs_free_path(path);
5119
5120         if (!ret && !nr_del)
5121                 rec->num_duplicates = 0;
5122
5123         return ret ? ret : nr_del;
5124 }
5125
5126 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
5127                                   struct btrfs_fs_info *info,
5128                                   struct btrfs_path *path,
5129                                   struct cache_tree *extent_cache,
5130                                   struct extent_record *rec)
5131 {
5132         struct btrfs_root *root;
5133         struct extent_backref *back;
5134         struct data_backref *dback;
5135         struct cache_extent *cache;
5136         struct btrfs_file_extent_item *fi;
5137         struct btrfs_key key;
5138         u64 bytenr, bytes;
5139         int ret;
5140
5141         list_for_each_entry(back, &rec->backrefs, list) {
5142                 dback = (struct data_backref *)back;
5143
5144                 /* We found this one, we don't need to do a lookup */
5145                 if (dback->found_ref)
5146                         continue;
5147                 /* Don't care about full backrefs (poor unloved backrefs) */
5148                 if (back->full_backref)
5149                         continue;
5150                 key.objectid = dback->root;
5151                 key.type = BTRFS_ROOT_ITEM_KEY;
5152                 key.offset = (u64)-1;
5153
5154                 root = btrfs_read_fs_root(info, &key);
5155
5156                 /* No root, definitely a bad ref, skip */
5157                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
5158                         continue;
5159                 /* Other err, exit */
5160                 if (IS_ERR(root))
5161                         return PTR_ERR(root);
5162
5163                 key.objectid = dback->owner;
5164                 key.type = BTRFS_EXTENT_DATA_KEY;
5165                 key.offset = dback->offset;
5166                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5167                 if (ret) {
5168                         btrfs_release_path(path);
5169                         if (ret < 0)
5170                                 return ret;
5171                         /* Didn't find it, we can carry on */
5172                         ret = 0;
5173                         continue;
5174                 }
5175
5176                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5177                                     struct btrfs_file_extent_item);
5178                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
5179                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
5180                 btrfs_release_path(path);
5181                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5182                 if (cache) {
5183                         struct extent_record *tmp;
5184                         tmp = container_of(cache, struct extent_record, cache);
5185
5186                         /*
5187                          * If we found an extent record for the bytenr for this
5188                          * particular backref then we can't add it to our
5189                          * current extent record.  We only want to add backrefs
5190                          * that don't have a corresponding extent item in the
5191                          * extent tree since they likely belong to this record
5192                          * and we need to fix it if it doesn't match bytenrs.
5193                          */
5194                         if  (tmp->found_rec)
5195                                 continue;
5196                 }
5197
5198                 dback->found_ref += 1;
5199                 dback->disk_bytenr = bytenr;
5200                 dback->bytes = bytes;
5201
5202                 /*
5203                  * Set this so the verify backref code knows not to trust the
5204                  * values in this backref.
5205                  */
5206                 back->broken = 1;
5207         }
5208
5209         return 0;
5210 }
5211
5212 /*
5213  * when an incorrect extent item is found, this will delete
5214  * all of the existing entries for it and recreate them
5215  * based on what the tree scan found.
5216  */
5217 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
5218                              struct btrfs_fs_info *info,
5219                              struct cache_tree *extent_cache,
5220                              struct extent_record *rec)
5221 {
5222         int ret;
5223         struct btrfs_path *path;
5224         struct list_head *cur = rec->backrefs.next;
5225         struct cache_extent *cache;
5226         struct extent_backref *back;
5227         int allocated = 0;
5228         u64 flags = 0;
5229
5230         /*
5231          * remember our flags for recreating the extent.
5232          * FIXME, if we have cleared extent tree, we can not
5233          * lookup extent info in extent tree.
5234          */
5235         if (!init_extent_tree) {
5236                 ret = btrfs_lookup_extent_info(NULL, info->extent_root,
5237                                         rec->start, rec->max_size,
5238                                         rec->metadata, NULL, &flags);
5239                 if (ret < 0)
5240                         flags = 0;
5241         } else {
5242                 flags = 0;
5243         }
5244
5245         path = btrfs_alloc_path();
5246         if (!path)
5247                 return -ENOMEM;
5248
5249         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
5250                 /*
5251                  * Sometimes the backrefs themselves are so broken they don't
5252                  * get attached to any meaningful rec, so first go back and
5253                  * check any of our backrefs that we couldn't find and throw
5254                  * them into the list if we find the backref so that
5255                  * verify_backrefs can figure out what to do.
5256                  */
5257                 ret = find_possible_backrefs(trans, info, path, extent_cache,
5258                                              rec);
5259                 if (ret < 0)
5260                         goto out;
5261         }
5262
5263         /* step one, make sure all of the backrefs agree */
5264         ret = verify_backrefs(trans, info, path, rec);
5265         if (ret < 0)
5266                 goto out;
5267
5268         /* step two, delete all the existing records */
5269         ret = delete_extent_records(trans, info->extent_root, path,
5270                                     rec->start, rec->max_size);
5271
5272         if (ret < 0)
5273                 goto out;
5274
5275         /* was this block corrupt?  If so, don't add references to it */
5276         cache = lookup_cache_extent(info->corrupt_blocks,
5277                                     rec->start, rec->max_size);
5278         if (cache) {
5279                 ret = 0;
5280                 goto out;
5281         }
5282
5283         /* step three, recreate all the refs we did find */
5284         while(cur != &rec->backrefs) {
5285                 back = list_entry(cur, struct extent_backref, list);
5286                 cur = cur->next;
5287
5288                 /*
5289                  * if we didn't find any references, don't create a
5290                  * new extent record
5291                  */
5292                 if (!back->found_ref)
5293                         continue;
5294
5295                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
5296                 allocated = 1;
5297
5298                 if (ret)
5299                         goto out;
5300         }
5301 out:
5302         btrfs_free_path(path);
5303         return ret;
5304 }
5305
5306 /* right now we only prune from the extent allocation tree */
5307 static int prune_one_block(struct btrfs_trans_handle *trans,
5308                            struct btrfs_fs_info *info,
5309                            struct btrfs_corrupt_block *corrupt)
5310 {
5311         int ret;
5312         struct btrfs_path path;
5313         struct extent_buffer *eb;
5314         u64 found;
5315         int slot;
5316         int nritems;
5317         int level = corrupt->level + 1;
5318
5319         btrfs_init_path(&path);
5320 again:
5321         /* we want to stop at the parent to our busted block */
5322         path.lowest_level = level;
5323
5324         ret = btrfs_search_slot(trans, info->extent_root,
5325                                 &corrupt->key, &path, -1, 1);
5326
5327         if (ret < 0)
5328                 goto out;
5329
5330         eb = path.nodes[level];
5331         if (!eb) {
5332                 ret = -ENOENT;
5333                 goto out;
5334         }
5335
5336         /*
5337          * hopefully the search gave us the block we want to prune,
5338          * lets try that first
5339          */
5340         slot = path.slots[level];
5341         found =  btrfs_node_blockptr(eb, slot);
5342         if (found == corrupt->cache.start)
5343                 goto del_ptr;
5344
5345         nritems = btrfs_header_nritems(eb);
5346
5347         /* the search failed, lets scan this node and hope we find it */
5348         for (slot = 0; slot < nritems; slot++) {
5349                 found =  btrfs_node_blockptr(eb, slot);
5350                 if (found == corrupt->cache.start)
5351                         goto del_ptr;
5352         }
5353         /*
5354          * we couldn't find the bad block.  TODO, search all the nodes for pointers
5355          * to this block
5356          */
5357         if (eb == info->extent_root->node) {
5358                 ret = -ENOENT;
5359                 goto out;
5360         } else {
5361                 level++;
5362                 btrfs_release_path(&path);
5363                 goto again;
5364         }
5365
5366 del_ptr:
5367         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
5368         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
5369
5370 out:
5371         btrfs_release_path(&path);
5372         return ret;
5373 }
5374
5375 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
5376                                 struct btrfs_fs_info *info)
5377 {
5378         struct cache_extent *cache;
5379         struct btrfs_corrupt_block *corrupt;
5380
5381         cache = search_cache_extent(info->corrupt_blocks, 0);
5382         while (1) {
5383                 if (!cache)
5384                         break;
5385                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5386                 prune_one_block(trans, info, corrupt);
5387                 cache = next_cache_extent(cache);
5388         }
5389         return 0;
5390 }
5391
5392 static void free_corrupt_block(struct cache_extent *cache)
5393 {
5394         struct btrfs_corrupt_block *corrupt;
5395
5396         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5397         free(corrupt);
5398 }
5399
5400 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
5401
5402 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
5403 {
5404         struct btrfs_block_group_cache *cache;
5405         u64 start, end;
5406         int ret;
5407
5408         while (1) {
5409                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
5410                                             &start, &end, EXTENT_DIRTY);
5411                 if (ret)
5412                         break;
5413                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
5414                                    GFP_NOFS);
5415         }
5416
5417         start = 0;
5418         while (1) {
5419                 cache = btrfs_lookup_first_block_group(fs_info, start);
5420                 if (!cache)
5421                         break;
5422                 if (cache->cached)
5423                         cache->cached = 0;
5424                 start = cache->key.objectid + cache->key.offset;
5425         }
5426 }
5427
5428 static int check_extent_refs(struct btrfs_trans_handle *trans,
5429                              struct btrfs_root *root,
5430                              struct cache_tree *extent_cache)
5431 {
5432         struct extent_record *rec;
5433         struct cache_extent *cache;
5434         int err = 0;
5435         int ret = 0;
5436         int fixed = 0;
5437         int had_dups = 0;
5438
5439         if (repair) {
5440                 /*
5441                  * if we're doing a repair, we have to make sure
5442                  * we don't allocate from the problem extents.
5443                  * In the worst case, this will be all the
5444                  * extents in the FS
5445                  */
5446                 cache = search_cache_extent(extent_cache, 0);
5447                 while(cache) {
5448                         rec = container_of(cache, struct extent_record, cache);
5449                         btrfs_pin_extent(root->fs_info,
5450                                          rec->start, rec->max_size);
5451                         cache = next_cache_extent(cache);
5452                 }
5453
5454                 /* pin down all the corrupted blocks too */
5455                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
5456                 while(cache) {
5457                         btrfs_pin_extent(root->fs_info,
5458                                          cache->start, cache->size);
5459                         cache = next_cache_extent(cache);
5460                 }
5461                 prune_corrupt_blocks(trans, root->fs_info);
5462                 reset_cached_block_groups(root->fs_info);
5463         }
5464
5465         /*
5466          * We need to delete any duplicate entries we find first otherwise we
5467          * could mess up the extent tree when we have backrefs that actually
5468          * belong to a different extent item and not the weird duplicate one.
5469          */
5470         while (repair && !list_empty(&duplicate_extents)) {
5471                 rec = list_entry(duplicate_extents.next, struct extent_record,
5472                                  list);
5473                 list_del_init(&rec->list);
5474
5475                 /* Sometimes we can find a backref before we find an actual
5476                  * extent, so we need to process it a little bit to see if there
5477                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
5478                  * if this is a backref screwup.  If we need to delete stuff
5479                  * process_duplicates() will return 0, otherwise it will return
5480                  * 1 and we
5481                  */
5482                 if (process_duplicates(root, extent_cache, rec))
5483                         continue;
5484                 ret = delete_duplicate_records(trans, root, rec);
5485                 if (ret < 0)
5486                         return ret;
5487                 /*
5488                  * delete_duplicate_records will return the number of entries
5489                  * deleted, so if it's greater than 0 then we know we actually
5490                  * did something and we need to remove.
5491                  */
5492                 if (ret)
5493                         had_dups = 1;
5494         }
5495
5496         if (had_dups)
5497                 return -EAGAIN;
5498
5499         while(1) {
5500                 fixed = 0;
5501                 cache = search_cache_extent(extent_cache, 0);
5502                 if (!cache)
5503                         break;
5504                 rec = container_of(cache, struct extent_record, cache);
5505                 if (rec->num_duplicates) {
5506                         fprintf(stderr, "extent item %llu has multiple extent "
5507                                 "items\n", (unsigned long long)rec->start);
5508                         err = 1;
5509                 }
5510
5511                 if (rec->refs != rec->extent_item_refs) {
5512                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
5513                                 (unsigned long long)rec->start,
5514                                 (unsigned long long)rec->nr);
5515                         fprintf(stderr, "extent item %llu, found %llu\n",
5516                                 (unsigned long long)rec->extent_item_refs,
5517                                 (unsigned long long)rec->refs);
5518                         if (!fixed && repair) {
5519                                 ret = fixup_extent_refs(trans, root->fs_info,
5520                                                         extent_cache, rec);
5521                                 if (ret)
5522                                         goto repair_abort;
5523                                 fixed = 1;
5524                         }
5525                         err = 1;
5526
5527                 }
5528                 if (all_backpointers_checked(rec, 1)) {
5529                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
5530                                 (unsigned long long)rec->start,
5531                                 (unsigned long long)rec->nr);
5532
5533                         if (!fixed && repair) {
5534                                 ret = fixup_extent_refs(trans, root->fs_info,
5535                                                         extent_cache, rec);
5536                                 if (ret)
5537                                         goto repair_abort;
5538                                 fixed = 1;
5539                         }
5540
5541                         err = 1;
5542                 }
5543                 if (!rec->owner_ref_checked) {
5544                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
5545                                 (unsigned long long)rec->start,
5546                                 (unsigned long long)rec->nr);
5547                         if (!fixed && repair) {
5548                                 ret = fixup_extent_refs(trans, root->fs_info,
5549                                                         extent_cache, rec);
5550                                 if (ret)
5551                                         goto repair_abort;
5552                                 fixed = 1;
5553                         }
5554                         err = 1;
5555                 }
5556
5557                 remove_cache_extent(extent_cache, cache);
5558                 free_all_extent_backrefs(rec);
5559                 free(rec);
5560         }
5561 repair_abort:
5562         if (repair) {
5563                 if (ret && ret != -EAGAIN) {
5564                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
5565                         exit(1);
5566                 } else if (!ret) {
5567                         btrfs_fix_block_accounting(trans, root);
5568                 }
5569                 if (err)
5570                         fprintf(stderr, "repaired damaged extent references\n");
5571                 return ret;
5572         }
5573         return err;
5574 }
5575
5576 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
5577 {
5578         u64 stripe_size;
5579
5580         if (type & BTRFS_BLOCK_GROUP_RAID0) {
5581                 stripe_size = length;
5582                 stripe_size /= num_stripes;
5583         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
5584                 stripe_size = length * 2;
5585                 stripe_size /= num_stripes;
5586         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
5587                 stripe_size = length;
5588                 stripe_size /= (num_stripes - 1);
5589         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
5590                 stripe_size = length;
5591                 stripe_size /= (num_stripes - 2);
5592         } else {
5593                 stripe_size = length;
5594         }
5595         return stripe_size;
5596 }
5597
5598 static int check_chunk_refs(struct chunk_record *chunk_rec,
5599                             struct block_group_tree *block_group_cache,
5600                             struct device_extent_tree *dev_extent_cache,
5601                             int silent)
5602 {
5603         struct cache_extent *block_group_item;
5604         struct block_group_record *block_group_rec;
5605         struct cache_extent *dev_extent_item;
5606         struct device_extent_record *dev_extent_rec;
5607         u64 devid;
5608         u64 offset;
5609         u64 length;
5610         int i;
5611         int ret = 0;
5612
5613         block_group_item = lookup_cache_extent(&block_group_cache->tree,
5614                                                chunk_rec->offset,
5615                                                chunk_rec->length);
5616         if (block_group_item) {
5617                 block_group_rec = container_of(block_group_item,
5618                                                struct block_group_record,
5619                                                cache);
5620                 if (chunk_rec->length != block_group_rec->offset ||
5621                     chunk_rec->offset != block_group_rec->objectid ||
5622                     chunk_rec->type_flags != block_group_rec->flags) {
5623                         if (!silent)
5624                                 fprintf(stderr,
5625                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
5626                                         chunk_rec->objectid,
5627                                         chunk_rec->type,
5628                                         chunk_rec->offset,
5629                                         chunk_rec->length,
5630                                         chunk_rec->offset,
5631                                         chunk_rec->type_flags,
5632                                         block_group_rec->objectid,
5633                                         block_group_rec->type,
5634                                         block_group_rec->offset,
5635                                         block_group_rec->offset,
5636                                         block_group_rec->objectid,
5637                                         block_group_rec->flags);
5638                         ret = -1;
5639                 } else {
5640                         list_del_init(&block_group_rec->list);
5641                         chunk_rec->bg_rec = block_group_rec;
5642                 }
5643         } else {
5644                 if (!silent)
5645                         fprintf(stderr,
5646                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
5647                                 chunk_rec->objectid,
5648                                 chunk_rec->type,
5649                                 chunk_rec->offset,
5650                                 chunk_rec->length,
5651                                 chunk_rec->offset,
5652                                 chunk_rec->type_flags);
5653                 ret = -1;
5654         }
5655
5656         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
5657                                     chunk_rec->num_stripes);
5658         for (i = 0; i < chunk_rec->num_stripes; ++i) {
5659                 devid = chunk_rec->stripes[i].devid;
5660                 offset = chunk_rec->stripes[i].offset;
5661                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
5662                                                        devid, offset, length);
5663                 if (dev_extent_item) {
5664                         dev_extent_rec = container_of(dev_extent_item,
5665                                                 struct device_extent_record,
5666                                                 cache);
5667                         if (dev_extent_rec->objectid != devid ||
5668                             dev_extent_rec->offset != offset ||
5669                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
5670                             dev_extent_rec->length != length) {
5671                                 if (!silent)
5672                                         fprintf(stderr,
5673                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
5674                                                 chunk_rec->objectid,
5675                                                 chunk_rec->type,
5676                                                 chunk_rec->offset,
5677                                                 chunk_rec->stripes[i].devid,
5678                                                 chunk_rec->stripes[i].offset,
5679                                                 dev_extent_rec->objectid,
5680                                                 dev_extent_rec->offset,
5681                                                 dev_extent_rec->length);
5682                                 ret = -1;
5683                         } else {
5684                                 list_move(&dev_extent_rec->chunk_list,
5685                                           &chunk_rec->dextents);
5686                         }
5687                 } else {
5688                         if (!silent)
5689                                 fprintf(stderr,
5690                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
5691                                         chunk_rec->objectid,
5692                                         chunk_rec->type,
5693                                         chunk_rec->offset,
5694                                         chunk_rec->stripes[i].devid,
5695                                         chunk_rec->stripes[i].offset);
5696                         ret = -1;
5697                 }
5698         }
5699         return ret;
5700 }
5701
5702 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
5703 int check_chunks(struct cache_tree *chunk_cache,
5704                  struct block_group_tree *block_group_cache,
5705                  struct device_extent_tree *dev_extent_cache,
5706                  struct list_head *good, struct list_head *bad, int silent)
5707 {
5708         struct cache_extent *chunk_item;
5709         struct chunk_record *chunk_rec;
5710         struct block_group_record *bg_rec;
5711         struct device_extent_record *dext_rec;
5712         int err;
5713         int ret = 0;
5714
5715         chunk_item = first_cache_extent(chunk_cache);
5716         while (chunk_item) {
5717                 chunk_rec = container_of(chunk_item, struct chunk_record,
5718                                          cache);
5719                 err = check_chunk_refs(chunk_rec, block_group_cache,
5720                                        dev_extent_cache, silent);
5721                 if (err) {
5722                         ret = err;
5723                         if (bad)
5724                                 list_add_tail(&chunk_rec->list, bad);
5725                 } else {
5726                         if (good)
5727                                 list_add_tail(&chunk_rec->list, good);
5728                 }
5729
5730                 chunk_item = next_cache_extent(chunk_item);
5731         }
5732
5733         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
5734                 if (!silent)
5735                         fprintf(stderr,
5736                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
5737                                 bg_rec->objectid,
5738                                 bg_rec->offset,
5739                                 bg_rec->flags);
5740                 if (!ret)
5741                         ret = 1;
5742         }
5743
5744         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
5745                             chunk_list) {
5746                 if (!silent)
5747                         fprintf(stderr,
5748                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
5749                                 dext_rec->objectid,
5750                                 dext_rec->offset,
5751                                 dext_rec->length);
5752                 if (!ret)
5753                         ret = 1;
5754         }
5755         return ret;
5756 }
5757
5758
5759 static int check_device_used(struct device_record *dev_rec,
5760                              struct device_extent_tree *dext_cache)
5761 {
5762         struct cache_extent *cache;
5763         struct device_extent_record *dev_extent_rec;
5764         u64 total_byte = 0;
5765
5766         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
5767         while (cache) {
5768                 dev_extent_rec = container_of(cache,
5769                                               struct device_extent_record,
5770                                               cache);
5771                 if (dev_extent_rec->objectid != dev_rec->devid)
5772                         break;
5773
5774                 list_del(&dev_extent_rec->device_list);
5775                 total_byte += dev_extent_rec->length;
5776                 cache = next_cache_extent(cache);
5777         }
5778
5779         if (total_byte != dev_rec->byte_used) {
5780                 fprintf(stderr,
5781                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
5782                         total_byte, dev_rec->byte_used, dev_rec->objectid,
5783                         dev_rec->type, dev_rec->offset);
5784                 return -1;
5785         } else {
5786                 return 0;
5787         }
5788 }
5789
5790 /* check btrfs_dev_item -> btrfs_dev_extent */
5791 static int check_devices(struct rb_root *dev_cache,
5792                          struct device_extent_tree *dev_extent_cache)
5793 {
5794         struct rb_node *dev_node;
5795         struct device_record *dev_rec;
5796         struct device_extent_record *dext_rec;
5797         int err;
5798         int ret = 0;
5799
5800         dev_node = rb_first(dev_cache);
5801         while (dev_node) {
5802                 dev_rec = container_of(dev_node, struct device_record, node);
5803                 err = check_device_used(dev_rec, dev_extent_cache);
5804                 if (err)
5805                         ret = err;
5806
5807                 dev_node = rb_next(dev_node);
5808         }
5809         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
5810                             device_list) {
5811                 fprintf(stderr,
5812                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
5813                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
5814                 if (!ret)
5815                         ret = 1;
5816         }
5817         return ret;
5818 }
5819
5820 static int check_chunks_and_extents(struct btrfs_root *root)
5821 {
5822         struct rb_root dev_cache;
5823         struct cache_tree chunk_cache;
5824         struct block_group_tree block_group_cache;
5825         struct device_extent_tree dev_extent_cache;
5826         struct cache_tree extent_cache;
5827         struct cache_tree seen;
5828         struct cache_tree pending;
5829         struct cache_tree reada;
5830         struct cache_tree nodes;
5831         struct cache_tree corrupt_blocks;
5832         struct btrfs_path path;
5833         struct btrfs_key key;
5834         struct btrfs_key found_key;
5835         int ret, err = 0;
5836         u64 last = 0;
5837         struct block_info *bits;
5838         int bits_nr;
5839         struct extent_buffer *leaf;
5840         struct btrfs_trans_handle *trans = NULL;
5841         int slot;
5842         struct btrfs_root_item ri;
5843         struct list_head dropping_trees;
5844
5845         dev_cache = RB_ROOT;
5846         cache_tree_init(&chunk_cache);
5847         block_group_tree_init(&block_group_cache);
5848         device_extent_tree_init(&dev_extent_cache);
5849
5850         cache_tree_init(&extent_cache);
5851         cache_tree_init(&seen);
5852         cache_tree_init(&pending);
5853         cache_tree_init(&nodes);
5854         cache_tree_init(&reada);
5855         cache_tree_init(&corrupt_blocks);
5856         INIT_LIST_HEAD(&dropping_trees);
5857
5858         if (repair) {
5859                 trans = btrfs_start_transaction(root, 1);
5860                 if (IS_ERR(trans)) {
5861                         fprintf(stderr, "Error starting transaction\n");
5862                         return PTR_ERR(trans);
5863                 }
5864                 root->fs_info->fsck_extent_cache = &extent_cache;
5865                 root->fs_info->free_extent_hook = free_extent_hook;
5866                 root->fs_info->corrupt_blocks = &corrupt_blocks;
5867         }
5868
5869         bits_nr = 1024;
5870         bits = malloc(bits_nr * sizeof(struct block_info));
5871         if (!bits) {
5872                 perror("malloc");
5873                 exit(1);
5874         }
5875
5876 again:
5877         add_root_to_pending(root->fs_info->tree_root->node,
5878                             &extent_cache, &pending, &seen, &nodes,
5879                             &root->fs_info->tree_root->root_key);
5880
5881         add_root_to_pending(root->fs_info->chunk_root->node,
5882                             &extent_cache, &pending, &seen, &nodes,
5883                             &root->fs_info->chunk_root->root_key);
5884
5885         btrfs_init_path(&path);
5886         key.offset = 0;
5887         key.objectid = 0;
5888         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
5889         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
5890                                         &key, &path, 0, 0);
5891         BUG_ON(ret < 0);
5892         while(1) {
5893                 leaf = path.nodes[0];
5894                 slot = path.slots[0];
5895                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
5896                         ret = btrfs_next_leaf(root, &path);
5897                         if (ret != 0)
5898                                 break;
5899                         leaf = path.nodes[0];
5900                         slot = path.slots[0];
5901                 }
5902                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
5903                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
5904                         unsigned long offset;
5905                         struct extent_buffer *buf;
5906
5907                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5908                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
5909                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
5910                                 buf = read_tree_block(root->fs_info->tree_root,
5911                                                       btrfs_root_bytenr(&ri),
5912                                                       btrfs_level_size(root,
5913                                                       btrfs_root_level(&ri)),
5914                                                       0);
5915                                 if (!buf) {
5916                                         ret = -EIO;
5917                                         goto out;
5918                                 }
5919                                 add_root_to_pending(buf, &extent_cache,
5920                                                     &pending, &seen, &nodes,
5921                                                     &found_key);
5922                                 free_extent_buffer(buf);
5923                         } else {
5924                                 struct dropping_root_item_record *dri_rec;
5925                                 dri_rec = malloc(sizeof(*dri_rec));
5926                                 if (!dri_rec) {
5927                                         perror("malloc");
5928                                         exit(1);
5929                                 }
5930                                 memcpy(&dri_rec->ri, &ri, sizeof(ri));
5931                                 memcpy(&dri_rec->found_key, &found_key,
5932                                        sizeof(found_key));
5933                                 list_add_tail(&dri_rec->list, &dropping_trees);
5934                         }
5935                 }
5936                 path.slots[0]++;
5937         }
5938         btrfs_release_path(&path);
5939         while (1) {
5940                 ret = run_next_block(trans, root, bits, bits_nr, &last,
5941                                      &pending, &seen, &reada, &nodes,
5942                                      &extent_cache, &chunk_cache, &dev_cache,
5943                                      &block_group_cache, &dev_extent_cache,
5944                                      NULL);
5945                 if (ret != 0)
5946                         break;
5947         }
5948
5949         while (!list_empty(&dropping_trees)) {
5950                 struct dropping_root_item_record *rec;
5951                 struct extent_buffer *buf;
5952                 rec = list_entry(dropping_trees.next,
5953                                  struct dropping_root_item_record, list);
5954                 last = 0;
5955                 if (!bits) {
5956                         perror("realloc");
5957                         exit(1);
5958                 }
5959                 buf = read_tree_block(root->fs_info->tree_root,
5960                                       btrfs_root_bytenr(&rec->ri),
5961                                       btrfs_level_size(root,
5962                                       btrfs_root_level(&rec->ri)), 0);
5963                 if (!buf) {
5964                         ret = -EIO;
5965                         goto out;
5966                 }
5967                 add_root_to_pending(buf, &extent_cache, &pending,
5968                                     &seen, &nodes, &rec->found_key);
5969                 while (1) {
5970                         ret = run_next_block(trans, root, bits, bits_nr, &last,
5971                                              &pending, &seen, &reada,
5972                                              &nodes, &extent_cache,
5973                                              &chunk_cache, &dev_cache,
5974                                              &block_group_cache,
5975                                              &dev_extent_cache,
5976                                              &rec->ri);
5977                         if (ret != 0)
5978                                 break;
5979                 }
5980                 free_extent_buffer(buf);
5981                 list_del(&rec->list);
5982                 free(rec);
5983         }
5984
5985         if (ret >= 0)
5986                 ret = check_extent_refs(trans, root, &extent_cache);
5987         if (ret == -EAGAIN) {
5988                 ret = btrfs_commit_transaction(trans, root);
5989                 if (ret)
5990                         goto out;
5991
5992                 trans = btrfs_start_transaction(root, 1);
5993                 if (IS_ERR(trans)) {
5994                         ret = PTR_ERR(trans);
5995                         goto out;
5996                 }
5997
5998                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
5999                 free_extent_cache_tree(&seen);
6000                 free_extent_cache_tree(&pending);
6001                 free_extent_cache_tree(&reada);
6002                 free_extent_cache_tree(&nodes);
6003                 free_extent_record_cache(root->fs_info, &extent_cache);
6004                 goto again;
6005         }
6006
6007         err = check_chunks(&chunk_cache, &block_group_cache,
6008                            &dev_extent_cache, NULL, NULL, 0);
6009         if (err && !ret)
6010                 ret = err;
6011
6012         err = check_devices(&dev_cache, &dev_extent_cache);
6013         if (err && !ret)
6014                 ret = err;
6015
6016         if (trans) {
6017                 err = btrfs_commit_transaction(trans, root);
6018                 if (!ret)
6019                         ret = err;
6020         }
6021 out:
6022         if (repair) {
6023                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6024                 root->fs_info->fsck_extent_cache = NULL;
6025                 root->fs_info->free_extent_hook = NULL;
6026                 root->fs_info->corrupt_blocks = NULL;
6027         }
6028         free(bits);
6029         free_chunk_cache_tree(&chunk_cache);
6030         free_device_cache_tree(&dev_cache);
6031         free_block_group_tree(&block_group_cache);
6032         free_device_extent_tree(&dev_extent_cache);
6033         free_extent_cache_tree(&seen);
6034         free_extent_cache_tree(&pending);
6035         free_extent_cache_tree(&reada);
6036         free_extent_cache_tree(&nodes);
6037         return ret;
6038 }
6039
6040 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
6041                            struct btrfs_root *root, int overwrite)
6042 {
6043         struct extent_buffer *c;
6044         struct extent_buffer *old = root->node;
6045         int level;
6046         int ret;
6047         struct btrfs_disk_key disk_key = {0,0,0};
6048
6049         level = 0;
6050
6051         if (overwrite) {
6052                 c = old;
6053                 extent_buffer_get(c);
6054                 goto init;
6055         }
6056         c = btrfs_alloc_free_block(trans, root,
6057                                    btrfs_level_size(root, 0),
6058                                    root->root_key.objectid,
6059                                    &disk_key, level, 0, 0);
6060         if (IS_ERR(c)) {
6061                 c = old;
6062                 extent_buffer_get(c);
6063                 overwrite = 1;
6064         }
6065 init:
6066         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
6067         btrfs_set_header_level(c, level);
6068         btrfs_set_header_bytenr(c, c->start);
6069         btrfs_set_header_generation(c, trans->transid);
6070         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
6071         btrfs_set_header_owner(c, root->root_key.objectid);
6072
6073         write_extent_buffer(c, root->fs_info->fsid,
6074                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
6075
6076         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
6077                             btrfs_header_chunk_tree_uuid(c),
6078                             BTRFS_UUID_SIZE);
6079
6080         btrfs_mark_buffer_dirty(c);
6081         /*
6082          * this case can happen in the following case:
6083          *
6084          * 1.overwrite previous root.
6085          *
6086          * 2.reinit reloc data root, this is because we skip pin
6087          * down reloc data tree before which means we can allocate
6088          * same block bytenr here.
6089          */
6090         if (old->start == c->start) {
6091                 btrfs_set_root_generation(&root->root_item,
6092                                           trans->transid);
6093                 root->root_item.level = btrfs_header_level(root->node);
6094                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
6095                                         &root->root_key, &root->root_item);
6096                 if (ret) {
6097                         free_extent_buffer(c);
6098                         return ret;
6099                 }
6100         }
6101         free_extent_buffer(old);
6102         root->node = c;
6103         add_root_to_dirty_list(root);
6104         return 0;
6105 }
6106
6107 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
6108                                 struct extent_buffer *eb, int tree_root)
6109 {
6110         struct extent_buffer *tmp;
6111         struct btrfs_root_item *ri;
6112         struct btrfs_key key;
6113         u64 bytenr;
6114         u32 leafsize;
6115         int level = btrfs_header_level(eb);
6116         int nritems;
6117         int ret;
6118         int i;
6119
6120         btrfs_pin_extent(fs_info, eb->start, eb->len);
6121
6122         leafsize = btrfs_super_leafsize(fs_info->super_copy);
6123         nritems = btrfs_header_nritems(eb);
6124         for (i = 0; i < nritems; i++) {
6125                 if (level == 0) {
6126                         btrfs_item_key_to_cpu(eb, &key, i);
6127                         if (key.type != BTRFS_ROOT_ITEM_KEY)
6128                                 continue;
6129                         /* Skip the extent root and reloc roots */
6130                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6131                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
6132                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
6133                                 continue;
6134                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
6135                         bytenr = btrfs_disk_root_bytenr(eb, ri);
6136
6137                         /*
6138                          * If at any point we start needing the real root we
6139                          * will have to build a stump root for the root we are
6140                          * in, but for now this doesn't actually use the root so
6141                          * just pass in extent_root.
6142                          */
6143                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6144                                               leafsize, 0);
6145                         if (!tmp) {
6146                                 fprintf(stderr, "Error reading root block\n");
6147                                 return -EIO;
6148                         }
6149                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
6150                         free_extent_buffer(tmp);
6151                         if (ret)
6152                                 return ret;
6153                 } else {
6154                         bytenr = btrfs_node_blockptr(eb, i);
6155
6156                         /* If we aren't the tree root don't read the block */
6157                         if (level == 1 && !tree_root) {
6158                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
6159                                 continue;
6160                         }
6161
6162                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6163                                               leafsize, 0);
6164                         if (!tmp) {
6165                                 fprintf(stderr, "Error reading tree block\n");
6166                                 return -EIO;
6167                         }
6168                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
6169                         free_extent_buffer(tmp);
6170                         if (ret)
6171                                 return ret;
6172                 }
6173         }
6174
6175         return 0;
6176 }
6177
6178 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
6179 {
6180         int ret;
6181
6182         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
6183         if (ret)
6184                 return ret;
6185
6186         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
6187 }
6188
6189 static int reset_block_groups(struct btrfs_fs_info *fs_info)
6190 {
6191         struct btrfs_block_group_cache *cache;
6192         struct btrfs_path *path;
6193         struct extent_buffer *leaf;
6194         struct btrfs_chunk *chunk;
6195         struct btrfs_key key;
6196         int ret;
6197         u64 start;
6198
6199         path = btrfs_alloc_path();
6200         if (!path)
6201                 return -ENOMEM;
6202
6203         key.objectid = 0;
6204         key.type = BTRFS_CHUNK_ITEM_KEY;
6205         key.offset = 0;
6206
6207         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
6208         if (ret < 0) {
6209                 btrfs_free_path(path);
6210                 return ret;
6211         }
6212
6213         /*
6214          * We do this in case the block groups were screwed up and had alloc
6215          * bits that aren't actually set on the chunks.  This happens with
6216          * restored images every time and could happen in real life I guess.
6217          */
6218         fs_info->avail_data_alloc_bits = 0;
6219         fs_info->avail_metadata_alloc_bits = 0;
6220         fs_info->avail_system_alloc_bits = 0;
6221
6222         /* First we need to create the in-memory block groups */
6223         while (1) {
6224                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6225                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
6226                         if (ret < 0) {
6227                                 btrfs_free_path(path);
6228                                 return ret;
6229                         }
6230                         if (ret) {
6231                                 ret = 0;
6232                                 break;
6233                         }
6234                 }
6235                 leaf = path->nodes[0];
6236                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6237                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
6238                         path->slots[0]++;
6239                         continue;
6240                 }
6241
6242                 chunk = btrfs_item_ptr(leaf, path->slots[0],
6243                                        struct btrfs_chunk);
6244                 btrfs_add_block_group(fs_info, 0,
6245                                       btrfs_chunk_type(leaf, chunk),
6246                                       key.objectid, key.offset,
6247                                       btrfs_chunk_length(leaf, chunk));
6248                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
6249                                  key.offset + btrfs_chunk_length(leaf, chunk),
6250                                  GFP_NOFS);
6251                 path->slots[0]++;
6252         }
6253         start = 0;
6254         while (1) {
6255                 cache = btrfs_lookup_first_block_group(fs_info, start);
6256                 if (!cache)
6257                         break;
6258                 cache->cached = 1;
6259                 start = cache->key.objectid + cache->key.offset;
6260         }
6261
6262         btrfs_free_path(path);
6263         return 0;
6264 }
6265
6266 static int reset_balance(struct btrfs_trans_handle *trans,
6267                          struct btrfs_fs_info *fs_info)
6268 {
6269         struct btrfs_root *root = fs_info->tree_root;
6270         struct btrfs_path *path;
6271         struct extent_buffer *leaf;
6272         struct btrfs_key key;
6273         int del_slot, del_nr = 0;
6274         int ret;
6275         int found = 0;
6276
6277         path = btrfs_alloc_path();
6278         if (!path)
6279                 return -ENOMEM;
6280
6281         key.objectid = BTRFS_BALANCE_OBJECTID;
6282         key.type = BTRFS_BALANCE_ITEM_KEY;
6283         key.offset = 0;
6284
6285         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6286         if (ret) {
6287                 if (ret > 0)
6288                         ret = 0;
6289                 if (!ret)
6290                         goto reinit_data_reloc;
6291                 else
6292                         goto out;
6293         }
6294
6295         ret = btrfs_del_item(trans, root, path);
6296         if (ret)
6297                 goto out;
6298         btrfs_release_path(path);
6299
6300         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
6301         key.type = BTRFS_ROOT_ITEM_KEY;
6302         key.offset = 0;
6303
6304         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6305         if (ret < 0)
6306                 goto out;
6307         while (1) {
6308                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6309                         if (!found)
6310                                 break;
6311
6312                         if (del_nr) {
6313                                 ret = btrfs_del_items(trans, root, path,
6314                                                       del_slot, del_nr);
6315                                 del_nr = 0;
6316                                 if (ret)
6317                                         goto out;
6318                         }
6319                         key.offset++;
6320                         btrfs_release_path(path);
6321
6322                         found = 0;
6323                         ret = btrfs_search_slot(trans, root, &key, path,
6324                                                 -1, 1);
6325                         if (ret < 0)
6326                                 goto out;
6327                         continue;
6328                 }
6329                 found = 1;
6330                 leaf = path->nodes[0];
6331                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6332                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
6333                         break;
6334                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6335                         path->slots[0]++;
6336                         continue;
6337                 }
6338                 if (!del_nr) {
6339                         del_slot = path->slots[0];
6340                         del_nr = 1;
6341                 } else {
6342                         del_nr++;
6343                 }
6344                 path->slots[0]++;
6345         }
6346
6347         if (del_nr) {
6348                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
6349                 if (ret)
6350                         goto out;
6351         }
6352         btrfs_release_path(path);
6353
6354 reinit_data_reloc:
6355         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
6356         key.type = BTRFS_ROOT_ITEM_KEY;
6357         key.offset = (u64)-1;
6358         root = btrfs_read_fs_root(fs_info, &key);
6359         if (IS_ERR(root)) {
6360                 fprintf(stderr, "Error reading data reloc tree\n");
6361                 return PTR_ERR(root);
6362         }
6363         record_root_in_trans(trans, root);
6364         ret = btrfs_fsck_reinit_root(trans, root, 0);
6365         if (ret)
6366                 goto out;
6367         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
6368 out:
6369         btrfs_free_path(path);
6370         return ret;
6371 }
6372
6373 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
6374                               struct btrfs_fs_info *fs_info)
6375 {
6376         u64 start = 0;
6377         int ret;
6378
6379         /*
6380          * The only reason we don't do this is because right now we're just
6381          * walking the trees we find and pinning down their bytes, we don't look
6382          * at any of the leaves.  In order to do mixed groups we'd have to check
6383          * the leaves of any fs roots and pin down the bytes for any file
6384          * extents we find.  Not hard but why do it if we don't have to?
6385          */
6386         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
6387                 fprintf(stderr, "We don't support re-initing the extent tree "
6388                         "for mixed block groups yet, please notify a btrfs "
6389                         "developer you want to do this so they can add this "
6390                         "functionality.\n");
6391                 return -EINVAL;
6392         }
6393
6394         /*
6395          * first we need to walk all of the trees except the extent tree and pin
6396          * down the bytes that are in use so we don't overwrite any existing
6397          * metadata.
6398          */
6399         ret = pin_metadata_blocks(fs_info);
6400         if (ret) {
6401                 fprintf(stderr, "error pinning down used bytes\n");
6402                 return ret;
6403         }
6404
6405         /*
6406          * Need to drop all the block groups since we're going to recreate all
6407          * of them again.
6408          */
6409         btrfs_free_block_groups(fs_info);
6410         ret = reset_block_groups(fs_info);
6411         if (ret) {
6412                 fprintf(stderr, "error resetting the block groups\n");
6413                 return ret;
6414         }
6415
6416         /* Ok we can allocate now, reinit the extent root */
6417         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
6418         if (ret) {
6419                 fprintf(stderr, "extent root initialization failed\n");
6420                 /*
6421                  * When the transaction code is updated we should end the
6422                  * transaction, but for now progs only knows about commit so
6423                  * just return an error.
6424                  */
6425                 return ret;
6426         }
6427
6428         /*
6429          * Now we have all the in-memory block groups setup so we can make
6430          * allocations properly, and the metadata we care about is safe since we
6431          * pinned all of it above.
6432          */
6433         while (1) {
6434                 struct btrfs_block_group_cache *cache;
6435
6436                 cache = btrfs_lookup_first_block_group(fs_info, start);
6437                 if (!cache)
6438                         break;
6439                 start = cache->key.objectid + cache->key.offset;
6440                 ret = btrfs_insert_item(trans, fs_info->extent_root,
6441                                         &cache->key, &cache->item,
6442                                         sizeof(cache->item));
6443                 if (ret) {
6444                         fprintf(stderr, "Error adding block group\n");
6445                         return ret;
6446                 }
6447                 btrfs_extent_post_op(trans, fs_info->extent_root);
6448         }
6449
6450         ret = reset_balance(trans, fs_info);
6451         if (ret)
6452                 fprintf(stderr, "error reseting the pending balance\n");
6453
6454         return ret;
6455 }
6456
6457 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
6458 {
6459         struct btrfs_path *path;
6460         struct btrfs_trans_handle *trans;
6461         struct btrfs_key key;
6462         int ret;
6463
6464         printf("Recowing metadata block %llu\n", eb->start);
6465         key.objectid = btrfs_header_owner(eb);
6466         key.type = BTRFS_ROOT_ITEM_KEY;
6467         key.offset = (u64)-1;
6468
6469         root = btrfs_read_fs_root(root->fs_info, &key);
6470         if (IS_ERR(root)) {
6471                 fprintf(stderr, "Couldn't find owner root %llu\n",
6472                         key.objectid);
6473                 return PTR_ERR(root);
6474         }
6475
6476         path = btrfs_alloc_path();
6477         if (!path)
6478                 return -ENOMEM;
6479
6480         trans = btrfs_start_transaction(root, 1);
6481         if (IS_ERR(trans)) {
6482                 btrfs_free_path(path);
6483                 return PTR_ERR(trans);
6484         }
6485
6486         path->lowest_level = btrfs_header_level(eb);
6487         if (path->lowest_level)
6488                 btrfs_node_key_to_cpu(eb, &key, 0);
6489         else
6490                 btrfs_item_key_to_cpu(eb, &key, 0);
6491
6492         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6493         btrfs_commit_transaction(trans, root);
6494         btrfs_free_path(path);
6495         return ret;
6496 }
6497
6498 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
6499 {
6500         struct btrfs_path *path;
6501         struct btrfs_trans_handle *trans;
6502         struct btrfs_key key;
6503         int ret;
6504
6505         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
6506                bad->key.type, bad->key.offset);
6507         key.objectid = bad->root_id;
6508         key.type = BTRFS_ROOT_ITEM_KEY;
6509         key.offset = (u64)-1;
6510
6511         root = btrfs_read_fs_root(root->fs_info, &key);
6512         if (IS_ERR(root)) {
6513                 fprintf(stderr, "Couldn't find owner root %llu\n",
6514                         key.objectid);
6515                 return PTR_ERR(root);
6516         }
6517
6518         path = btrfs_alloc_path();
6519         if (!path)
6520                 return -ENOMEM;
6521
6522         trans = btrfs_start_transaction(root, 1);
6523         if (IS_ERR(trans)) {
6524                 btrfs_free_path(path);
6525                 return PTR_ERR(trans);
6526         }
6527
6528         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
6529         if (ret) {
6530                 if (ret > 0)
6531                         ret = 0;
6532                 goto out;
6533         }
6534         ret = btrfs_del_item(trans, root, path);
6535 out:
6536         btrfs_commit_transaction(trans, root);
6537         btrfs_free_path(path);
6538         return ret;
6539 }
6540
6541 static struct option long_options[] = {
6542         { "super", 1, NULL, 's' },
6543         { "repair", 0, NULL, 0 },
6544         { "init-csum-tree", 0, NULL, 0 },
6545         { "init-extent-tree", 0, NULL, 0 },
6546         { "check-data-csum", 0, NULL, 0 },
6547         { "backup", 0, NULL, 0 },
6548         { "subvol-extents", no_argument, NULL, 'E' },
6549         { "qgroup-report", 0, NULL, 'Q' },
6550         { NULL, 0, NULL, 0}
6551 };
6552
6553 const char * const cmd_check_usage[] = {
6554         "btrfs check [options] <device>",
6555         "Check an unmounted btrfs filesystem.",
6556         "",
6557         "-s|--super <superblock>     use this superblock copy",
6558         "-b|--backup                 use the backup root copy",
6559         "--repair                    try to repair the filesystem",
6560         "--init-csum-tree            create a new CRC tree",
6561         "--init-extent-tree          create a new extent tree",
6562         "--check-data-csum           verify checkums of data blocks",
6563         "--qgroup-report             print a report on qgroup consistency",
6564         "--subvol-extents            print subvolume extents and sharing state",
6565         NULL
6566 };
6567
6568 int cmd_check(int argc, char **argv)
6569 {
6570         struct cache_tree root_cache;
6571         struct btrfs_root *root;
6572         struct btrfs_fs_info *info;
6573         u64 bytenr = 0;
6574         u64 subvolid = 0;
6575         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
6576         int ret;
6577         u64 num;
6578         int option_index = 0;
6579         int init_csum_tree = 0;
6580         int qgroup_report = 0;
6581         enum btrfs_open_ctree_flags ctree_flags =
6582                 OPEN_CTREE_PARTIAL | OPEN_CTREE_EXCLUSIVE;
6583
6584         while(1) {
6585                 int c;
6586                 c = getopt_long(argc, argv, "as:b", long_options,
6587                                 &option_index);
6588                 if (c < 0)
6589                         break;
6590                 switch(c) {
6591                         case 'a': /* ignored */ break;
6592                         case 'b':
6593                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
6594                                 break;
6595                         case 's':
6596                                 num = arg_strtou64(optarg);
6597                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
6598                                         fprintf(stderr,
6599                                                 "ERROR: super mirror should be less than: %d\n",
6600                                                 BTRFS_SUPER_MIRROR_MAX);
6601                                         exit(1);
6602                                 }
6603                                 bytenr = btrfs_sb_offset(((int)num));
6604                                 printf("using SB copy %llu, bytenr %llu\n", num,
6605                                        (unsigned long long)bytenr);
6606                                 break;
6607                         case 'Q':
6608                                 qgroup_report = 1;
6609                                 break;
6610                         case 'E':
6611                                 subvolid = arg_strtou64(optarg);
6612                                 break;
6613                         case '?':
6614                         case 'h':
6615                                 usage(cmd_check_usage);
6616                 }
6617                 if (option_index == 1) {
6618                         printf("enabling repair mode\n");
6619                         repair = 1;
6620                         ctree_flags |= OPEN_CTREE_WRITES;
6621                 } else if (option_index == 2) {
6622                         printf("Creating a new CRC tree\n");
6623                         init_csum_tree = 1;
6624                         repair = 1;
6625                         ctree_flags |= OPEN_CTREE_WRITES;
6626                 } else if (option_index == 3) {
6627                         init_extent_tree = 1;
6628                         ctree_flags |= (OPEN_CTREE_WRITES |
6629                                         OPEN_CTREE_NO_BLOCK_GROUPS);
6630                         repair = 1;
6631                 } else if (option_index == 4) {
6632                         check_data_csum = 1;
6633                 }
6634         }
6635         argc = argc - optind;
6636
6637         if (check_argc_exact(argc, 1))
6638                 usage(cmd_check_usage);
6639
6640         radix_tree_init();
6641         cache_tree_init(&root_cache);
6642
6643         if((ret = check_mounted(argv[optind])) < 0) {
6644                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
6645                 goto err_out;
6646         } else if(ret) {
6647                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
6648                 ret = -EBUSY;
6649                 goto err_out;
6650         }
6651
6652         info = open_ctree_fs_info(argv[optind], bytenr, 0, ctree_flags);
6653         if (!info) {
6654                 fprintf(stderr, "Couldn't open file system\n");
6655                 ret = -EIO;
6656                 goto err_out;
6657         }
6658
6659         root = info->fs_root;
6660         uuid_unparse(info->super_copy->fsid, uuidbuf);
6661         if (qgroup_report) {
6662                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
6663                        uuidbuf);
6664                 ret = qgroup_verify_all(info);
6665                 if (ret == 0)
6666                         print_qgroup_report(1);
6667                 goto close_out;
6668         }
6669         if (subvolid) {
6670                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
6671                        subvolid, argv[optind], uuidbuf);
6672                 ret = print_extent_state(info, subvolid);
6673                 goto close_out;
6674         }
6675         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
6676
6677         if (!extent_buffer_uptodate(info->tree_root->node) ||
6678             !extent_buffer_uptodate(info->dev_root->node) ||
6679             !extent_buffer_uptodate(info->chunk_root->node)) {
6680                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6681                 ret = -EIO;
6682                 goto close_out;
6683         }
6684
6685         if (init_extent_tree || init_csum_tree) {
6686                 struct btrfs_trans_handle *trans;
6687
6688                 trans = btrfs_start_transaction(info->extent_root, 0);
6689                 if (IS_ERR(trans)) {
6690                         fprintf(stderr, "Error starting transaction\n");
6691                         ret = PTR_ERR(trans);
6692                         goto close_out;
6693                 }
6694
6695                 if (init_extent_tree) {
6696                         printf("Creating a new extent tree\n");
6697                         ret = reinit_extent_tree(trans, info);
6698                         if (ret)
6699                                 goto close_out;
6700                 }
6701
6702                 if (init_csum_tree) {
6703                         fprintf(stderr, "Reinit crc root\n");
6704                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
6705                         if (ret) {
6706                                 fprintf(stderr, "crc root initialization failed\n");
6707                                 ret = -EIO;
6708                                 goto close_out;
6709                         }
6710                 }
6711                 /*
6712                  * Ok now we commit and run the normal fsck, which will add
6713                  * extent entries for all of the items it finds.
6714                  */
6715                 ret = btrfs_commit_transaction(trans, info->extent_root);
6716                 if (ret)
6717                         goto close_out;
6718         }
6719         if (!extent_buffer_uptodate(info->extent_root->node)) {
6720                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6721                 ret = -EIO;
6722                 goto close_out;
6723         }
6724
6725         fprintf(stderr, "checking extents\n");
6726         ret = check_chunks_and_extents(root);
6727         if (ret)
6728                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
6729
6730         fprintf(stderr, "checking free space cache\n");
6731         ret = check_space_cache(root);
6732         if (ret)
6733                 goto out;
6734
6735         /*
6736          * We used to have to have these hole extents in between our real
6737          * extents so if we don't have this flag set we need to make sure there
6738          * are no gaps in the file extents for inodes, otherwise we can just
6739          * ignore it when this happens.
6740          */
6741         no_holes = btrfs_fs_incompat(root->fs_info,
6742                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
6743         fprintf(stderr, "checking fs roots\n");
6744         ret = check_fs_roots(root, &root_cache);
6745         if (ret)
6746                 goto out;
6747
6748         fprintf(stderr, "checking csums\n");
6749         ret = check_csums(root);
6750         if (ret)
6751                 goto out;
6752
6753         fprintf(stderr, "checking root refs\n");
6754         ret = check_root_refs(root, &root_cache);
6755         if (ret)
6756                 goto out;
6757
6758         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
6759                 struct extent_buffer *eb;
6760
6761                 eb = list_first_entry(&root->fs_info->recow_ebs,
6762                                       struct extent_buffer, recow);
6763                 ret = recow_extent_buffer(root, eb);
6764                 if (ret)
6765                         break;
6766         }
6767
6768         while (!list_empty(&delete_items)) {
6769                 struct bad_item *bad;
6770
6771                 bad = list_first_entry(&delete_items, struct bad_item, list);
6772                 list_del_init(&bad->list);
6773                 if (repair)
6774                         ret = delete_bad_item(root, bad);
6775                 free(bad);
6776         }
6777
6778         if (info->quota_enabled) {
6779                 int err;
6780                 fprintf(stderr, "checking quota groups\n");
6781                 err = qgroup_verify_all(info);
6782                 if (err)
6783                         goto out;
6784         }
6785
6786         if (!list_empty(&root->fs_info->recow_ebs)) {
6787                 fprintf(stderr, "Transid errors in file system\n");
6788                 ret = 1;
6789         }
6790 out:
6791         print_qgroup_report(0);
6792         if (found_old_backref) { /*
6793                  * there was a disk format change when mixed
6794                  * backref was in testing tree. The old format
6795                  * existed about one week.
6796                  */
6797                 printf("\n * Found old mixed backref format. "
6798                        "The old format is not supported! *"
6799                        "\n * Please mount the FS in readonly mode, "
6800                        "backup data and re-format the FS. *\n\n");
6801                 ret = 1;
6802         }
6803         printf("found %llu bytes used err is %d\n",
6804                (unsigned long long)bytes_used, ret);
6805         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
6806         printf("total tree bytes: %llu\n",
6807                (unsigned long long)total_btree_bytes);
6808         printf("total fs tree bytes: %llu\n",
6809                (unsigned long long)total_fs_tree_bytes);
6810         printf("total extent tree bytes: %llu\n",
6811                (unsigned long long)total_extent_tree_bytes);
6812         printf("btree space waste bytes: %llu\n",
6813                (unsigned long long)btree_space_waste);
6814         printf("file data blocks allocated: %llu\n referenced %llu\n",
6815                 (unsigned long long)data_bytes_allocated,
6816                 (unsigned long long)data_bytes_referenced);
6817         printf("%s\n", BTRFS_BUILD_VERSION);
6818
6819         free_root_recs_tree(&root_cache);
6820 close_out:
6821         close_ctree(root);
6822 err_out:
6823         return ret;
6824 }