btrfs-progs: fsck: clear out log tree in repair mode
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "ctree.h"
31 #include "volumes.h"
32 #include "repair.h"
33 #include "disk-io.h"
34 #include "print-tree.h"
35 #include "transaction.h"
36 #include "version.h"
37 #include "utils.h"
38 #include "commands.h"
39 #include "free-space-cache.h"
40 #include "btrfsck.h"
41 #include "qgroup-verify.h"
42
43 static u64 bytes_used = 0;
44 static u64 total_csum_bytes = 0;
45 static u64 total_btree_bytes = 0;
46 static u64 total_fs_tree_bytes = 0;
47 static u64 total_extent_tree_bytes = 0;
48 static u64 btree_space_waste = 0;
49 static u64 data_bytes_allocated = 0;
50 static u64 data_bytes_referenced = 0;
51 static int found_old_backref = 0;
52 static LIST_HEAD(duplicate_extents);
53 static LIST_HEAD(delete_items);
54 static int repair = 0;
55 static int no_holes = 0;
56 static int init_extent_tree = 0;
57 static int check_data_csum = 0;
58
59 struct extent_backref {
60         struct list_head list;
61         unsigned int is_data:1;
62         unsigned int found_extent_tree:1;
63         unsigned int full_backref:1;
64         unsigned int found_ref:1;
65         unsigned int broken:1;
66 };
67
68 struct data_backref {
69         struct extent_backref node;
70         union {
71                 u64 parent;
72                 u64 root;
73         };
74         u64 owner;
75         u64 offset;
76         u64 disk_bytenr;
77         u64 bytes;
78         u64 ram_bytes;
79         u32 num_refs;
80         u32 found_ref;
81 };
82
83 struct tree_backref {
84         struct extent_backref node;
85         union {
86                 u64 parent;
87                 u64 root;
88         };
89 };
90
91 struct extent_record {
92         struct list_head backrefs;
93         struct list_head dups;
94         struct list_head list;
95         struct cache_extent cache;
96         struct btrfs_disk_key parent_key;
97         u64 start;
98         u64 max_size;
99         u64 nr;
100         u64 refs;
101         u64 extent_item_refs;
102         u64 generation;
103         u64 parent_generation;
104         u64 info_objectid;
105         u32 num_duplicates;
106         u8 info_level;
107         unsigned int found_rec:1;
108         unsigned int content_checked:1;
109         unsigned int owner_ref_checked:1;
110         unsigned int is_root:1;
111         unsigned int metadata:1;
112 };
113
114 struct inode_backref {
115         struct list_head list;
116         unsigned int found_dir_item:1;
117         unsigned int found_dir_index:1;
118         unsigned int found_inode_ref:1;
119         unsigned int filetype:8;
120         int errors;
121         unsigned int ref_type;
122         u64 dir;
123         u64 index;
124         u16 namelen;
125         char name[0];
126 };
127
128 struct dropping_root_item_record {
129         struct list_head list;
130         struct btrfs_root_item ri;
131         struct btrfs_key found_key;
132 };
133
134 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
135 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
136 #define REF_ERR_NO_INODE_REF            (1 << 2)
137 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
138 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
139 #define REF_ERR_DUP_INODE_REF           (1 << 5)
140 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
141 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
142 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
143 #define REF_ERR_NO_ROOT_REF             (1 << 9)
144 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
145 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
146 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
147
148 struct inode_record {
149         struct list_head backrefs;
150         unsigned int checked:1;
151         unsigned int merging:1;
152         unsigned int found_inode_item:1;
153         unsigned int found_dir_item:1;
154         unsigned int found_file_extent:1;
155         unsigned int found_csum_item:1;
156         unsigned int some_csum_missing:1;
157         unsigned int nodatasum:1;
158         int errors;
159
160         u64 ino;
161         u32 nlink;
162         u32 imode;
163         u64 isize;
164         u64 nbytes;
165
166         u32 found_link;
167         u64 found_size;
168         u64 extent_start;
169         u64 extent_end;
170         u64 first_extent_gap;
171
172         u32 refs;
173 };
174
175 #define I_ERR_NO_INODE_ITEM             (1 << 0)
176 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
177 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
178 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
179 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
180 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
181 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
182 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
183 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
184 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
185 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
186 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
187 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
188 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
189
190 struct root_backref {
191         struct list_head list;
192         unsigned int found_dir_item:1;
193         unsigned int found_dir_index:1;
194         unsigned int found_back_ref:1;
195         unsigned int found_forward_ref:1;
196         unsigned int reachable:1;
197         int errors;
198         u64 ref_root;
199         u64 dir;
200         u64 index;
201         u16 namelen;
202         char name[0];
203 };
204
205 struct root_record {
206         struct list_head backrefs;
207         struct cache_extent cache;
208         unsigned int found_root_item:1;
209         u64 objectid;
210         u32 found_ref;
211 };
212
213 struct ptr_node {
214         struct cache_extent cache;
215         void *data;
216 };
217
218 struct shared_node {
219         struct cache_extent cache;
220         struct cache_tree root_cache;
221         struct cache_tree inode_cache;
222         struct inode_record *current;
223         u32 refs;
224 };
225
226 struct block_info {
227         u64 start;
228         u32 size;
229 };
230
231 struct walk_control {
232         struct cache_tree shared;
233         struct shared_node *nodes[BTRFS_MAX_LEVEL];
234         int active_node;
235         int root_level;
236 };
237
238 struct bad_item {
239         struct btrfs_key key;
240         u64 root_id;
241         struct list_head list;
242 };
243
244 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
245
246 static void record_root_in_trans(struct btrfs_trans_handle *trans,
247                                  struct btrfs_root *root)
248 {
249         if (root->last_trans != trans->transid) {
250                 root->track_dirty = 1;
251                 root->last_trans = trans->transid;
252                 root->commit_root = root->node;
253                 extent_buffer_get(root->node);
254         }
255 }
256
257 static u8 imode_to_type(u32 imode)
258 {
259 #define S_SHIFT 12
260         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
261                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
262                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
263                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
264                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
265                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
266                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
267                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
268         };
269
270         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
271 #undef S_SHIFT
272 }
273
274 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
275 {
276         struct device_record *rec1;
277         struct device_record *rec2;
278
279         rec1 = rb_entry(node1, struct device_record, node);
280         rec2 = rb_entry(node2, struct device_record, node);
281         if (rec1->devid > rec2->devid)
282                 return -1;
283         else if (rec1->devid < rec2->devid)
284                 return 1;
285         else
286                 return 0;
287 }
288
289 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
290 {
291         struct inode_record *rec;
292         struct inode_backref *backref;
293         struct inode_backref *orig;
294         size_t size;
295
296         rec = malloc(sizeof(*rec));
297         memcpy(rec, orig_rec, sizeof(*rec));
298         rec->refs = 1;
299         INIT_LIST_HEAD(&rec->backrefs);
300
301         list_for_each_entry(orig, &orig_rec->backrefs, list) {
302                 size = sizeof(*orig) + orig->namelen + 1;
303                 backref = malloc(size);
304                 memcpy(backref, orig, size);
305                 list_add_tail(&backref->list, &rec->backrefs);
306         }
307         return rec;
308 }
309
310 static void print_inode_error(int errors)
311 {
312         if (errors & I_ERR_NO_INODE_ITEM)
313                 fprintf(stderr, ", no inode item");
314         if (errors & I_ERR_NO_ORPHAN_ITEM)
315                 fprintf(stderr, ", no orphan item");
316         if (errors & I_ERR_DUP_INODE_ITEM)
317                 fprintf(stderr, ", dup inode item");
318         if (errors & I_ERR_DUP_DIR_INDEX)
319                 fprintf(stderr, ", dup dir index");
320         if (errors & I_ERR_ODD_DIR_ITEM)
321                 fprintf(stderr, ", odd dir item");
322         if (errors & I_ERR_ODD_FILE_EXTENT)
323                 fprintf(stderr, ", odd file extent");
324         if (errors & I_ERR_BAD_FILE_EXTENT)
325                 fprintf(stderr, ", bad file extent");
326         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
327                 fprintf(stderr, ", file extent overlap");
328         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
329                 fprintf(stderr, ", file extent discount");
330         if (errors & I_ERR_DIR_ISIZE_WRONG)
331                 fprintf(stderr, ", dir isize wrong");
332         if (errors & I_ERR_FILE_NBYTES_WRONG)
333                 fprintf(stderr, ", nbytes wrong");
334         if (errors & I_ERR_ODD_CSUM_ITEM)
335                 fprintf(stderr, ", odd csum item");
336         if (errors & I_ERR_SOME_CSUM_MISSING)
337                 fprintf(stderr, ", some csum missing");
338         if (errors & I_ERR_LINK_COUNT_WRONG)
339                 fprintf(stderr, ", link count wrong");
340         fprintf(stderr, "\n");
341 }
342
343 static void print_ref_error(int errors)
344 {
345         if (errors & REF_ERR_NO_DIR_ITEM)
346                 fprintf(stderr, ", no dir item");
347         if (errors & REF_ERR_NO_DIR_INDEX)
348                 fprintf(stderr, ", no dir index");
349         if (errors & REF_ERR_NO_INODE_REF)
350                 fprintf(stderr, ", no inode ref");
351         if (errors & REF_ERR_DUP_DIR_ITEM)
352                 fprintf(stderr, ", dup dir item");
353         if (errors & REF_ERR_DUP_DIR_INDEX)
354                 fprintf(stderr, ", dup dir index");
355         if (errors & REF_ERR_DUP_INODE_REF)
356                 fprintf(stderr, ", dup inode ref");
357         if (errors & REF_ERR_INDEX_UNMATCH)
358                 fprintf(stderr, ", index unmatch");
359         if (errors & REF_ERR_FILETYPE_UNMATCH)
360                 fprintf(stderr, ", filetype unmatch");
361         if (errors & REF_ERR_NAME_TOO_LONG)
362                 fprintf(stderr, ", name too long");
363         if (errors & REF_ERR_NO_ROOT_REF)
364                 fprintf(stderr, ", no root ref");
365         if (errors & REF_ERR_NO_ROOT_BACKREF)
366                 fprintf(stderr, ", no root backref");
367         if (errors & REF_ERR_DUP_ROOT_REF)
368                 fprintf(stderr, ", dup root ref");
369         if (errors & REF_ERR_DUP_ROOT_BACKREF)
370                 fprintf(stderr, ", dup root backref");
371         fprintf(stderr, "\n");
372 }
373
374 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
375                                           u64 ino, int mod)
376 {
377         struct ptr_node *node;
378         struct cache_extent *cache;
379         struct inode_record *rec = NULL;
380         int ret;
381
382         cache = lookup_cache_extent(inode_cache, ino, 1);
383         if (cache) {
384                 node = container_of(cache, struct ptr_node, cache);
385                 rec = node->data;
386                 if (mod && rec->refs > 1) {
387                         node->data = clone_inode_rec(rec);
388                         rec->refs--;
389                         rec = node->data;
390                 }
391         } else if (mod) {
392                 rec = calloc(1, sizeof(*rec));
393                 rec->ino = ino;
394                 rec->extent_start = (u64)-1;
395                 rec->first_extent_gap = (u64)-1;
396                 rec->refs = 1;
397                 INIT_LIST_HEAD(&rec->backrefs);
398
399                 node = malloc(sizeof(*node));
400                 node->cache.start = ino;
401                 node->cache.size = 1;
402                 node->data = rec;
403
404                 if (ino == BTRFS_FREE_INO_OBJECTID)
405                         rec->found_link = 1;
406
407                 ret = insert_cache_extent(inode_cache, &node->cache);
408                 BUG_ON(ret);
409         }
410         return rec;
411 }
412
413 static void free_inode_rec(struct inode_record *rec)
414 {
415         struct inode_backref *backref;
416
417         if (--rec->refs > 0)
418                 return;
419
420         while (!list_empty(&rec->backrefs)) {
421                 backref = list_entry(rec->backrefs.next,
422                                      struct inode_backref, list);
423                 list_del(&backref->list);
424                 free(backref);
425         }
426         free(rec);
427 }
428
429 static int can_free_inode_rec(struct inode_record *rec)
430 {
431         if (!rec->errors && rec->checked && rec->found_inode_item &&
432             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
433                 return 1;
434         return 0;
435 }
436
437 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
438                                  struct inode_record *rec)
439 {
440         struct cache_extent *cache;
441         struct inode_backref *tmp, *backref;
442         struct ptr_node *node;
443         unsigned char filetype;
444
445         if (!rec->found_inode_item)
446                 return;
447
448         filetype = imode_to_type(rec->imode);
449         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
450                 if (backref->found_dir_item && backref->found_dir_index) {
451                         if (backref->filetype != filetype)
452                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
453                         if (!backref->errors && backref->found_inode_ref) {
454                                 list_del(&backref->list);
455                                 free(backref);
456                         }
457                 }
458         }
459
460         if (!rec->checked || rec->merging)
461                 return;
462
463         if (S_ISDIR(rec->imode)) {
464                 if (rec->found_size != rec->isize)
465                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
466                 if (rec->found_file_extent)
467                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
468         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
469                 if (rec->found_dir_item)
470                         rec->errors |= I_ERR_ODD_DIR_ITEM;
471                 if (rec->found_size != rec->nbytes)
472                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
473                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
474                         rec->first_extent_gap = 0;
475                 if (rec->nlink > 0 && !no_holes &&
476                     (rec->extent_end < rec->isize ||
477                      rec->first_extent_gap < rec->isize))
478                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
479         }
480
481         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
482                 if (rec->found_csum_item && rec->nodatasum)
483                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
484                 if (rec->some_csum_missing && !rec->nodatasum)
485                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
486         }
487
488         BUG_ON(rec->refs != 1);
489         if (can_free_inode_rec(rec)) {
490                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
491                 node = container_of(cache, struct ptr_node, cache);
492                 BUG_ON(node->data != rec);
493                 remove_cache_extent(inode_cache, &node->cache);
494                 free(node);
495                 free_inode_rec(rec);
496         }
497 }
498
499 static int check_orphan_item(struct btrfs_root *root, u64 ino)
500 {
501         struct btrfs_path path;
502         struct btrfs_key key;
503         int ret;
504
505         key.objectid = BTRFS_ORPHAN_OBJECTID;
506         key.type = BTRFS_ORPHAN_ITEM_KEY;
507         key.offset = ino;
508
509         btrfs_init_path(&path);
510         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
511         btrfs_release_path(&path);
512         if (ret > 0)
513                 ret = -ENOENT;
514         return ret;
515 }
516
517 static int process_inode_item(struct extent_buffer *eb,
518                               int slot, struct btrfs_key *key,
519                               struct shared_node *active_node)
520 {
521         struct inode_record *rec;
522         struct btrfs_inode_item *item;
523
524         rec = active_node->current;
525         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
526         if (rec->found_inode_item) {
527                 rec->errors |= I_ERR_DUP_INODE_ITEM;
528                 return 1;
529         }
530         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
531         rec->nlink = btrfs_inode_nlink(eb, item);
532         rec->isize = btrfs_inode_size(eb, item);
533         rec->nbytes = btrfs_inode_nbytes(eb, item);
534         rec->imode = btrfs_inode_mode(eb, item);
535         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
536                 rec->nodatasum = 1;
537         rec->found_inode_item = 1;
538         if (rec->nlink == 0)
539                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
540         maybe_free_inode_rec(&active_node->inode_cache, rec);
541         return 0;
542 }
543
544 static struct inode_backref *get_inode_backref(struct inode_record *rec,
545                                                 const char *name,
546                                                 int namelen, u64 dir)
547 {
548         struct inode_backref *backref;
549
550         list_for_each_entry(backref, &rec->backrefs, list) {
551                 if (backref->dir != dir || backref->namelen != namelen)
552                         continue;
553                 if (memcmp(name, backref->name, namelen))
554                         continue;
555                 return backref;
556         }
557
558         backref = malloc(sizeof(*backref) + namelen + 1);
559         memset(backref, 0, sizeof(*backref));
560         backref->dir = dir;
561         backref->namelen = namelen;
562         memcpy(backref->name, name, namelen);
563         backref->name[namelen] = '\0';
564         list_add_tail(&backref->list, &rec->backrefs);
565         return backref;
566 }
567
568 static int add_inode_backref(struct cache_tree *inode_cache,
569                              u64 ino, u64 dir, u64 index,
570                              const char *name, int namelen,
571                              int filetype, int itemtype, int errors)
572 {
573         struct inode_record *rec;
574         struct inode_backref *backref;
575
576         rec = get_inode_rec(inode_cache, ino, 1);
577         backref = get_inode_backref(rec, name, namelen, dir);
578         if (errors)
579                 backref->errors |= errors;
580         if (itemtype == BTRFS_DIR_INDEX_KEY) {
581                 if (backref->found_dir_index)
582                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
583                 if (backref->found_inode_ref && backref->index != index)
584                         backref->errors |= REF_ERR_INDEX_UNMATCH;
585                 if (backref->found_dir_item && backref->filetype != filetype)
586                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
587
588                 backref->index = index;
589                 backref->filetype = filetype;
590                 backref->found_dir_index = 1;
591         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
592                 rec->found_link++;
593                 if (backref->found_dir_item)
594                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
595                 if (backref->found_dir_index && backref->filetype != filetype)
596                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
597
598                 backref->filetype = filetype;
599                 backref->found_dir_item = 1;
600         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
601                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
602                 if (backref->found_inode_ref)
603                         backref->errors |= REF_ERR_DUP_INODE_REF;
604                 if (backref->found_dir_index && backref->index != index)
605                         backref->errors |= REF_ERR_INDEX_UNMATCH;
606
607                 backref->ref_type = itemtype;
608                 backref->index = index;
609                 backref->found_inode_ref = 1;
610         } else {
611                 BUG_ON(1);
612         }
613
614         maybe_free_inode_rec(inode_cache, rec);
615         return 0;
616 }
617
618 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
619                             struct cache_tree *dst_cache)
620 {
621         struct inode_backref *backref;
622         u32 dir_count = 0;
623
624         dst->merging = 1;
625         list_for_each_entry(backref, &src->backrefs, list) {
626                 if (backref->found_dir_index) {
627                         add_inode_backref(dst_cache, dst->ino, backref->dir,
628                                         backref->index, backref->name,
629                                         backref->namelen, backref->filetype,
630                                         BTRFS_DIR_INDEX_KEY, backref->errors);
631                 }
632                 if (backref->found_dir_item) {
633                         dir_count++;
634                         add_inode_backref(dst_cache, dst->ino,
635                                         backref->dir, 0, backref->name,
636                                         backref->namelen, backref->filetype,
637                                         BTRFS_DIR_ITEM_KEY, backref->errors);
638                 }
639                 if (backref->found_inode_ref) {
640                         add_inode_backref(dst_cache, dst->ino,
641                                         backref->dir, backref->index,
642                                         backref->name, backref->namelen, 0,
643                                         backref->ref_type, backref->errors);
644                 }
645         }
646
647         if (src->found_dir_item)
648                 dst->found_dir_item = 1;
649         if (src->found_file_extent)
650                 dst->found_file_extent = 1;
651         if (src->found_csum_item)
652                 dst->found_csum_item = 1;
653         if (src->some_csum_missing)
654                 dst->some_csum_missing = 1;
655         if (dst->first_extent_gap > src->first_extent_gap)
656                 dst->first_extent_gap = src->first_extent_gap;
657
658         BUG_ON(src->found_link < dir_count);
659         dst->found_link += src->found_link - dir_count;
660         dst->found_size += src->found_size;
661         if (src->extent_start != (u64)-1) {
662                 if (dst->extent_start == (u64)-1) {
663                         dst->extent_start = src->extent_start;
664                         dst->extent_end = src->extent_end;
665                 } else {
666                         if (dst->extent_end > src->extent_start)
667                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
668                         else if (dst->extent_end < src->extent_start &&
669                                  dst->extent_end < dst->first_extent_gap)
670                                 dst->first_extent_gap = dst->extent_end;
671                         if (dst->extent_end < src->extent_end)
672                                 dst->extent_end = src->extent_end;
673                 }
674         }
675
676         dst->errors |= src->errors;
677         if (src->found_inode_item) {
678                 if (!dst->found_inode_item) {
679                         dst->nlink = src->nlink;
680                         dst->isize = src->isize;
681                         dst->nbytes = src->nbytes;
682                         dst->imode = src->imode;
683                         dst->nodatasum = src->nodatasum;
684                         dst->found_inode_item = 1;
685                 } else {
686                         dst->errors |= I_ERR_DUP_INODE_ITEM;
687                 }
688         }
689         dst->merging = 0;
690
691         return 0;
692 }
693
694 static int splice_shared_node(struct shared_node *src_node,
695                               struct shared_node *dst_node)
696 {
697         struct cache_extent *cache;
698         struct ptr_node *node, *ins;
699         struct cache_tree *src, *dst;
700         struct inode_record *rec, *conflict;
701         u64 current_ino = 0;
702         int splice = 0;
703         int ret;
704
705         if (--src_node->refs == 0)
706                 splice = 1;
707         if (src_node->current)
708                 current_ino = src_node->current->ino;
709
710         src = &src_node->root_cache;
711         dst = &dst_node->root_cache;
712 again:
713         cache = search_cache_extent(src, 0);
714         while (cache) {
715                 node = container_of(cache, struct ptr_node, cache);
716                 rec = node->data;
717                 cache = next_cache_extent(cache);
718
719                 if (splice) {
720                         remove_cache_extent(src, &node->cache);
721                         ins = node;
722                 } else {
723                         ins = malloc(sizeof(*ins));
724                         ins->cache.start = node->cache.start;
725                         ins->cache.size = node->cache.size;
726                         ins->data = rec;
727                         rec->refs++;
728                 }
729                 ret = insert_cache_extent(dst, &ins->cache);
730                 if (ret == -EEXIST) {
731                         conflict = get_inode_rec(dst, rec->ino, 1);
732                         merge_inode_recs(rec, conflict, dst);
733                         if (rec->checked) {
734                                 conflict->checked = 1;
735                                 if (dst_node->current == conflict)
736                                         dst_node->current = NULL;
737                         }
738                         maybe_free_inode_rec(dst, conflict);
739                         free_inode_rec(rec);
740                         free(ins);
741                 } else {
742                         BUG_ON(ret);
743                 }
744         }
745
746         if (src == &src_node->root_cache) {
747                 src = &src_node->inode_cache;
748                 dst = &dst_node->inode_cache;
749                 goto again;
750         }
751
752         if (current_ino > 0 && (!dst_node->current ||
753             current_ino > dst_node->current->ino)) {
754                 if (dst_node->current) {
755                         dst_node->current->checked = 1;
756                         maybe_free_inode_rec(dst, dst_node->current);
757                 }
758                 dst_node->current = get_inode_rec(dst, current_ino, 1);
759         }
760         return 0;
761 }
762
763 static void free_inode_ptr(struct cache_extent *cache)
764 {
765         struct ptr_node *node;
766         struct inode_record *rec;
767
768         node = container_of(cache, struct ptr_node, cache);
769         rec = node->data;
770         free_inode_rec(rec);
771         free(node);
772 }
773
774 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
775
776 static struct shared_node *find_shared_node(struct cache_tree *shared,
777                                             u64 bytenr)
778 {
779         struct cache_extent *cache;
780         struct shared_node *node;
781
782         cache = lookup_cache_extent(shared, bytenr, 1);
783         if (cache) {
784                 node = container_of(cache, struct shared_node, cache);
785                 return node;
786         }
787         return NULL;
788 }
789
790 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
791 {
792         int ret;
793         struct shared_node *node;
794
795         node = calloc(1, sizeof(*node));
796         node->cache.start = bytenr;
797         node->cache.size = 1;
798         cache_tree_init(&node->root_cache);
799         cache_tree_init(&node->inode_cache);
800         node->refs = refs;
801
802         ret = insert_cache_extent(shared, &node->cache);
803         BUG_ON(ret);
804         return 0;
805 }
806
807 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
808                              struct walk_control *wc, int level)
809 {
810         struct shared_node *node;
811         struct shared_node *dest;
812
813         if (level == wc->active_node)
814                 return 0;
815
816         BUG_ON(wc->active_node <= level);
817         node = find_shared_node(&wc->shared, bytenr);
818         if (!node) {
819                 add_shared_node(&wc->shared, bytenr, refs);
820                 node = find_shared_node(&wc->shared, bytenr);
821                 wc->nodes[level] = node;
822                 wc->active_node = level;
823                 return 0;
824         }
825
826         if (wc->root_level == wc->active_node &&
827             btrfs_root_refs(&root->root_item) == 0) {
828                 if (--node->refs == 0) {
829                         free_inode_recs_tree(&node->root_cache);
830                         free_inode_recs_tree(&node->inode_cache);
831                         remove_cache_extent(&wc->shared, &node->cache);
832                         free(node);
833                 }
834                 return 1;
835         }
836
837         dest = wc->nodes[wc->active_node];
838         splice_shared_node(node, dest);
839         if (node->refs == 0) {
840                 remove_cache_extent(&wc->shared, &node->cache);
841                 free(node);
842         }
843         return 1;
844 }
845
846 static int leave_shared_node(struct btrfs_root *root,
847                              struct walk_control *wc, int level)
848 {
849         struct shared_node *node;
850         struct shared_node *dest;
851         int i;
852
853         if (level == wc->root_level)
854                 return 0;
855
856         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
857                 if (wc->nodes[i])
858                         break;
859         }
860         BUG_ON(i >= BTRFS_MAX_LEVEL);
861
862         node = wc->nodes[wc->active_node];
863         wc->nodes[wc->active_node] = NULL;
864         wc->active_node = i;
865
866         dest = wc->nodes[wc->active_node];
867         if (wc->active_node < wc->root_level ||
868             btrfs_root_refs(&root->root_item) > 0) {
869                 BUG_ON(node->refs <= 1);
870                 splice_shared_node(node, dest);
871         } else {
872                 BUG_ON(node->refs < 2);
873                 node->refs--;
874         }
875         return 0;
876 }
877
878 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
879                          u64 child_root_id)
880 {
881         struct btrfs_path path;
882         struct btrfs_key key;
883         struct extent_buffer *leaf;
884         int has_parent = 0;
885         int ret;
886
887         btrfs_init_path(&path);
888
889         key.objectid = parent_root_id;
890         key.type = BTRFS_ROOT_REF_KEY;
891         key.offset = child_root_id;
892         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
893                                 0, 0);
894         BUG_ON(ret < 0);
895         btrfs_release_path(&path);
896         if (!ret)
897                 return 1;
898
899         key.objectid = child_root_id;
900         key.type = BTRFS_ROOT_BACKREF_KEY;
901         key.offset = 0;
902         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
903                                 0, 0);
904         BUG_ON(ret <= 0);
905
906         while (1) {
907                 leaf = path.nodes[0];
908                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
909                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
910                         BUG_ON(ret < 0);
911
912                         if (ret > 0)
913                                 break;
914                         leaf = path.nodes[0];
915                 }
916
917                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
918                 if (key.objectid != child_root_id ||
919                     key.type != BTRFS_ROOT_BACKREF_KEY)
920                         break;
921
922                 has_parent = 1;
923
924                 if (key.offset == parent_root_id) {
925                         btrfs_release_path(&path);
926                         return 1;
927                 }
928
929                 path.slots[0]++;
930         }
931
932         btrfs_release_path(&path);
933         return has_parent? 0 : -1;
934 }
935
936 static int process_dir_item(struct btrfs_root *root,
937                             struct extent_buffer *eb,
938                             int slot, struct btrfs_key *key,
939                             struct shared_node *active_node)
940 {
941         u32 total;
942         u32 cur = 0;
943         u32 len;
944         u32 name_len;
945         u32 data_len;
946         int error;
947         int nritems = 0;
948         int filetype;
949         struct btrfs_dir_item *di;
950         struct inode_record *rec;
951         struct cache_tree *root_cache;
952         struct cache_tree *inode_cache;
953         struct btrfs_key location;
954         char namebuf[BTRFS_NAME_LEN];
955
956         root_cache = &active_node->root_cache;
957         inode_cache = &active_node->inode_cache;
958         rec = active_node->current;
959         rec->found_dir_item = 1;
960
961         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
962         total = btrfs_item_size_nr(eb, slot);
963         while (cur < total) {
964                 nritems++;
965                 btrfs_dir_item_key_to_cpu(eb, di, &location);
966                 name_len = btrfs_dir_name_len(eb, di);
967                 data_len = btrfs_dir_data_len(eb, di);
968                 filetype = btrfs_dir_type(eb, di);
969
970                 rec->found_size += name_len;
971                 if (name_len <= BTRFS_NAME_LEN) {
972                         len = name_len;
973                         error = 0;
974                 } else {
975                         len = BTRFS_NAME_LEN;
976                         error = REF_ERR_NAME_TOO_LONG;
977                 }
978                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
979
980                 if (location.type == BTRFS_INODE_ITEM_KEY) {
981                         add_inode_backref(inode_cache, location.objectid,
982                                           key->objectid, key->offset, namebuf,
983                                           len, filetype, key->type, error);
984                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
985                         add_inode_backref(root_cache, location.objectid,
986                                           key->objectid, key->offset,
987                                           namebuf, len, filetype,
988                                           key->type, error);
989                 } else {
990                         fprintf(stderr, "warning line %d\n", __LINE__);
991                 }
992
993                 len = sizeof(*di) + name_len + data_len;
994                 di = (struct btrfs_dir_item *)((char *)di + len);
995                 cur += len;
996         }
997         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
998                 rec->errors |= I_ERR_DUP_DIR_INDEX;
999
1000         return 0;
1001 }
1002
1003 static int process_inode_ref(struct extent_buffer *eb,
1004                              int slot, struct btrfs_key *key,
1005                              struct shared_node *active_node)
1006 {
1007         u32 total;
1008         u32 cur = 0;
1009         u32 len;
1010         u32 name_len;
1011         u64 index;
1012         int error;
1013         struct cache_tree *inode_cache;
1014         struct btrfs_inode_ref *ref;
1015         char namebuf[BTRFS_NAME_LEN];
1016
1017         inode_cache = &active_node->inode_cache;
1018
1019         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1020         total = btrfs_item_size_nr(eb, slot);
1021         while (cur < total) {
1022                 name_len = btrfs_inode_ref_name_len(eb, ref);
1023                 index = btrfs_inode_ref_index(eb, ref);
1024                 if (name_len <= BTRFS_NAME_LEN) {
1025                         len = name_len;
1026                         error = 0;
1027                 } else {
1028                         len = BTRFS_NAME_LEN;
1029                         error = REF_ERR_NAME_TOO_LONG;
1030                 }
1031                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1032                 add_inode_backref(inode_cache, key->objectid, key->offset,
1033                                   index, namebuf, len, 0, key->type, error);
1034
1035                 len = sizeof(*ref) + name_len;
1036                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1037                 cur += len;
1038         }
1039         return 0;
1040 }
1041
1042 static int process_inode_extref(struct extent_buffer *eb,
1043                                 int slot, struct btrfs_key *key,
1044                                 struct shared_node *active_node)
1045 {
1046         u32 total;
1047         u32 cur = 0;
1048         u32 len;
1049         u32 name_len;
1050         u64 index;
1051         u64 parent;
1052         int error;
1053         struct cache_tree *inode_cache;
1054         struct btrfs_inode_extref *extref;
1055         char namebuf[BTRFS_NAME_LEN];
1056
1057         inode_cache = &active_node->inode_cache;
1058
1059         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1060         total = btrfs_item_size_nr(eb, slot);
1061         while (cur < total) {
1062                 name_len = btrfs_inode_extref_name_len(eb, extref);
1063                 index = btrfs_inode_extref_index(eb, extref);
1064                 parent = btrfs_inode_extref_parent(eb, extref);
1065                 if (name_len <= BTRFS_NAME_LEN) {
1066                         len = name_len;
1067                         error = 0;
1068                 } else {
1069                         len = BTRFS_NAME_LEN;
1070                         error = REF_ERR_NAME_TOO_LONG;
1071                 }
1072                 read_extent_buffer(eb, namebuf,
1073                                    (unsigned long)(extref + 1), len);
1074                 add_inode_backref(inode_cache, key->objectid, parent,
1075                                   index, namebuf, len, 0, key->type, error);
1076
1077                 len = sizeof(*extref) + name_len;
1078                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1079                 cur += len;
1080         }
1081         return 0;
1082
1083 }
1084
1085 static u64 count_csum_range(struct btrfs_root *root, u64 start, u64 len)
1086 {
1087         struct btrfs_key key;
1088         struct btrfs_path path;
1089         struct extent_buffer *leaf;
1090         int ret ;
1091         size_t size;
1092         u64 found = 0;
1093         u64 csum_end;
1094         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1095
1096         btrfs_init_path(&path);
1097
1098         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1099         key.offset = start;
1100         key.type = BTRFS_EXTENT_CSUM_KEY;
1101
1102         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1103                                 &key, &path, 0, 0);
1104         BUG_ON(ret < 0);
1105         if (ret > 0 && path.slots[0] > 0) {
1106                 leaf = path.nodes[0];
1107                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1108                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1109                     key.type == BTRFS_EXTENT_CSUM_KEY)
1110                         path.slots[0]--;
1111         }
1112
1113         while (len > 0) {
1114                 leaf = path.nodes[0];
1115                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1116                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1117                         BUG_ON(ret < 0);
1118                         if (ret > 0)
1119                                 break;
1120                         leaf = path.nodes[0];
1121                 }
1122
1123                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1124                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1125                     key.type != BTRFS_EXTENT_CSUM_KEY)
1126                         break;
1127
1128                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1129                 if (key.offset >= start + len)
1130                         break;
1131
1132                 if (key.offset > start)
1133                         start = key.offset;
1134
1135                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1136                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1137                 if (csum_end > start) {
1138                         size = min(csum_end - start, len);
1139                         len -= size;
1140                         start += size;
1141                         found += size;
1142                 }
1143
1144                 path.slots[0]++;
1145         }
1146         btrfs_release_path(&path);
1147         return found;
1148 }
1149
1150 static int process_file_extent(struct btrfs_root *root,
1151                                 struct extent_buffer *eb,
1152                                 int slot, struct btrfs_key *key,
1153                                 struct shared_node *active_node)
1154 {
1155         struct inode_record *rec;
1156         struct btrfs_file_extent_item *fi;
1157         u64 num_bytes = 0;
1158         u64 disk_bytenr = 0;
1159         u64 extent_offset = 0;
1160         u64 mask = root->sectorsize - 1;
1161         int extent_type;
1162
1163         rec = active_node->current;
1164         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1165         rec->found_file_extent = 1;
1166
1167         if (rec->extent_start == (u64)-1) {
1168                 rec->extent_start = key->offset;
1169                 rec->extent_end = key->offset;
1170         }
1171
1172         if (rec->extent_end > key->offset)
1173                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174         else if (rec->extent_end < key->offset &&
1175                  rec->extent_end < rec->first_extent_gap)
1176                 rec->first_extent_gap = rec->extent_end;
1177
1178         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1179         extent_type = btrfs_file_extent_type(eb, fi);
1180
1181         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1182                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1183                 if (num_bytes == 0)
1184                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1185                 rec->found_size += num_bytes;
1186                 num_bytes = (num_bytes + mask) & ~mask;
1187         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1188                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1189                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1190                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1191                 extent_offset = btrfs_file_extent_offset(eb, fi);
1192                 if (num_bytes == 0 || (num_bytes & mask))
1193                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1194                 if (num_bytes + extent_offset >
1195                     btrfs_file_extent_ram_bytes(eb, fi))
1196                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1197                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1198                     (btrfs_file_extent_compression(eb, fi) ||
1199                      btrfs_file_extent_encryption(eb, fi) ||
1200                      btrfs_file_extent_other_encoding(eb, fi)))
1201                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1202                 if (disk_bytenr > 0)
1203                         rec->found_size += num_bytes;
1204         } else {
1205                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1206         }
1207         rec->extent_end = key->offset + num_bytes;
1208
1209         if (disk_bytenr > 0) {
1210                 u64 found;
1211                 if (btrfs_file_extent_compression(eb, fi))
1212                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1213                 else
1214                         disk_bytenr += extent_offset;
1215
1216                 found = count_csum_range(root, disk_bytenr, num_bytes);
1217                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1218                         if (found > 0)
1219                                 rec->found_csum_item = 1;
1220                         if (found < num_bytes)
1221                                 rec->some_csum_missing = 1;
1222                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1223                         if (found > 0)
1224                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1225                 }
1226         }
1227         return 0;
1228 }
1229
1230 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1231                             struct walk_control *wc)
1232 {
1233         struct btrfs_key key;
1234         u32 nritems;
1235         int i;
1236         int ret = 0;
1237         int error = 0;
1238         struct cache_tree *inode_cache;
1239         struct shared_node *active_node;
1240
1241         if (wc->root_level == wc->active_node &&
1242             btrfs_root_refs(&root->root_item) == 0)
1243                 return 0;
1244
1245         active_node = wc->nodes[wc->active_node];
1246         inode_cache = &active_node->inode_cache;
1247         nritems = btrfs_header_nritems(eb);
1248         for (i = 0; i < nritems; i++) {
1249                 btrfs_item_key_to_cpu(eb, &key, i);
1250
1251                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1252                         continue;
1253                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1254                         continue;
1255
1256                 if (active_node->current == NULL ||
1257                     active_node->current->ino < key.objectid) {
1258                         if (active_node->current) {
1259                                 active_node->current->checked = 1;
1260                                 maybe_free_inode_rec(inode_cache,
1261                                                      active_node->current);
1262                         }
1263                         active_node->current = get_inode_rec(inode_cache,
1264                                                              key.objectid, 1);
1265                 }
1266                 switch (key.type) {
1267                 case BTRFS_DIR_ITEM_KEY:
1268                 case BTRFS_DIR_INDEX_KEY:
1269                         ret = process_dir_item(root, eb, i, &key, active_node);
1270                         break;
1271                 case BTRFS_INODE_REF_KEY:
1272                         ret = process_inode_ref(eb, i, &key, active_node);
1273                         break;
1274                 case BTRFS_INODE_EXTREF_KEY:
1275                         ret = process_inode_extref(eb, i, &key, active_node);
1276                         break;
1277                 case BTRFS_INODE_ITEM_KEY:
1278                         ret = process_inode_item(eb, i, &key, active_node);
1279                         break;
1280                 case BTRFS_EXTENT_DATA_KEY:
1281                         ret = process_file_extent(root, eb, i, &key,
1282                                                   active_node);
1283                         break;
1284                 default:
1285                         break;
1286                 };
1287                 if (ret != 0)
1288                         error = 1;
1289         }
1290         return error;
1291 }
1292
1293 static void reada_walk_down(struct btrfs_root *root,
1294                             struct extent_buffer *node, int slot)
1295 {
1296         u64 bytenr;
1297         u64 ptr_gen;
1298         u32 nritems;
1299         u32 blocksize;
1300         int i;
1301         int level;
1302
1303         level = btrfs_header_level(node);
1304         if (level != 1)
1305                 return;
1306
1307         nritems = btrfs_header_nritems(node);
1308         blocksize = btrfs_level_size(root, level - 1);
1309         for (i = slot; i < nritems; i++) {
1310                 bytenr = btrfs_node_blockptr(node, i);
1311                 ptr_gen = btrfs_node_ptr_generation(node, i);
1312                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1313         }
1314 }
1315
1316 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1317                           struct walk_control *wc, int *level)
1318 {
1319         u64 bytenr;
1320         u64 ptr_gen;
1321         struct extent_buffer *next;
1322         struct extent_buffer *cur;
1323         u32 blocksize;
1324         int ret, err = 0;
1325         u64 refs;
1326
1327         WARN_ON(*level < 0);
1328         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1329         ret = btrfs_lookup_extent_info(NULL, root,
1330                                        path->nodes[*level]->start,
1331                                        *level, 1, &refs, NULL);
1332         if (ret < 0) {
1333                 err = ret;
1334                 goto out;
1335         }
1336
1337         if (refs > 1) {
1338                 ret = enter_shared_node(root, path->nodes[*level]->start,
1339                                         refs, wc, *level);
1340                 if (ret > 0) {
1341                         err = ret;
1342                         goto out;
1343                 }
1344         }
1345
1346         while (*level >= 0) {
1347                 WARN_ON(*level < 0);
1348                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1349                 cur = path->nodes[*level];
1350
1351                 if (btrfs_header_level(cur) != *level)
1352                         WARN_ON(1);
1353
1354                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1355                         break;
1356                 if (*level == 0) {
1357                         ret = process_one_leaf(root, cur, wc);
1358                         break;
1359                 }
1360                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1361                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1362                 blocksize = btrfs_level_size(root, *level - 1);
1363                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1364                                                1, &refs, NULL);
1365                 if (ret < 0)
1366                         refs = 0;
1367
1368                 if (refs > 1) {
1369                         ret = enter_shared_node(root, bytenr, refs,
1370                                                 wc, *level - 1);
1371                         if (ret > 0) {
1372                                 path->slots[*level]++;
1373                                 continue;
1374                         }
1375                 }
1376
1377                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1378                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1379                         free_extent_buffer(next);
1380                         reada_walk_down(root, cur, path->slots[*level]);
1381                         next = read_tree_block(root, bytenr, blocksize,
1382                                                ptr_gen);
1383                         if (!next) {
1384                                 err = -EIO;
1385                                 goto out;
1386                         }
1387                 }
1388
1389                 *level = *level - 1;
1390                 free_extent_buffer(path->nodes[*level]);
1391                 path->nodes[*level] = next;
1392                 path->slots[*level] = 0;
1393         }
1394 out:
1395         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1396         return err;
1397 }
1398
1399 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1400                         struct walk_control *wc, int *level)
1401 {
1402         int i;
1403         struct extent_buffer *leaf;
1404
1405         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1406                 leaf = path->nodes[i];
1407                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1408                         path->slots[i]++;
1409                         *level = i;
1410                         return 0;
1411                 } else {
1412                         free_extent_buffer(path->nodes[*level]);
1413                         path->nodes[*level] = NULL;
1414                         BUG_ON(*level > wc->active_node);
1415                         if (*level == wc->active_node)
1416                                 leave_shared_node(root, wc, *level);
1417                         *level = i + 1;
1418                 }
1419         }
1420         return 1;
1421 }
1422
1423 static int check_root_dir(struct inode_record *rec)
1424 {
1425         struct inode_backref *backref;
1426         int ret = -1;
1427
1428         if (!rec->found_inode_item || rec->errors)
1429                 goto out;
1430         if (rec->nlink != 1 || rec->found_link != 0)
1431                 goto out;
1432         if (list_empty(&rec->backrefs))
1433                 goto out;
1434         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1435         if (!backref->found_inode_ref)
1436                 goto out;
1437         if (backref->index != 0 || backref->namelen != 2 ||
1438             memcmp(backref->name, "..", 2))
1439                 goto out;
1440         if (backref->found_dir_index || backref->found_dir_item)
1441                 goto out;
1442         ret = 0;
1443 out:
1444         return ret;
1445 }
1446
1447 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1448                               struct btrfs_root *root, struct btrfs_path *path,
1449                               struct inode_record *rec)
1450 {
1451         struct btrfs_inode_item *ei;
1452         struct btrfs_key key;
1453         int ret;
1454
1455         key.objectid = rec->ino;
1456         key.type = BTRFS_INODE_ITEM_KEY;
1457         key.offset = (u64)-1;
1458
1459         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1460         if (ret < 0)
1461                 goto out;
1462         if (ret) {
1463                 if (!path->slots[0]) {
1464                         ret = -ENOENT;
1465                         goto out;
1466                 }
1467                 path->slots[0]--;
1468                 ret = 0;
1469         }
1470         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1471         if (key.objectid != rec->ino) {
1472                 ret = -ENOENT;
1473                 goto out;
1474         }
1475
1476         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1477                             struct btrfs_inode_item);
1478         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1479         btrfs_mark_buffer_dirty(path->nodes[0]);
1480         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1481         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1482                root->root_key.objectid);
1483 out:
1484         btrfs_release_path(path);
1485         return ret;
1486 }
1487
1488 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1489                                     struct btrfs_root *root,
1490                                     struct btrfs_path *path,
1491                                     struct inode_record *rec)
1492 {
1493         struct btrfs_key key;
1494         int ret;
1495
1496         key.objectid = BTRFS_ORPHAN_OBJECTID;
1497         key.type = BTRFS_ORPHAN_ITEM_KEY;
1498         key.offset = rec->ino;
1499
1500         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1501         btrfs_release_path(path);
1502         if (!ret)
1503                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1504         return ret;
1505 }
1506
1507 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
1508 {
1509         struct btrfs_trans_handle *trans;
1510         struct btrfs_path *path;
1511         int ret = 0;
1512
1513         /* So far we just fix dir isize wrong */
1514         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM)))
1515                 return 1;
1516
1517         path = btrfs_alloc_path();
1518         if (!path)
1519                 return -ENOMEM;
1520
1521         trans = btrfs_start_transaction(root, 1);
1522         if (IS_ERR(trans)) {
1523                 btrfs_free_path(path);
1524                 return PTR_ERR(trans);
1525         }
1526
1527         if (rec->errors & I_ERR_DIR_ISIZE_WRONG)
1528                 ret = repair_inode_isize(trans, root, path, rec);
1529         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
1530                 ret = repair_inode_orphan_item(trans, root, path, rec);
1531         btrfs_commit_transaction(trans, root);
1532         btrfs_free_path(path);
1533         return ret;
1534 }
1535
1536 static int check_inode_recs(struct btrfs_root *root,
1537                             struct cache_tree *inode_cache)
1538 {
1539         struct cache_extent *cache;
1540         struct ptr_node *node;
1541         struct inode_record *rec;
1542         struct inode_backref *backref;
1543         int ret;
1544         u64 error = 0;
1545         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1546
1547         if (btrfs_root_refs(&root->root_item) == 0) {
1548                 if (!cache_tree_empty(inode_cache))
1549                         fprintf(stderr, "warning line %d\n", __LINE__);
1550                 return 0;
1551         }
1552
1553         rec = get_inode_rec(inode_cache, root_dirid, 0);
1554         if (rec) {
1555                 ret = check_root_dir(rec);
1556                 if (ret) {
1557                         fprintf(stderr, "root %llu root dir %llu error\n",
1558                                 (unsigned long long)root->root_key.objectid,
1559                                 (unsigned long long)root_dirid);
1560                         error++;
1561                 }
1562         } else {
1563                 fprintf(stderr, "root %llu root dir %llu not found\n",
1564                         (unsigned long long)root->root_key.objectid,
1565                         (unsigned long long)root_dirid);
1566         }
1567
1568         while (1) {
1569                 cache = search_cache_extent(inode_cache, 0);
1570                 if (!cache)
1571                         break;
1572                 node = container_of(cache, struct ptr_node, cache);
1573                 rec = node->data;
1574                 remove_cache_extent(inode_cache, &node->cache);
1575                 free(node);
1576                 if (rec->ino == root_dirid ||
1577                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1578                         free_inode_rec(rec);
1579                         continue;
1580                 }
1581
1582                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1583                         ret = check_orphan_item(root, rec->ino);
1584                         if (ret == 0)
1585                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1586                         if (can_free_inode_rec(rec)) {
1587                                 free_inode_rec(rec);
1588                                 continue;
1589                         }
1590                 }
1591
1592                 if (repair) {
1593                         ret = try_repair_inode(root, rec);
1594                         if (ret == 0 && can_free_inode_rec(rec)) {
1595                                 free_inode_rec(rec);
1596                                 continue;
1597                         }
1598                         ret = 0;
1599                 }
1600
1601                 error++;
1602                 if (!rec->found_inode_item)
1603                         rec->errors |= I_ERR_NO_INODE_ITEM;
1604                 if (rec->found_link != rec->nlink)
1605                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1606                 fprintf(stderr, "root %llu inode %llu errors %x",
1607                         (unsigned long long) root->root_key.objectid,
1608                         (unsigned long long) rec->ino, rec->errors);
1609                 print_inode_error(rec->errors);
1610                 list_for_each_entry(backref, &rec->backrefs, list) {
1611                         if (!backref->found_dir_item)
1612                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1613                         if (!backref->found_dir_index)
1614                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1615                         if (!backref->found_inode_ref)
1616                                 backref->errors |= REF_ERR_NO_INODE_REF;
1617                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1618                                 " namelen %u name %s filetype %d errors %x",
1619                                 (unsigned long long)backref->dir,
1620                                 (unsigned long long)backref->index,
1621                                 backref->namelen, backref->name,
1622                                 backref->filetype, backref->errors);
1623                         print_ref_error(backref->errors);
1624                 }
1625                 free_inode_rec(rec);
1626         }
1627         return (error > 0) ? -1 : 0;
1628 }
1629
1630 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1631                                         u64 objectid)
1632 {
1633         struct cache_extent *cache;
1634         struct root_record *rec = NULL;
1635         int ret;
1636
1637         cache = lookup_cache_extent(root_cache, objectid, 1);
1638         if (cache) {
1639                 rec = container_of(cache, struct root_record, cache);
1640         } else {
1641                 rec = calloc(1, sizeof(*rec));
1642                 rec->objectid = objectid;
1643                 INIT_LIST_HEAD(&rec->backrefs);
1644                 rec->cache.start = objectid;
1645                 rec->cache.size = 1;
1646
1647                 ret = insert_cache_extent(root_cache, &rec->cache);
1648                 BUG_ON(ret);
1649         }
1650         return rec;
1651 }
1652
1653 static struct root_backref *get_root_backref(struct root_record *rec,
1654                                              u64 ref_root, u64 dir, u64 index,
1655                                              const char *name, int namelen)
1656 {
1657         struct root_backref *backref;
1658
1659         list_for_each_entry(backref, &rec->backrefs, list) {
1660                 if (backref->ref_root != ref_root || backref->dir != dir ||
1661                     backref->namelen != namelen)
1662                         continue;
1663                 if (memcmp(name, backref->name, namelen))
1664                         continue;
1665                 return backref;
1666         }
1667
1668         backref = malloc(sizeof(*backref) + namelen + 1);
1669         memset(backref, 0, sizeof(*backref));
1670         backref->ref_root = ref_root;
1671         backref->dir = dir;
1672         backref->index = index;
1673         backref->namelen = namelen;
1674         memcpy(backref->name, name, namelen);
1675         backref->name[namelen] = '\0';
1676         list_add_tail(&backref->list, &rec->backrefs);
1677         return backref;
1678 }
1679
1680 static void free_root_record(struct cache_extent *cache)
1681 {
1682         struct root_record *rec;
1683         struct root_backref *backref;
1684
1685         rec = container_of(cache, struct root_record, cache);
1686         while (!list_empty(&rec->backrefs)) {
1687                 backref = list_entry(rec->backrefs.next,
1688                                      struct root_backref, list);
1689                 list_del(&backref->list);
1690                 free(backref);
1691         }
1692
1693         kfree(rec);
1694 }
1695
1696 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
1697
1698 static int add_root_backref(struct cache_tree *root_cache,
1699                             u64 root_id, u64 ref_root, u64 dir, u64 index,
1700                             const char *name, int namelen,
1701                             int item_type, int errors)
1702 {
1703         struct root_record *rec;
1704         struct root_backref *backref;
1705
1706         rec = get_root_rec(root_cache, root_id);
1707         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
1708
1709         backref->errors |= errors;
1710
1711         if (item_type != BTRFS_DIR_ITEM_KEY) {
1712                 if (backref->found_dir_index || backref->found_back_ref ||
1713                     backref->found_forward_ref) {
1714                         if (backref->index != index)
1715                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
1716                 } else {
1717                         backref->index = index;
1718                 }
1719         }
1720
1721         if (item_type == BTRFS_DIR_ITEM_KEY) {
1722                 if (backref->found_forward_ref)
1723                         rec->found_ref++;
1724                 backref->found_dir_item = 1;
1725         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
1726                 backref->found_dir_index = 1;
1727         } else if (item_type == BTRFS_ROOT_REF_KEY) {
1728                 if (backref->found_forward_ref)
1729                         backref->errors |= REF_ERR_DUP_ROOT_REF;
1730                 else if (backref->found_dir_item)
1731                         rec->found_ref++;
1732                 backref->found_forward_ref = 1;
1733         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
1734                 if (backref->found_back_ref)
1735                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
1736                 backref->found_back_ref = 1;
1737         } else {
1738                 BUG_ON(1);
1739         }
1740
1741         if (backref->found_forward_ref && backref->found_dir_item)
1742                 backref->reachable = 1;
1743         return 0;
1744 }
1745
1746 static int merge_root_recs(struct btrfs_root *root,
1747                            struct cache_tree *src_cache,
1748                            struct cache_tree *dst_cache)
1749 {
1750         struct cache_extent *cache;
1751         struct ptr_node *node;
1752         struct inode_record *rec;
1753         struct inode_backref *backref;
1754
1755         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1756                 free_inode_recs_tree(src_cache);
1757                 return 0;
1758         }
1759
1760         while (1) {
1761                 cache = search_cache_extent(src_cache, 0);
1762                 if (!cache)
1763                         break;
1764                 node = container_of(cache, struct ptr_node, cache);
1765                 rec = node->data;
1766                 remove_cache_extent(src_cache, &node->cache);
1767                 free(node);
1768
1769                 if (!is_child_root(root, root->objectid, rec->ino))
1770                         goto skip;
1771
1772                 list_for_each_entry(backref, &rec->backrefs, list) {
1773                         BUG_ON(backref->found_inode_ref);
1774                         if (backref->found_dir_item)
1775                                 add_root_backref(dst_cache, rec->ino,
1776                                         root->root_key.objectid, backref->dir,
1777                                         backref->index, backref->name,
1778                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
1779                                         backref->errors);
1780                         if (backref->found_dir_index)
1781                                 add_root_backref(dst_cache, rec->ino,
1782                                         root->root_key.objectid, backref->dir,
1783                                         backref->index, backref->name,
1784                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
1785                                         backref->errors);
1786                 }
1787 skip:
1788                 free_inode_rec(rec);
1789         }
1790         return 0;
1791 }
1792
1793 static int check_root_refs(struct btrfs_root *root,
1794                            struct cache_tree *root_cache)
1795 {
1796         struct root_record *rec;
1797         struct root_record *ref_root;
1798         struct root_backref *backref;
1799         struct cache_extent *cache;
1800         int loop = 1;
1801         int ret;
1802         int error;
1803         int errors = 0;
1804
1805         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
1806         rec->found_ref = 1;
1807
1808         /* fixme: this can not detect circular references */
1809         while (loop) {
1810                 loop = 0;
1811                 cache = search_cache_extent(root_cache, 0);
1812                 while (1) {
1813                         if (!cache)
1814                                 break;
1815                         rec = container_of(cache, struct root_record, cache);
1816                         cache = next_cache_extent(cache);
1817
1818                         if (rec->found_ref == 0)
1819                                 continue;
1820
1821                         list_for_each_entry(backref, &rec->backrefs, list) {
1822                                 if (!backref->reachable)
1823                                         continue;
1824
1825                                 ref_root = get_root_rec(root_cache,
1826                                                         backref->ref_root);
1827                                 if (ref_root->found_ref > 0)
1828                                         continue;
1829
1830                                 backref->reachable = 0;
1831                                 rec->found_ref--;
1832                                 if (rec->found_ref == 0)
1833                                         loop = 1;
1834                         }
1835                 }
1836         }
1837
1838         cache = search_cache_extent(root_cache, 0);
1839         while (1) {
1840                 if (!cache)
1841                         break;
1842                 rec = container_of(cache, struct root_record, cache);
1843                 cache = next_cache_extent(cache);
1844
1845                 if (rec->found_ref == 0 &&
1846                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1847                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
1848                         ret = check_orphan_item(root->fs_info->tree_root,
1849                                                 rec->objectid);
1850                         if (ret == 0)
1851                                 continue;
1852
1853                         /*
1854                          * If we don't have a root item then we likely just have
1855                          * a dir item in a snapshot for this root but no actual
1856                          * ref key or anything so it's meaningless.
1857                          */
1858                         if (!rec->found_root_item)
1859                                 continue;
1860                         errors++;
1861                         fprintf(stderr, "fs tree %llu not referenced\n",
1862                                 (unsigned long long)rec->objectid);
1863                 }
1864
1865                 error = 0;
1866                 if (rec->found_ref > 0 && !rec->found_root_item)
1867                         error = 1;
1868                 list_for_each_entry(backref, &rec->backrefs, list) {
1869                         if (!backref->found_dir_item)
1870                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1871                         if (!backref->found_dir_index)
1872                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1873                         if (!backref->found_back_ref)
1874                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
1875                         if (!backref->found_forward_ref)
1876                                 backref->errors |= REF_ERR_NO_ROOT_REF;
1877                         if (backref->reachable && backref->errors)
1878                                 error = 1;
1879                 }
1880                 if (!error)
1881                         continue;
1882
1883                 errors++;
1884                 fprintf(stderr, "fs tree %llu refs %u %s\n",
1885                         (unsigned long long)rec->objectid, rec->found_ref,
1886                          rec->found_root_item ? "" : "not found");
1887
1888                 list_for_each_entry(backref, &rec->backrefs, list) {
1889                         if (!backref->reachable)
1890                                 continue;
1891                         if (!backref->errors && rec->found_root_item)
1892                                 continue;
1893                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
1894                                 " index %llu namelen %u name %s errors %x\n",
1895                                 (unsigned long long)backref->ref_root,
1896                                 (unsigned long long)backref->dir,
1897                                 (unsigned long long)backref->index,
1898                                 backref->namelen, backref->name,
1899                                 backref->errors);
1900                         print_ref_error(backref->errors);
1901                 }
1902         }
1903         return errors > 0 ? 1 : 0;
1904 }
1905
1906 static int process_root_ref(struct extent_buffer *eb, int slot,
1907                             struct btrfs_key *key,
1908                             struct cache_tree *root_cache)
1909 {
1910         u64 dirid;
1911         u64 index;
1912         u32 len;
1913         u32 name_len;
1914         struct btrfs_root_ref *ref;
1915         char namebuf[BTRFS_NAME_LEN];
1916         int error;
1917
1918         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
1919
1920         dirid = btrfs_root_ref_dirid(eb, ref);
1921         index = btrfs_root_ref_sequence(eb, ref);
1922         name_len = btrfs_root_ref_name_len(eb, ref);
1923
1924         if (name_len <= BTRFS_NAME_LEN) {
1925                 len = name_len;
1926                 error = 0;
1927         } else {
1928                 len = BTRFS_NAME_LEN;
1929                 error = REF_ERR_NAME_TOO_LONG;
1930         }
1931         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1932
1933         if (key->type == BTRFS_ROOT_REF_KEY) {
1934                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
1935                                  index, namebuf, len, key->type, error);
1936         } else {
1937                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
1938                                  index, namebuf, len, key->type, error);
1939         }
1940         return 0;
1941 }
1942
1943 static int check_fs_root(struct btrfs_root *root,
1944                          struct cache_tree *root_cache,
1945                          struct walk_control *wc)
1946 {
1947         int ret = 0;
1948         int wret;
1949         int level;
1950         struct btrfs_path path;
1951         struct shared_node root_node;
1952         struct root_record *rec;
1953         struct btrfs_root_item *root_item = &root->root_item;
1954
1955         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1956                 rec = get_root_rec(root_cache, root->root_key.objectid);
1957                 if (btrfs_root_refs(root_item) > 0)
1958                         rec->found_root_item = 1;
1959         }
1960
1961         btrfs_init_path(&path);
1962         memset(&root_node, 0, sizeof(root_node));
1963         cache_tree_init(&root_node.root_cache);
1964         cache_tree_init(&root_node.inode_cache);
1965
1966         level = btrfs_header_level(root->node);
1967         memset(wc->nodes, 0, sizeof(wc->nodes));
1968         wc->nodes[level] = &root_node;
1969         wc->active_node = level;
1970         wc->root_level = level;
1971
1972         if (btrfs_root_refs(root_item) > 0 ||
1973             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1974                 path.nodes[level] = root->node;
1975                 extent_buffer_get(root->node);
1976                 path.slots[level] = 0;
1977         } else {
1978                 struct btrfs_key key;
1979                 struct btrfs_disk_key found_key;
1980
1981                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1982                 level = root_item->drop_level;
1983                 path.lowest_level = level;
1984                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1985                 BUG_ON(wret < 0);
1986                 btrfs_node_key(path.nodes[level], &found_key,
1987                                 path.slots[level]);
1988                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1989                                         sizeof(found_key)));
1990         }
1991
1992         while (1) {
1993                 wret = walk_down_tree(root, &path, wc, &level);
1994                 if (wret < 0)
1995                         ret = wret;
1996                 if (wret != 0)
1997                         break;
1998
1999                 wret = walk_up_tree(root, &path, wc, &level);
2000                 if (wret < 0)
2001                         ret = wret;
2002                 if (wret != 0)
2003                         break;
2004         }
2005         btrfs_release_path(&path);
2006
2007         merge_root_recs(root, &root_node.root_cache, root_cache);
2008
2009         if (root_node.current) {
2010                 root_node.current->checked = 1;
2011                 maybe_free_inode_rec(&root_node.inode_cache,
2012                                 root_node.current);
2013         }
2014
2015         ret = check_inode_recs(root, &root_node.inode_cache);
2016         return ret;
2017 }
2018
2019 static int fs_root_objectid(u64 objectid)
2020 {
2021         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
2022             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2023                 return 1;
2024         return is_fstree(objectid);
2025 }
2026
2027 static int check_fs_roots(struct btrfs_root *root,
2028                           struct cache_tree *root_cache)
2029 {
2030         struct btrfs_path path;
2031         struct btrfs_key key;
2032         struct walk_control wc;
2033         struct extent_buffer *leaf;
2034         struct btrfs_root *tmp_root;
2035         struct btrfs_root *tree_root = root->fs_info->tree_root;
2036         int ret;
2037         int err = 0;
2038
2039         /*
2040          * Just in case we made any changes to the extent tree that weren't
2041          * reflected into the free space cache yet.
2042          */
2043         if (repair)
2044                 reset_cached_block_groups(root->fs_info);
2045         memset(&wc, 0, sizeof(wc));
2046         cache_tree_init(&wc.shared);
2047         btrfs_init_path(&path);
2048
2049         key.offset = 0;
2050         key.objectid = 0;
2051         key.type = BTRFS_ROOT_ITEM_KEY;
2052         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
2053         BUG_ON(ret < 0);
2054         while (1) {
2055                 leaf = path.nodes[0];
2056                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2057                         ret = btrfs_next_leaf(tree_root, &path);
2058                         if (ret != 0)
2059                                 break;
2060                         leaf = path.nodes[0];
2061                 }
2062                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2063                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
2064                     fs_root_objectid(key.objectid)) {
2065                         key.offset = (u64)-1;
2066                         tmp_root = btrfs_read_fs_root(root->fs_info, &key);
2067                         if (IS_ERR(tmp_root)) {
2068                                 err = 1;
2069                                 goto next;
2070                         }
2071                         ret = check_fs_root(tmp_root, root_cache, &wc);
2072                         if (ret)
2073                                 err = 1;
2074                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
2075                            key.type == BTRFS_ROOT_BACKREF_KEY) {
2076                         process_root_ref(leaf, path.slots[0], &key,
2077                                          root_cache);
2078                 }
2079 next:
2080                 path.slots[0]++;
2081         }
2082         btrfs_release_path(&path);
2083
2084         if (!cache_tree_empty(&wc.shared))
2085                 fprintf(stderr, "warning line %d\n", __LINE__);
2086
2087         return err;
2088 }
2089
2090 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
2091 {
2092         struct list_head *cur = rec->backrefs.next;
2093         struct extent_backref *back;
2094         struct tree_backref *tback;
2095         struct data_backref *dback;
2096         u64 found = 0;
2097         int err = 0;
2098
2099         while(cur != &rec->backrefs) {
2100                 back = list_entry(cur, struct extent_backref, list);
2101                 cur = cur->next;
2102                 if (!back->found_extent_tree) {
2103                         err = 1;
2104                         if (!print_errs)
2105                                 goto out;
2106                         if (back->is_data) {
2107                                 dback = (struct data_backref *)back;
2108                                 fprintf(stderr, "Backref %llu %s %llu"
2109                                         " owner %llu offset %llu num_refs %lu"
2110                                         " not found in extent tree\n",
2111                                         (unsigned long long)rec->start,
2112                                         back->full_backref ?
2113                                         "parent" : "root",
2114                                         back->full_backref ?
2115                                         (unsigned long long)dback->parent:
2116                                         (unsigned long long)dback->root,
2117                                         (unsigned long long)dback->owner,
2118                                         (unsigned long long)dback->offset,
2119                                         (unsigned long)dback->num_refs);
2120                         } else {
2121                                 tback = (struct tree_backref *)back;
2122                                 fprintf(stderr, "Backref %llu parent %llu"
2123                                         " root %llu not found in extent tree\n",
2124                                         (unsigned long long)rec->start,
2125                                         (unsigned long long)tback->parent,
2126                                         (unsigned long long)tback->root);
2127                         }
2128                 }
2129                 if (!back->is_data && !back->found_ref) {
2130                         err = 1;
2131                         if (!print_errs)
2132                                 goto out;
2133                         tback = (struct tree_backref *)back;
2134                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
2135                                 (unsigned long long)rec->start,
2136                                 back->full_backref ? "parent" : "root",
2137                                 back->full_backref ?
2138                                 (unsigned long long)tback->parent :
2139                                 (unsigned long long)tback->root, back);
2140                 }
2141                 if (back->is_data) {
2142                         dback = (struct data_backref *)back;
2143                         if (dback->found_ref != dback->num_refs) {
2144                                 err = 1;
2145                                 if (!print_errs)
2146                                         goto out;
2147                                 fprintf(stderr, "Incorrect local backref count"
2148                                         " on %llu %s %llu owner %llu"
2149                                         " offset %llu found %u wanted %u back %p\n",
2150                                         (unsigned long long)rec->start,
2151                                         back->full_backref ?
2152                                         "parent" : "root",
2153                                         back->full_backref ?
2154                                         (unsigned long long)dback->parent:
2155                                         (unsigned long long)dback->root,
2156                                         (unsigned long long)dback->owner,
2157                                         (unsigned long long)dback->offset,
2158                                         dback->found_ref, dback->num_refs, back);
2159                         }
2160                         if (dback->disk_bytenr != rec->start) {
2161                                 err = 1;
2162                                 if (!print_errs)
2163                                         goto out;
2164                                 fprintf(stderr, "Backref disk bytenr does not"
2165                                         " match extent record, bytenr=%llu, "
2166                                         "ref bytenr=%llu\n",
2167                                         (unsigned long long)rec->start,
2168                                         (unsigned long long)dback->disk_bytenr);
2169                         }
2170
2171                         if (dback->bytes != rec->nr) {
2172                                 err = 1;
2173                                 if (!print_errs)
2174                                         goto out;
2175                                 fprintf(stderr, "Backref bytes do not match "
2176                                         "extent backref, bytenr=%llu, ref "
2177                                         "bytes=%llu, backref bytes=%llu\n",
2178                                         (unsigned long long)rec->start,
2179                                         (unsigned long long)rec->nr,
2180                                         (unsigned long long)dback->bytes);
2181                         }
2182                 }
2183                 if (!back->is_data) {
2184                         found += 1;
2185                 } else {
2186                         dback = (struct data_backref *)back;
2187                         found += dback->found_ref;
2188                 }
2189         }
2190         if (found != rec->refs) {
2191                 err = 1;
2192                 if (!print_errs)
2193                         goto out;
2194                 fprintf(stderr, "Incorrect global backref count "
2195                         "on %llu found %llu wanted %llu\n",
2196                         (unsigned long long)rec->start,
2197                         (unsigned long long)found,
2198                         (unsigned long long)rec->refs);
2199         }
2200 out:
2201         return err;
2202 }
2203
2204 static int free_all_extent_backrefs(struct extent_record *rec)
2205 {
2206         struct extent_backref *back;
2207         struct list_head *cur;
2208         while (!list_empty(&rec->backrefs)) {
2209                 cur = rec->backrefs.next;
2210                 back = list_entry(cur, struct extent_backref, list);
2211                 list_del(cur);
2212                 free(back);
2213         }
2214         return 0;
2215 }
2216
2217 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
2218                                      struct cache_tree *extent_cache)
2219 {
2220         struct cache_extent *cache;
2221         struct extent_record *rec;
2222
2223         while (1) {
2224                 cache = first_cache_extent(extent_cache);
2225                 if (!cache)
2226                         break;
2227                 rec = container_of(cache, struct extent_record, cache);
2228                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2229                 remove_cache_extent(extent_cache, cache);
2230                 free_all_extent_backrefs(rec);
2231                 free(rec);
2232         }
2233 }
2234
2235 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2236                                  struct extent_record *rec)
2237 {
2238         if (rec->content_checked && rec->owner_ref_checked &&
2239             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2240             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2241                 remove_cache_extent(extent_cache, &rec->cache);
2242                 free_all_extent_backrefs(rec);
2243                 list_del_init(&rec->list);
2244                 free(rec);
2245         }
2246         return 0;
2247 }
2248
2249 static int check_owner_ref(struct btrfs_root *root,
2250                             struct extent_record *rec,
2251                             struct extent_buffer *buf)
2252 {
2253         struct extent_backref *node;
2254         struct tree_backref *back;
2255         struct btrfs_root *ref_root;
2256         struct btrfs_key key;
2257         struct btrfs_path path;
2258         struct extent_buffer *parent;
2259         int level;
2260         int found = 0;
2261         int ret;
2262
2263         list_for_each_entry(node, &rec->backrefs, list) {
2264                 if (node->is_data)
2265                         continue;
2266                 if (!node->found_ref)
2267                         continue;
2268                 if (node->full_backref)
2269                         continue;
2270                 back = (struct tree_backref *)node;
2271                 if (btrfs_header_owner(buf) == back->root)
2272                         return 0;
2273         }
2274         BUG_ON(rec->is_root);
2275
2276         /* try to find the block by search corresponding fs tree */
2277         key.objectid = btrfs_header_owner(buf);
2278         key.type = BTRFS_ROOT_ITEM_KEY;
2279         key.offset = (u64)-1;
2280
2281         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2282         if (IS_ERR(ref_root))
2283                 return 1;
2284
2285         level = btrfs_header_level(buf);
2286         if (level == 0)
2287                 btrfs_item_key_to_cpu(buf, &key, 0);
2288         else
2289                 btrfs_node_key_to_cpu(buf, &key, 0);
2290
2291         btrfs_init_path(&path);
2292         path.lowest_level = level + 1;
2293         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2294         if (ret < 0)
2295                 return 0;
2296
2297         parent = path.nodes[level + 1];
2298         if (parent && buf->start == btrfs_node_blockptr(parent,
2299                                                         path.slots[level + 1]))
2300                 found = 1;
2301
2302         btrfs_release_path(&path);
2303         return found ? 0 : 1;
2304 }
2305
2306 static int is_extent_tree_record(struct extent_record *rec)
2307 {
2308         struct list_head *cur = rec->backrefs.next;
2309         struct extent_backref *node;
2310         struct tree_backref *back;
2311         int is_extent = 0;
2312
2313         while(cur != &rec->backrefs) {
2314                 node = list_entry(cur, struct extent_backref, list);
2315                 cur = cur->next;
2316                 if (node->is_data)
2317                         return 0;
2318                 back = (struct tree_backref *)node;
2319                 if (node->full_backref)
2320                         return 0;
2321                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2322                         is_extent = 1;
2323         }
2324         return is_extent;
2325 }
2326
2327
2328 static int record_bad_block_io(struct btrfs_fs_info *info,
2329                                struct cache_tree *extent_cache,
2330                                u64 start, u64 len)
2331 {
2332         struct extent_record *rec;
2333         struct cache_extent *cache;
2334         struct btrfs_key key;
2335
2336         cache = lookup_cache_extent(extent_cache, start, len);
2337         if (!cache)
2338                 return 0;
2339
2340         rec = container_of(cache, struct extent_record, cache);
2341         if (!is_extent_tree_record(rec))
2342                 return 0;
2343
2344         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2345         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2346 }
2347
2348 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
2349                        struct extent_buffer *buf, int slot)
2350 {
2351         if (btrfs_header_level(buf)) {
2352                 struct btrfs_key_ptr ptr1, ptr2;
2353
2354                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
2355                                    sizeof(struct btrfs_key_ptr));
2356                 read_extent_buffer(buf, &ptr2,
2357                                    btrfs_node_key_ptr_offset(slot + 1),
2358                                    sizeof(struct btrfs_key_ptr));
2359                 write_extent_buffer(buf, &ptr1,
2360                                     btrfs_node_key_ptr_offset(slot + 1),
2361                                     sizeof(struct btrfs_key_ptr));
2362                 write_extent_buffer(buf, &ptr2,
2363                                     btrfs_node_key_ptr_offset(slot),
2364                                     sizeof(struct btrfs_key_ptr));
2365                 if (slot == 0) {
2366                         struct btrfs_disk_key key;
2367                         btrfs_node_key(buf, &key, 0);
2368                         btrfs_fixup_low_keys(root, path, &key,
2369                                              btrfs_header_level(buf) + 1);
2370                 }
2371         } else {
2372                 struct btrfs_item *item1, *item2;
2373                 struct btrfs_key k1, k2;
2374                 char *item1_data, *item2_data;
2375                 u32 item1_offset, item2_offset, item1_size, item2_size;
2376
2377                 item1 = btrfs_item_nr(slot);
2378                 item2 = btrfs_item_nr(slot + 1);
2379                 btrfs_item_key_to_cpu(buf, &k1, slot);
2380                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
2381                 item1_offset = btrfs_item_offset(buf, item1);
2382                 item2_offset = btrfs_item_offset(buf, item2);
2383                 item1_size = btrfs_item_size(buf, item1);
2384                 item2_size = btrfs_item_size(buf, item2);
2385
2386                 item1_data = malloc(item1_size);
2387                 if (!item1_data)
2388                         return -ENOMEM;
2389                 item2_data = malloc(item2_size);
2390                 if (!item2_data) {
2391                         free(item1_data);
2392                         return -ENOMEM;
2393                 }
2394
2395                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
2396                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
2397
2398                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
2399                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
2400                 free(item1_data);
2401                 free(item2_data);
2402
2403                 btrfs_set_item_offset(buf, item1, item2_offset);
2404                 btrfs_set_item_offset(buf, item2, item1_offset);
2405                 btrfs_set_item_size(buf, item1, item2_size);
2406                 btrfs_set_item_size(buf, item2, item1_size);
2407
2408                 path->slots[0] = slot;
2409                 btrfs_set_item_key_unsafe(root, path, &k2);
2410                 path->slots[0] = slot + 1;
2411                 btrfs_set_item_key_unsafe(root, path, &k1);
2412         }
2413         return 0;
2414 }
2415
2416 /*
2417  * Attempt to fix basic block failures.  Currently we only handle bad key
2418  * orders, we will cycle through the keys and swap them if necessary.
2419  */
2420 static int try_to_fix_bad_block(struct btrfs_trans_handle *trans,
2421                                 struct btrfs_root *root,
2422                                 struct extent_buffer *buf,
2423                                 struct btrfs_disk_key *parent_key,
2424                                 enum btrfs_tree_block_status status)
2425 {
2426         struct btrfs_path *path;
2427         struct btrfs_key k1, k2;
2428         int i;
2429         int level;
2430         int ret;
2431
2432         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
2433                 return -EIO;
2434
2435         k1.objectid = btrfs_header_owner(buf);
2436         k1.type = BTRFS_ROOT_ITEM_KEY;
2437         k1.offset = (u64)-1;
2438
2439         root = btrfs_read_fs_root(root->fs_info, &k1);
2440         if (IS_ERR(root))
2441                 return -EIO;
2442
2443         record_root_in_trans(trans, root);
2444
2445         path = btrfs_alloc_path();
2446         if (!path)
2447                 return -EIO;
2448
2449         level = btrfs_header_level(buf);
2450         path->lowest_level = level;
2451         path->skip_check_block = 1;
2452         if (level)
2453                 btrfs_node_key_to_cpu(buf, &k1, 0);
2454         else
2455                 btrfs_item_key_to_cpu(buf, &k1, 0);
2456
2457         ret = btrfs_search_slot(trans, root, &k1, path, 0, 1);
2458         if (ret) {
2459                 btrfs_free_path(path);
2460                 return -EIO;
2461         }
2462
2463         buf = path->nodes[level];
2464         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
2465                 if (level) {
2466                         btrfs_node_key_to_cpu(buf, &k1, i);
2467                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
2468                 } else {
2469                         btrfs_item_key_to_cpu(buf, &k1, i);
2470                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
2471                 }
2472                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
2473                         continue;
2474                 ret = swap_values(root, path, buf, i);
2475                 if (ret)
2476                         break;
2477                 btrfs_mark_buffer_dirty(buf);
2478                 i = 0;
2479         }
2480
2481         btrfs_free_path(path);
2482         return ret;
2483 }
2484
2485 static int check_block(struct btrfs_trans_handle *trans,
2486                        struct btrfs_root *root,
2487                        struct cache_tree *extent_cache,
2488                        struct extent_buffer *buf, u64 flags)
2489 {
2490         struct extent_record *rec;
2491         struct cache_extent *cache;
2492         struct btrfs_key key;
2493         enum btrfs_tree_block_status status;
2494         int ret = 0;
2495         int level;
2496
2497         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
2498         if (!cache)
2499                 return 1;
2500         rec = container_of(cache, struct extent_record, cache);
2501         rec->generation = btrfs_header_generation(buf);
2502
2503         level = btrfs_header_level(buf);
2504         if (btrfs_header_nritems(buf) > 0) {
2505
2506                 if (level == 0)
2507                         btrfs_item_key_to_cpu(buf, &key, 0);
2508                 else
2509                         btrfs_node_key_to_cpu(buf, &key, 0);
2510
2511                 rec->info_objectid = key.objectid;
2512         }
2513         rec->info_level = level;
2514
2515         if (btrfs_is_leaf(buf))
2516                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
2517         else
2518                 status = btrfs_check_node(root, &rec->parent_key, buf);
2519
2520         if (status != BTRFS_TREE_BLOCK_CLEAN) {
2521                 if (repair)
2522                         status = try_to_fix_bad_block(trans, root, buf,
2523                                                       &rec->parent_key,
2524                                                       status);
2525                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2526                         ret = -EIO;
2527                         fprintf(stderr, "bad block %llu\n",
2528                                 (unsigned long long)buf->start);
2529                 } else {
2530                         /*
2531                          * Signal to callers we need to start the scan over
2532                          * again since we'll have cow'ed blocks.
2533                          */
2534                         ret = -EAGAIN;
2535                 }
2536         } else {
2537                 rec->content_checked = 1;
2538                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2539                         rec->owner_ref_checked = 1;
2540                 else {
2541                         ret = check_owner_ref(root, rec, buf);
2542                         if (!ret)
2543                                 rec->owner_ref_checked = 1;
2544                 }
2545         }
2546         if (!ret)
2547                 maybe_free_extent_rec(extent_cache, rec);
2548         return ret;
2549 }
2550
2551 static struct tree_backref *find_tree_backref(struct extent_record *rec,
2552                                                 u64 parent, u64 root)
2553 {
2554         struct list_head *cur = rec->backrefs.next;
2555         struct extent_backref *node;
2556         struct tree_backref *back;
2557
2558         while(cur != &rec->backrefs) {
2559                 node = list_entry(cur, struct extent_backref, list);
2560                 cur = cur->next;
2561                 if (node->is_data)
2562                         continue;
2563                 back = (struct tree_backref *)node;
2564                 if (parent > 0) {
2565                         if (!node->full_backref)
2566                                 continue;
2567                         if (parent == back->parent)
2568                                 return back;
2569                 } else {
2570                         if (node->full_backref)
2571                                 continue;
2572                         if (back->root == root)
2573                                 return back;
2574                 }
2575         }
2576         return NULL;
2577 }
2578
2579 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
2580                                                 u64 parent, u64 root)
2581 {
2582         struct tree_backref *ref = malloc(sizeof(*ref));
2583         memset(&ref->node, 0, sizeof(ref->node));
2584         if (parent > 0) {
2585                 ref->parent = parent;
2586                 ref->node.full_backref = 1;
2587         } else {
2588                 ref->root = root;
2589                 ref->node.full_backref = 0;
2590         }
2591         list_add_tail(&ref->node.list, &rec->backrefs);
2592
2593         return ref;
2594 }
2595
2596 static struct data_backref *find_data_backref(struct extent_record *rec,
2597                                                 u64 parent, u64 root,
2598                                                 u64 owner, u64 offset,
2599                                                 int found_ref,
2600                                                 u64 disk_bytenr, u64 bytes)
2601 {
2602         struct list_head *cur = rec->backrefs.next;
2603         struct extent_backref *node;
2604         struct data_backref *back;
2605
2606         while(cur != &rec->backrefs) {
2607                 node = list_entry(cur, struct extent_backref, list);
2608                 cur = cur->next;
2609                 if (!node->is_data)
2610                         continue;
2611                 back = (struct data_backref *)node;
2612                 if (parent > 0) {
2613                         if (!node->full_backref)
2614                                 continue;
2615                         if (parent == back->parent)
2616                                 return back;
2617                 } else {
2618                         if (node->full_backref)
2619                                 continue;
2620                         if (back->root == root && back->owner == owner &&
2621                             back->offset == offset) {
2622                                 if (found_ref && node->found_ref &&
2623                                     (back->bytes != bytes ||
2624                                     back->disk_bytenr != disk_bytenr))
2625                                         continue;
2626                                 return back;
2627                         }
2628                 }
2629         }
2630         return NULL;
2631 }
2632
2633 static struct data_backref *alloc_data_backref(struct extent_record *rec,
2634                                                 u64 parent, u64 root,
2635                                                 u64 owner, u64 offset,
2636                                                 u64 max_size)
2637 {
2638         struct data_backref *ref = malloc(sizeof(*ref));
2639         memset(&ref->node, 0, sizeof(ref->node));
2640         ref->node.is_data = 1;
2641
2642         if (parent > 0) {
2643                 ref->parent = parent;
2644                 ref->owner = 0;
2645                 ref->offset = 0;
2646                 ref->node.full_backref = 1;
2647         } else {
2648                 ref->root = root;
2649                 ref->owner = owner;
2650                 ref->offset = offset;
2651                 ref->node.full_backref = 0;
2652         }
2653         ref->bytes = max_size;
2654         ref->found_ref = 0;
2655         ref->num_refs = 0;
2656         list_add_tail(&ref->node.list, &rec->backrefs);
2657         if (max_size > rec->max_size)
2658                 rec->max_size = max_size;
2659         return ref;
2660 }
2661
2662 static int add_extent_rec(struct cache_tree *extent_cache,
2663                           struct btrfs_key *parent_key, u64 parent_gen,
2664                           u64 start, u64 nr, u64 extent_item_refs,
2665                           int is_root, int inc_ref, int set_checked,
2666                           int metadata, int extent_rec, u64 max_size)
2667 {
2668         struct extent_record *rec;
2669         struct cache_extent *cache;
2670         int ret = 0;
2671         int dup = 0;
2672
2673         cache = lookup_cache_extent(extent_cache, start, nr);
2674         if (cache) {
2675                 rec = container_of(cache, struct extent_record, cache);
2676                 if (inc_ref)
2677                         rec->refs++;
2678                 if (rec->nr == 1)
2679                         rec->nr = max(nr, max_size);
2680
2681                 /*
2682                  * We need to make sure to reset nr to whatever the extent
2683                  * record says was the real size, this way we can compare it to
2684                  * the backrefs.
2685                  */
2686                 if (extent_rec) {
2687                         if (start != rec->start || rec->found_rec) {
2688                                 struct extent_record *tmp;
2689
2690                                 dup = 1;
2691                                 if (list_empty(&rec->list))
2692                                         list_add_tail(&rec->list,
2693                                                       &duplicate_extents);
2694
2695                                 /*
2696                                  * We have to do this song and dance in case we
2697                                  * find an extent record that falls inside of
2698                                  * our current extent record but does not have
2699                                  * the same objectid.
2700                                  */
2701                                 tmp = malloc(sizeof(*tmp));
2702                                 if (!tmp)
2703                                         return -ENOMEM;
2704                                 tmp->start = start;
2705                                 tmp->max_size = max_size;
2706                                 tmp->nr = nr;
2707                                 tmp->found_rec = 1;
2708                                 tmp->metadata = metadata;
2709                                 tmp->extent_item_refs = extent_item_refs;
2710                                 INIT_LIST_HEAD(&tmp->list);
2711                                 list_add_tail(&tmp->list, &rec->dups);
2712                                 rec->num_duplicates++;
2713                         } else {
2714                                 rec->nr = nr;
2715                                 rec->found_rec = 1;
2716                         }
2717                 }
2718
2719                 if (extent_item_refs && !dup) {
2720                         if (rec->extent_item_refs) {
2721                                 fprintf(stderr, "block %llu rec "
2722                                         "extent_item_refs %llu, passed %llu\n",
2723                                         (unsigned long long)start,
2724                                         (unsigned long long)
2725                                                         rec->extent_item_refs,
2726                                         (unsigned long long)extent_item_refs);
2727                         }
2728                         rec->extent_item_refs = extent_item_refs;
2729                 }
2730                 if (is_root)
2731                         rec->is_root = 1;
2732                 if (set_checked) {
2733                         rec->content_checked = 1;
2734                         rec->owner_ref_checked = 1;
2735                 }
2736
2737                 if (parent_key)
2738                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2739                 if (parent_gen)
2740                         rec->parent_generation = parent_gen;
2741
2742                 if (rec->max_size < max_size)
2743                         rec->max_size = max_size;
2744
2745                 maybe_free_extent_rec(extent_cache, rec);
2746                 return ret;
2747         }
2748         rec = malloc(sizeof(*rec));
2749         rec->start = start;
2750         rec->max_size = max_size;
2751         rec->nr = max(nr, max_size);
2752         rec->found_rec = !!extent_rec;
2753         rec->content_checked = 0;
2754         rec->owner_ref_checked = 0;
2755         rec->num_duplicates = 0;
2756         rec->metadata = metadata;
2757         INIT_LIST_HEAD(&rec->backrefs);
2758         INIT_LIST_HEAD(&rec->dups);
2759         INIT_LIST_HEAD(&rec->list);
2760
2761         if (is_root)
2762                 rec->is_root = 1;
2763         else
2764                 rec->is_root = 0;
2765
2766         if (inc_ref)
2767                 rec->refs = 1;
2768         else
2769                 rec->refs = 0;
2770
2771         if (extent_item_refs)
2772                 rec->extent_item_refs = extent_item_refs;
2773         else
2774                 rec->extent_item_refs = 0;
2775
2776         if (parent_key)
2777                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2778         else
2779                 memset(&rec->parent_key, 0, sizeof(*parent_key));
2780
2781         if (parent_gen)
2782                 rec->parent_generation = parent_gen;
2783         else
2784                 rec->parent_generation = 0;
2785
2786         rec->cache.start = start;
2787         rec->cache.size = nr;
2788         ret = insert_cache_extent(extent_cache, &rec->cache);
2789         BUG_ON(ret);
2790         bytes_used += nr;
2791         if (set_checked) {
2792                 rec->content_checked = 1;
2793                 rec->owner_ref_checked = 1;
2794         }
2795         return ret;
2796 }
2797
2798 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
2799                             u64 parent, u64 root, int found_ref)
2800 {
2801         struct extent_record *rec;
2802         struct tree_backref *back;
2803         struct cache_extent *cache;
2804
2805         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2806         if (!cache) {
2807                 add_extent_rec(extent_cache, NULL, 0, bytenr,
2808                                1, 0, 0, 0, 0, 1, 0, 0);
2809                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2810                 if (!cache)
2811                         abort();
2812         }
2813
2814         rec = container_of(cache, struct extent_record, cache);
2815         if (rec->start != bytenr) {
2816                 abort();
2817         }
2818
2819         back = find_tree_backref(rec, parent, root);
2820         if (!back)
2821                 back = alloc_tree_backref(rec, parent, root);
2822
2823         if (found_ref) {
2824                 if (back->node.found_ref) {
2825                         fprintf(stderr, "Extent back ref already exists "
2826                                 "for %llu parent %llu root %llu \n",
2827                                 (unsigned long long)bytenr,
2828                                 (unsigned long long)parent,
2829                                 (unsigned long long)root);
2830                 }
2831                 back->node.found_ref = 1;
2832         } else {
2833                 if (back->node.found_extent_tree) {
2834                         fprintf(stderr, "Extent back ref already exists "
2835                                 "for %llu parent %llu root %llu \n",
2836                                 (unsigned long long)bytenr,
2837                                 (unsigned long long)parent,
2838                                 (unsigned long long)root);
2839                 }
2840                 back->node.found_extent_tree = 1;
2841         }
2842         maybe_free_extent_rec(extent_cache, rec);
2843         return 0;
2844 }
2845
2846 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
2847                             u64 parent, u64 root, u64 owner, u64 offset,
2848                             u32 num_refs, int found_ref, u64 max_size)
2849 {
2850         struct extent_record *rec;
2851         struct data_backref *back;
2852         struct cache_extent *cache;
2853
2854         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2855         if (!cache) {
2856                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
2857                                0, 0, max_size);
2858                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2859                 if (!cache)
2860                         abort();
2861         }
2862
2863         rec = container_of(cache, struct extent_record, cache);
2864         if (rec->max_size < max_size)
2865                 rec->max_size = max_size;
2866
2867         /*
2868          * If found_ref is set then max_size is the real size and must match the
2869          * existing refs.  So if we have already found a ref then we need to
2870          * make sure that this ref matches the existing one, otherwise we need
2871          * to add a new backref so we can notice that the backrefs don't match
2872          * and we need to figure out who is telling the truth.  This is to
2873          * account for that awful fsync bug I introduced where we'd end up with
2874          * a btrfs_file_extent_item that would have its length include multiple
2875          * prealloc extents or point inside of a prealloc extent.
2876          */
2877         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
2878                                  bytenr, max_size);
2879         if (!back)
2880                 back = alloc_data_backref(rec, parent, root, owner, offset,
2881                                           max_size);
2882
2883         if (found_ref) {
2884                 BUG_ON(num_refs != 1);
2885                 if (back->node.found_ref)
2886                         BUG_ON(back->bytes != max_size);
2887                 back->node.found_ref = 1;
2888                 back->found_ref += 1;
2889                 back->bytes = max_size;
2890                 back->disk_bytenr = bytenr;
2891                 rec->refs += 1;
2892                 rec->content_checked = 1;
2893                 rec->owner_ref_checked = 1;
2894         } else {
2895                 if (back->node.found_extent_tree) {
2896                         fprintf(stderr, "Extent back ref already exists "
2897                                 "for %llu parent %llu root %llu "
2898                                 "owner %llu offset %llu num_refs %lu\n",
2899                                 (unsigned long long)bytenr,
2900                                 (unsigned long long)parent,
2901                                 (unsigned long long)root,
2902                                 (unsigned long long)owner,
2903                                 (unsigned long long)offset,
2904                                 (unsigned long)num_refs);
2905                 }
2906                 back->num_refs = num_refs;
2907                 back->node.found_extent_tree = 1;
2908         }
2909         maybe_free_extent_rec(extent_cache, rec);
2910         return 0;
2911 }
2912
2913 static int add_pending(struct cache_tree *pending,
2914                        struct cache_tree *seen, u64 bytenr, u32 size)
2915 {
2916         int ret;
2917         ret = add_cache_extent(seen, bytenr, size);
2918         if (ret)
2919                 return ret;
2920         add_cache_extent(pending, bytenr, size);
2921         return 0;
2922 }
2923
2924 static int pick_next_pending(struct cache_tree *pending,
2925                         struct cache_tree *reada,
2926                         struct cache_tree *nodes,
2927                         u64 last, struct block_info *bits, int bits_nr,
2928                         int *reada_bits)
2929 {
2930         unsigned long node_start = last;
2931         struct cache_extent *cache;
2932         int ret;
2933
2934         cache = search_cache_extent(reada, 0);
2935         if (cache) {
2936                 bits[0].start = cache->start;
2937                 bits[0].size = cache->size;
2938                 *reada_bits = 1;
2939                 return 1;
2940         }
2941         *reada_bits = 0;
2942         if (node_start > 32768)
2943                 node_start -= 32768;
2944
2945         cache = search_cache_extent(nodes, node_start);
2946         if (!cache)
2947                 cache = search_cache_extent(nodes, 0);
2948
2949         if (!cache) {
2950                  cache = search_cache_extent(pending, 0);
2951                  if (!cache)
2952                          return 0;
2953                  ret = 0;
2954                  do {
2955                          bits[ret].start = cache->start;
2956                          bits[ret].size = cache->size;
2957                          cache = next_cache_extent(cache);
2958                          ret++;
2959                  } while (cache && ret < bits_nr);
2960                  return ret;
2961         }
2962
2963         ret = 0;
2964         do {
2965                 bits[ret].start = cache->start;
2966                 bits[ret].size = cache->size;
2967                 cache = next_cache_extent(cache);
2968                 ret++;
2969         } while (cache && ret < bits_nr);
2970
2971         if (bits_nr - ret > 8) {
2972                 u64 lookup = bits[0].start + bits[0].size;
2973                 struct cache_extent *next;
2974                 next = search_cache_extent(pending, lookup);
2975                 while(next) {
2976                         if (next->start - lookup > 32768)
2977                                 break;
2978                         bits[ret].start = next->start;
2979                         bits[ret].size = next->size;
2980                         lookup = next->start + next->size;
2981                         ret++;
2982                         if (ret == bits_nr)
2983                                 break;
2984                         next = next_cache_extent(next);
2985                         if (!next)
2986                                 break;
2987                 }
2988         }
2989         return ret;
2990 }
2991
2992 static void free_chunk_record(struct cache_extent *cache)
2993 {
2994         struct chunk_record *rec;
2995
2996         rec = container_of(cache, struct chunk_record, cache);
2997         free(rec);
2998 }
2999
3000 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
3001 {
3002         cache_tree_free_extents(chunk_cache, free_chunk_record);
3003 }
3004
3005 static void free_device_record(struct rb_node *node)
3006 {
3007         struct device_record *rec;
3008
3009         rec = container_of(node, struct device_record, node);
3010         free(rec);
3011 }
3012
3013 FREE_RB_BASED_TREE(device_cache, free_device_record);
3014
3015 int insert_block_group_record(struct block_group_tree *tree,
3016                               struct block_group_record *bg_rec)
3017 {
3018         int ret;
3019
3020         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
3021         if (ret)
3022                 return ret;
3023
3024         list_add_tail(&bg_rec->list, &tree->block_groups);
3025         return 0;
3026 }
3027
3028 static void free_block_group_record(struct cache_extent *cache)
3029 {
3030         struct block_group_record *rec;
3031
3032         rec = container_of(cache, struct block_group_record, cache);
3033         free(rec);
3034 }
3035
3036 void free_block_group_tree(struct block_group_tree *tree)
3037 {
3038         cache_tree_free_extents(&tree->tree, free_block_group_record);
3039 }
3040
3041 int insert_device_extent_record(struct device_extent_tree *tree,
3042                                 struct device_extent_record *de_rec)
3043 {
3044         int ret;
3045
3046         /*
3047          * Device extent is a bit different from the other extents, because
3048          * the extents which belong to the different devices may have the
3049          * same start and size, so we need use the special extent cache
3050          * search/insert functions.
3051          */
3052         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
3053         if (ret)
3054                 return ret;
3055
3056         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
3057         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
3058         return 0;
3059 }
3060
3061 static void free_device_extent_record(struct cache_extent *cache)
3062 {
3063         struct device_extent_record *rec;
3064
3065         rec = container_of(cache, struct device_extent_record, cache);
3066         free(rec);
3067 }
3068
3069 void free_device_extent_tree(struct device_extent_tree *tree)
3070 {
3071         cache_tree_free_extents(&tree->tree, free_device_extent_record);
3072 }
3073
3074 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3075 static int process_extent_ref_v0(struct cache_tree *extent_cache,
3076                                  struct extent_buffer *leaf, int slot)
3077 {
3078         struct btrfs_extent_ref_v0 *ref0;
3079         struct btrfs_key key;
3080
3081         btrfs_item_key_to_cpu(leaf, &key, slot);
3082         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
3083         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
3084                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
3085         } else {
3086                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
3087                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
3088         }
3089         return 0;
3090 }
3091 #endif
3092
3093 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
3094                                             struct btrfs_key *key,
3095                                             int slot)
3096 {
3097         struct btrfs_chunk *ptr;
3098         struct chunk_record *rec;
3099         int num_stripes, i;
3100
3101         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3102         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
3103
3104         rec = malloc(btrfs_chunk_record_size(num_stripes));
3105         if (!rec) {
3106                 fprintf(stderr, "memory allocation failed\n");
3107                 exit(-1);
3108         }
3109
3110         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
3111
3112         INIT_LIST_HEAD(&rec->list);
3113         INIT_LIST_HEAD(&rec->dextents);
3114         rec->bg_rec = NULL;
3115
3116         rec->cache.start = key->offset;
3117         rec->cache.size = btrfs_chunk_length(leaf, ptr);
3118
3119         rec->generation = btrfs_header_generation(leaf);
3120
3121         rec->objectid = key->objectid;
3122         rec->type = key->type;
3123         rec->offset = key->offset;
3124
3125         rec->length = rec->cache.size;
3126         rec->owner = btrfs_chunk_owner(leaf, ptr);
3127         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
3128         rec->type_flags = btrfs_chunk_type(leaf, ptr);
3129         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
3130         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
3131         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
3132         rec->num_stripes = num_stripes;
3133         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
3134
3135         for (i = 0; i < rec->num_stripes; ++i) {
3136                 rec->stripes[i].devid =
3137                         btrfs_stripe_devid_nr(leaf, ptr, i);
3138                 rec->stripes[i].offset =
3139                         btrfs_stripe_offset_nr(leaf, ptr, i);
3140                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
3141                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
3142                                 BTRFS_UUID_SIZE);
3143         }
3144
3145         return rec;
3146 }
3147
3148 static int process_chunk_item(struct cache_tree *chunk_cache,
3149                               struct btrfs_key *key, struct extent_buffer *eb,
3150                               int slot)
3151 {
3152         struct chunk_record *rec;
3153         int ret = 0;
3154
3155         rec = btrfs_new_chunk_record(eb, key, slot);
3156         ret = insert_cache_extent(chunk_cache, &rec->cache);
3157         if (ret) {
3158                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
3159                         rec->offset, rec->length);
3160                 free(rec);
3161         }
3162
3163         return ret;
3164 }
3165
3166 static int process_device_item(struct rb_root *dev_cache,
3167                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
3168 {
3169         struct btrfs_dev_item *ptr;
3170         struct device_record *rec;
3171         int ret = 0;
3172
3173         ptr = btrfs_item_ptr(eb,
3174                 slot, struct btrfs_dev_item);
3175
3176         rec = malloc(sizeof(*rec));
3177         if (!rec) {
3178                 fprintf(stderr, "memory allocation failed\n");
3179                 return -ENOMEM;
3180         }
3181
3182         rec->devid = key->offset;
3183         rec->generation = btrfs_header_generation(eb);
3184
3185         rec->objectid = key->objectid;
3186         rec->type = key->type;
3187         rec->offset = key->offset;
3188
3189         rec->devid = btrfs_device_id(eb, ptr);
3190         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
3191         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
3192
3193         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
3194         if (ret) {
3195                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
3196                 free(rec);
3197         }
3198
3199         return ret;
3200 }
3201
3202 struct block_group_record *
3203 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
3204                              int slot)
3205 {
3206         struct btrfs_block_group_item *ptr;
3207         struct block_group_record *rec;
3208
3209         rec = malloc(sizeof(*rec));
3210         if (!rec) {
3211                 fprintf(stderr, "memory allocation failed\n");
3212                 exit(-1);
3213         }
3214         memset(rec, 0, sizeof(*rec));
3215
3216         rec->cache.start = key->objectid;
3217         rec->cache.size = key->offset;
3218
3219         rec->generation = btrfs_header_generation(leaf);
3220
3221         rec->objectid = key->objectid;
3222         rec->type = key->type;
3223         rec->offset = key->offset;
3224
3225         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
3226         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
3227
3228         INIT_LIST_HEAD(&rec->list);
3229
3230         return rec;
3231 }
3232
3233 static int process_block_group_item(struct block_group_tree *block_group_cache,
3234                                     struct btrfs_key *key,
3235                                     struct extent_buffer *eb, int slot)
3236 {
3237         struct block_group_record *rec;
3238         int ret = 0;
3239
3240         rec = btrfs_new_block_group_record(eb, key, slot);
3241         ret = insert_block_group_record(block_group_cache, rec);
3242         if (ret) {
3243                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
3244                         rec->objectid, rec->offset);
3245                 free(rec);
3246         }
3247
3248         return ret;
3249 }
3250
3251 struct device_extent_record *
3252 btrfs_new_device_extent_record(struct extent_buffer *leaf,
3253                                struct btrfs_key *key, int slot)
3254 {
3255         struct device_extent_record *rec;
3256         struct btrfs_dev_extent *ptr;
3257
3258         rec = malloc(sizeof(*rec));
3259         if (!rec) {
3260                 fprintf(stderr, "memory allocation failed\n");
3261                 exit(-1);
3262         }
3263         memset(rec, 0, sizeof(*rec));
3264
3265         rec->cache.objectid = key->objectid;
3266         rec->cache.start = key->offset;
3267
3268         rec->generation = btrfs_header_generation(leaf);
3269
3270         rec->objectid = key->objectid;
3271         rec->type = key->type;
3272         rec->offset = key->offset;
3273
3274         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
3275         rec->chunk_objecteid =
3276                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
3277         rec->chunk_offset =
3278                 btrfs_dev_extent_chunk_offset(leaf, ptr);
3279         rec->length = btrfs_dev_extent_length(leaf, ptr);
3280         rec->cache.size = rec->length;
3281
3282         INIT_LIST_HEAD(&rec->chunk_list);
3283         INIT_LIST_HEAD(&rec->device_list);
3284
3285         return rec;
3286 }
3287
3288 static int
3289 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
3290                            struct btrfs_key *key, struct extent_buffer *eb,
3291                            int slot)
3292 {
3293         struct device_extent_record *rec;
3294         int ret;
3295
3296         rec = btrfs_new_device_extent_record(eb, key, slot);
3297         ret = insert_device_extent_record(dev_extent_cache, rec);
3298         if (ret) {
3299                 fprintf(stderr,
3300                         "Device extent[%llu, %llu, %llu] existed.\n",
3301                         rec->objectid, rec->offset, rec->length);
3302                 free(rec);
3303         }
3304
3305         return ret;
3306 }
3307
3308 static int process_extent_item(struct btrfs_root *root,
3309                                struct cache_tree *extent_cache,
3310                                struct extent_buffer *eb, int slot)
3311 {
3312         struct btrfs_extent_item *ei;
3313         struct btrfs_extent_inline_ref *iref;
3314         struct btrfs_extent_data_ref *dref;
3315         struct btrfs_shared_data_ref *sref;
3316         struct btrfs_key key;
3317         unsigned long end;
3318         unsigned long ptr;
3319         int type;
3320         u32 item_size = btrfs_item_size_nr(eb, slot);
3321         u64 refs = 0;
3322         u64 offset;
3323         u64 num_bytes;
3324         int metadata = 0;
3325
3326         btrfs_item_key_to_cpu(eb, &key, slot);
3327
3328         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3329                 metadata = 1;
3330                 num_bytes = root->leafsize;
3331         } else {
3332                 num_bytes = key.offset;
3333         }
3334
3335         if (item_size < sizeof(*ei)) {
3336 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3337                 struct btrfs_extent_item_v0 *ei0;
3338                 BUG_ON(item_size != sizeof(*ei0));
3339                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
3340                 refs = btrfs_extent_refs_v0(eb, ei0);
3341 #else
3342                 BUG();
3343 #endif
3344                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
3345                                       num_bytes, refs, 0, 0, 0, metadata, 1,
3346                                       num_bytes);
3347         }
3348
3349         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
3350         refs = btrfs_extent_refs(eb, ei);
3351
3352         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
3353                        refs, 0, 0, 0, metadata, 1, num_bytes);
3354
3355         ptr = (unsigned long)(ei + 1);
3356         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
3357             key.type == BTRFS_EXTENT_ITEM_KEY)
3358                 ptr += sizeof(struct btrfs_tree_block_info);
3359
3360         end = (unsigned long)ei + item_size;
3361         while (ptr < end) {
3362                 iref = (struct btrfs_extent_inline_ref *)ptr;
3363                 type = btrfs_extent_inline_ref_type(eb, iref);
3364                 offset = btrfs_extent_inline_ref_offset(eb, iref);
3365                 switch (type) {
3366                 case BTRFS_TREE_BLOCK_REF_KEY:
3367                         add_tree_backref(extent_cache, key.objectid,
3368                                          0, offset, 0);
3369                         break;
3370                 case BTRFS_SHARED_BLOCK_REF_KEY:
3371                         add_tree_backref(extent_cache, key.objectid,
3372                                          offset, 0, 0);
3373                         break;
3374                 case BTRFS_EXTENT_DATA_REF_KEY:
3375                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3376                         add_data_backref(extent_cache, key.objectid, 0,
3377                                         btrfs_extent_data_ref_root(eb, dref),
3378                                         btrfs_extent_data_ref_objectid(eb,
3379                                                                        dref),
3380                                         btrfs_extent_data_ref_offset(eb, dref),
3381                                         btrfs_extent_data_ref_count(eb, dref),
3382                                         0, num_bytes);
3383                         break;
3384                 case BTRFS_SHARED_DATA_REF_KEY:
3385                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
3386                         add_data_backref(extent_cache, key.objectid, offset,
3387                                         0, 0, 0,
3388                                         btrfs_shared_data_ref_count(eb, sref),
3389                                         0, num_bytes);
3390                         break;
3391                 default:
3392                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
3393                                 key.objectid, key.type, num_bytes);
3394                         goto out;
3395                 }
3396                 ptr += btrfs_extent_inline_ref_size(type);
3397         }
3398         WARN_ON(ptr > end);
3399 out:
3400         return 0;
3401 }
3402
3403 static int check_cache_range(struct btrfs_root *root,
3404                              struct btrfs_block_group_cache *cache,
3405                              u64 offset, u64 bytes)
3406 {
3407         struct btrfs_free_space *entry;
3408         u64 *logical;
3409         u64 bytenr;
3410         int stripe_len;
3411         int i, nr, ret;
3412
3413         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3414                 bytenr = btrfs_sb_offset(i);
3415                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
3416                                        cache->key.objectid, bytenr, 0,
3417                                        &logical, &nr, &stripe_len);
3418                 if (ret)
3419                         return ret;
3420
3421                 while (nr--) {
3422                         if (logical[nr] + stripe_len <= offset)
3423                                 continue;
3424                         if (offset + bytes <= logical[nr])
3425                                 continue;
3426                         if (logical[nr] == offset) {
3427                                 if (stripe_len >= bytes) {
3428                                         kfree(logical);
3429                                         return 0;
3430                                 }
3431                                 bytes -= stripe_len;
3432                                 offset += stripe_len;
3433                         } else if (logical[nr] < offset) {
3434                                 if (logical[nr] + stripe_len >=
3435                                     offset + bytes) {
3436                                         kfree(logical);
3437                                         return 0;
3438                                 }
3439                                 bytes = (offset + bytes) -
3440                                         (logical[nr] + stripe_len);
3441                                 offset = logical[nr] + stripe_len;
3442                         } else {
3443                                 /*
3444                                  * Could be tricky, the super may land in the
3445                                  * middle of the area we're checking.  First
3446                                  * check the easiest case, it's at the end.
3447                                  */
3448                                 if (logical[nr] + stripe_len >=
3449                                     bytes + offset) {
3450                                         bytes = logical[nr] - offset;
3451                                         continue;
3452                                 }
3453
3454                                 /* Check the left side */
3455                                 ret = check_cache_range(root, cache,
3456                                                         offset,
3457                                                         logical[nr] - offset);
3458                                 if (ret) {
3459                                         kfree(logical);
3460                                         return ret;
3461                                 }
3462
3463                                 /* Now we continue with the right side */
3464                                 bytes = (offset + bytes) -
3465                                         (logical[nr] + stripe_len);
3466                                 offset = logical[nr] + stripe_len;
3467                         }
3468                 }
3469
3470                 kfree(logical);
3471         }
3472
3473         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
3474         if (!entry) {
3475                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
3476                         offset, offset+bytes);
3477                 return -EINVAL;
3478         }
3479
3480         if (entry->offset != offset) {
3481                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
3482                         entry->offset);
3483                 return -EINVAL;
3484         }
3485
3486         if (entry->bytes != bytes) {
3487                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
3488                         bytes, entry->bytes, offset);
3489                 return -EINVAL;
3490         }
3491
3492         unlink_free_space(cache->free_space_ctl, entry);
3493         free(entry);
3494         return 0;
3495 }
3496
3497 static int verify_space_cache(struct btrfs_root *root,
3498                               struct btrfs_block_group_cache *cache)
3499 {
3500         struct btrfs_path *path;
3501         struct extent_buffer *leaf;
3502         struct btrfs_key key;
3503         u64 last;
3504         int ret = 0;
3505
3506         path = btrfs_alloc_path();
3507         if (!path)
3508                 return -ENOMEM;
3509
3510         root = root->fs_info->extent_root;
3511
3512         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
3513
3514         key.objectid = last;
3515         key.offset = 0;
3516         key.type = BTRFS_EXTENT_ITEM_KEY;
3517
3518         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3519         if (ret < 0)
3520                 goto out;
3521         ret = 0;
3522         while (1) {
3523                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3524                         ret = btrfs_next_leaf(root, path);
3525                         if (ret < 0)
3526                                 goto out;
3527                         if (ret > 0) {
3528                                 ret = 0;
3529                                 break;
3530                         }
3531                 }
3532                 leaf = path->nodes[0];
3533                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3534                 if (key.objectid >= cache->key.offset + cache->key.objectid)
3535                         break;
3536                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3537                     key.type != BTRFS_METADATA_ITEM_KEY) {
3538                         path->slots[0]++;
3539                         continue;
3540                 }
3541
3542                 if (last == key.objectid) {
3543                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
3544                                 last = key.objectid + key.offset;
3545                         else
3546                                 last = key.objectid + root->leafsize;
3547                         path->slots[0]++;
3548                         continue;
3549                 }
3550
3551                 ret = check_cache_range(root, cache, last,
3552                                         key.objectid - last);
3553                 if (ret)
3554                         break;
3555                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3556                         last = key.objectid + key.offset;
3557                 else
3558                         last = key.objectid + root->leafsize;
3559                 path->slots[0]++;
3560         }
3561
3562         if (last < cache->key.objectid + cache->key.offset)
3563                 ret = check_cache_range(root, cache, last,
3564                                         cache->key.objectid +
3565                                         cache->key.offset - last);
3566
3567 out:
3568         btrfs_free_path(path);
3569
3570         if (!ret &&
3571             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
3572                 fprintf(stderr, "There are still entries left in the space "
3573                         "cache\n");
3574                 ret = -EINVAL;
3575         }
3576
3577         return ret;
3578 }
3579
3580 static int check_space_cache(struct btrfs_root *root)
3581 {
3582         struct btrfs_block_group_cache *cache;
3583         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
3584         int ret;
3585         int error = 0;
3586
3587         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
3588             btrfs_super_generation(root->fs_info->super_copy) !=
3589             btrfs_super_cache_generation(root->fs_info->super_copy)) {
3590                 printf("cache and super generation don't match, space cache "
3591                        "will be invalidated\n");
3592                 return 0;
3593         }
3594
3595         while (1) {
3596                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
3597                 if (!cache)
3598                         break;
3599
3600                 start = cache->key.objectid + cache->key.offset;
3601                 if (!cache->free_space_ctl) {
3602                         if (btrfs_init_free_space_ctl(cache,
3603                                                       root->sectorsize)) {
3604                                 ret = -ENOMEM;
3605                                 break;
3606                         }
3607                 } else {
3608                         btrfs_remove_free_space_cache(cache);
3609                 }
3610
3611                 ret = load_free_space_cache(root->fs_info, cache);
3612                 if (!ret)
3613                         continue;
3614
3615                 ret = verify_space_cache(root, cache);
3616                 if (ret) {
3617                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
3618                                 cache->key.objectid);
3619                         error++;
3620                 }
3621         }
3622
3623         return error ? -EINVAL : 0;
3624 }
3625
3626 static int read_extent_data(struct btrfs_root *root, char *data,
3627                         u64 logical, u64 *len, int mirror)
3628 {
3629         u64 offset = 0;
3630         struct btrfs_multi_bio *multi = NULL;
3631         struct btrfs_fs_info *info = root->fs_info;
3632         struct btrfs_device *device;
3633         int ret = 0;
3634         u64 max_len = *len;
3635
3636         ret = btrfs_map_block(&info->mapping_tree, READ, logical, len,
3637                               &multi, mirror, NULL);
3638         if (ret) {
3639                 fprintf(stderr, "Couldn't map the block %llu\n",
3640                                 logical + offset);
3641                 goto err;
3642         }
3643         device = multi->stripes[0].dev;
3644
3645         if (device->fd == 0)
3646                 goto err;
3647         if (*len > max_len)
3648                 *len = max_len;
3649
3650         ret = pread64(device->fd, data, *len, multi->stripes[0].physical);
3651         if (ret != *len)
3652                 ret = -EIO;
3653         else
3654                 ret = 0;
3655 err:
3656         kfree(multi);
3657         return ret;
3658 }
3659
3660 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
3661                         u64 num_bytes, unsigned long leaf_offset,
3662                         struct extent_buffer *eb) {
3663
3664         u64 offset = 0;
3665         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3666         char *data;
3667         unsigned long csum_offset;
3668         u32 csum;
3669         u32 csum_expected;
3670         u64 read_len;
3671         u64 data_checked = 0;
3672         u64 tmp;
3673         int ret = 0;
3674         int mirror;
3675         int num_copies;
3676
3677         if (num_bytes % root->sectorsize)
3678                 return -EINVAL;
3679
3680         data = malloc(num_bytes);
3681         if (!data)
3682                 return -ENOMEM;
3683
3684         while (offset < num_bytes) {
3685                 mirror = 0;
3686 again:
3687                 read_len = num_bytes - offset;
3688                 /* read as much space once a time */
3689                 ret = read_extent_data(root, data + offset,
3690                                 bytenr + offset, &read_len, mirror);
3691                 if (ret)
3692                         goto out;
3693                 data_checked = 0;
3694                 /* verify every 4k data's checksum */
3695                 while (data_checked < read_len) {
3696                         csum = ~(u32)0;
3697                         tmp = offset + data_checked;
3698
3699                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
3700                                                csum, root->sectorsize);
3701                         btrfs_csum_final(csum, (char *)&csum);
3702
3703                         csum_offset = leaf_offset +
3704                                  tmp / root->sectorsize * csum_size;
3705                         read_extent_buffer(eb, (char *)&csum_expected,
3706                                            csum_offset, csum_size);
3707                         /* try another mirror */
3708                         if (csum != csum_expected) {
3709                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
3710                                                 mirror, bytenr + tmp,
3711                                                 csum, csum_expected);
3712                                 num_copies = btrfs_num_copies(
3713                                                 &root->fs_info->mapping_tree,
3714                                                 bytenr, num_bytes);
3715                                 if (mirror < num_copies - 1) {
3716                                         mirror += 1;
3717                                         goto again;
3718                                 }
3719                         }
3720                         data_checked += root->sectorsize;
3721                 }
3722                 offset += read_len;
3723         }
3724 out:
3725         free(data);
3726         return ret;
3727 }
3728
3729 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
3730                                u64 num_bytes)
3731 {
3732         struct btrfs_path *path;
3733         struct extent_buffer *leaf;
3734         struct btrfs_key key;
3735         int ret;
3736
3737         path = btrfs_alloc_path();
3738         if (!path) {
3739                 fprintf(stderr, "Error allocing path\n");
3740                 return -ENOMEM;
3741         }
3742
3743         key.objectid = bytenr;
3744         key.type = BTRFS_EXTENT_ITEM_KEY;
3745         key.offset = 0;
3746
3747
3748 again:
3749         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
3750                                 0, 0);
3751         if (ret < 0) {
3752                 fprintf(stderr, "Error looking up extent record %d\n", ret);
3753                 btrfs_free_path(path);
3754                 return ret;
3755         } else if (ret) {
3756                 if (path->slots[0])
3757                         path->slots[0]--;
3758                 else
3759                         btrfs_prev_leaf(root, path);
3760         }
3761
3762         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3763
3764         /*
3765          * Block group items come before extent items if they have the same
3766          * bytenr, so walk back one more just in case.  Dear future traveler,
3767          * first congrats on mastering time travel.  Now if it's not too much
3768          * trouble could you go back to 2006 and tell Chris to make the
3769          * BLOCK_GROUP_ITEM_KEY lower than the EXTENT_ITEM_KEY please?
3770          */
3771         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3772                 if (path->slots[0])
3773                         path->slots[0]--;
3774                 else
3775                         btrfs_prev_leaf(root, path);
3776         }
3777
3778         while (num_bytes) {
3779                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3780                         ret = btrfs_next_leaf(root, path);
3781                         if (ret < 0) {
3782                                 fprintf(stderr, "Error going to next leaf "
3783                                         "%d\n", ret);
3784                                 btrfs_free_path(path);
3785                                 return ret;
3786                         } else if (ret) {
3787                                 break;
3788                         }
3789                 }
3790                 leaf = path->nodes[0];
3791                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3792                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
3793                         path->slots[0]++;
3794                         continue;
3795                 }
3796                 if (key.objectid + key.offset < bytenr) {
3797                         path->slots[0]++;
3798                         continue;
3799                 }
3800                 if (key.objectid > bytenr + num_bytes)
3801                         break;
3802
3803                 if (key.objectid == bytenr) {
3804                         if (key.offset >= num_bytes) {
3805                                 num_bytes = 0;
3806                                 break;
3807                         }
3808                         num_bytes -= key.offset;
3809                         bytenr += key.offset;
3810                 } else if (key.objectid < bytenr) {
3811                         if (key.objectid + key.offset >= bytenr + num_bytes) {
3812                                 num_bytes = 0;
3813                                 break;
3814                         }
3815                         num_bytes = (bytenr + num_bytes) -
3816                                 (key.objectid + key.offset);
3817                         bytenr = key.objectid + key.offset;
3818                 } else {
3819                         if (key.objectid + key.offset < bytenr + num_bytes) {
3820                                 u64 new_start = key.objectid + key.offset;
3821                                 u64 new_bytes = bytenr + num_bytes - new_start;
3822
3823                                 /*
3824                                  * Weird case, the extent is in the middle of
3825                                  * our range, we'll have to search one side
3826                                  * and then the other.  Not sure if this happens
3827                                  * in real life, but no harm in coding it up
3828                                  * anyway just in case.
3829                                  */
3830                                 btrfs_release_path(path);
3831                                 ret = check_extent_exists(root, new_start,
3832                                                           new_bytes);
3833                                 if (ret) {
3834                                         fprintf(stderr, "Right section didn't "
3835                                                 "have a record\n");
3836                                         break;
3837                                 }
3838                                 num_bytes = key.objectid - bytenr;
3839                                 goto again;
3840                         }
3841                         num_bytes = key.objectid - bytenr;
3842                 }
3843                 path->slots[0]++;
3844         }
3845         ret = 0;
3846
3847         if (num_bytes) {
3848                 fprintf(stderr, "There are no extents for csum range "
3849                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
3850                 ret = 1;
3851         }
3852
3853         btrfs_free_path(path);
3854         return ret;
3855 }
3856
3857 static int check_csums(struct btrfs_root *root)
3858 {
3859         struct btrfs_path *path;
3860         struct extent_buffer *leaf;
3861         struct btrfs_key key;
3862         u64 offset = 0, num_bytes = 0;
3863         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3864         int errors = 0;
3865         int ret;
3866         u64 data_len;
3867         unsigned long leaf_offset;
3868
3869         root = root->fs_info->csum_root;
3870         if (!extent_buffer_uptodate(root->node)) {
3871                 fprintf(stderr, "No valid csum tree found\n");
3872                 return -ENOENT;
3873         }
3874
3875         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3876         key.type = BTRFS_EXTENT_CSUM_KEY;
3877         key.offset = 0;
3878
3879         path = btrfs_alloc_path();
3880         if (!path)
3881                 return -ENOMEM;
3882
3883         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3884         if (ret < 0) {
3885                 fprintf(stderr, "Error searching csum tree %d\n", ret);
3886                 btrfs_free_path(path);
3887                 return ret;
3888         }
3889
3890         if (ret > 0 && path->slots[0])
3891                 path->slots[0]--;
3892         ret = 0;
3893
3894         while (1) {
3895                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3896                         ret = btrfs_next_leaf(root, path);
3897                         if (ret < 0) {
3898                                 fprintf(stderr, "Error going to next leaf "
3899                                         "%d\n", ret);
3900                                 break;
3901                         }
3902                         if (ret)
3903                                 break;
3904                 }
3905                 leaf = path->nodes[0];
3906
3907                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3908                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
3909                         path->slots[0]++;
3910                         continue;
3911                 }
3912
3913                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
3914                               csum_size) * root->sectorsize;
3915                 if (!check_data_csum)
3916                         goto skip_csum_check;
3917                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
3918                 ret = check_extent_csums(root, key.offset, data_len,
3919                                          leaf_offset, leaf);
3920                 if (ret)
3921                         break;
3922 skip_csum_check:
3923                 if (!num_bytes) {
3924                         offset = key.offset;
3925                 } else if (key.offset != offset + num_bytes) {
3926                         ret = check_extent_exists(root, offset, num_bytes);
3927                         if (ret) {
3928                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
3929                                         "there is no extent record\n",
3930                                         offset, offset+num_bytes);
3931                                 errors++;
3932                         }
3933                         offset = key.offset;
3934                         num_bytes = 0;
3935                 }
3936                 num_bytes += data_len;
3937                 path->slots[0]++;
3938         }
3939
3940         btrfs_free_path(path);
3941         return errors;
3942 }
3943
3944 static int is_dropped_key(struct btrfs_key *key,
3945                           struct btrfs_key *drop_key) {
3946         if (key->objectid < drop_key->objectid)
3947                 return 1;
3948         else if (key->objectid == drop_key->objectid) {
3949                 if (key->type < drop_key->type)
3950                         return 1;
3951                 else if (key->type == drop_key->type) {
3952                         if (key->offset < drop_key->offset)
3953                                 return 1;
3954                 }
3955         }
3956         return 0;
3957 }
3958
3959 static int run_next_block(struct btrfs_trans_handle *trans,
3960                           struct btrfs_root *root,
3961                           struct block_info *bits,
3962                           int bits_nr,
3963                           u64 *last,
3964                           struct cache_tree *pending,
3965                           struct cache_tree *seen,
3966                           struct cache_tree *reada,
3967                           struct cache_tree *nodes,
3968                           struct cache_tree *extent_cache,
3969                           struct cache_tree *chunk_cache,
3970                           struct rb_root *dev_cache,
3971                           struct block_group_tree *block_group_cache,
3972                           struct device_extent_tree *dev_extent_cache,
3973                           struct btrfs_root_item *ri)
3974 {
3975         struct extent_buffer *buf;
3976         u64 bytenr;
3977         u32 size;
3978         u64 parent;
3979         u64 owner;
3980         u64 flags;
3981         u64 ptr;
3982         u64 gen = 0;
3983         int ret = 0;
3984         int i;
3985         int nritems;
3986         struct btrfs_key key;
3987         struct cache_extent *cache;
3988         int reada_bits;
3989
3990         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
3991                                     bits_nr, &reada_bits);
3992         if (nritems == 0)
3993                 return 1;
3994
3995         if (!reada_bits) {
3996                 for(i = 0; i < nritems; i++) {
3997                         ret = add_cache_extent(reada, bits[i].start,
3998                                                bits[i].size);
3999                         if (ret == -EEXIST)
4000                                 continue;
4001
4002                         /* fixme, get the parent transid */
4003                         readahead_tree_block(root, bits[i].start,
4004                                              bits[i].size, 0);
4005                 }
4006         }
4007         *last = bits[0].start;
4008         bytenr = bits[0].start;
4009         size = bits[0].size;
4010
4011         cache = lookup_cache_extent(pending, bytenr, size);
4012         if (cache) {
4013                 remove_cache_extent(pending, cache);
4014                 free(cache);
4015         }
4016         cache = lookup_cache_extent(reada, bytenr, size);
4017         if (cache) {
4018                 remove_cache_extent(reada, cache);
4019                 free(cache);
4020         }
4021         cache = lookup_cache_extent(nodes, bytenr, size);
4022         if (cache) {
4023                 remove_cache_extent(nodes, cache);
4024                 free(cache);
4025         }
4026         cache = lookup_cache_extent(extent_cache, bytenr, size);
4027         if (cache) {
4028                 struct extent_record *rec;
4029
4030                 rec = container_of(cache, struct extent_record, cache);
4031                 gen = rec->parent_generation;
4032         }
4033
4034         /* fixme, get the real parent transid */
4035         buf = read_tree_block(root, bytenr, size, gen);
4036         if (!extent_buffer_uptodate(buf)) {
4037                 record_bad_block_io(root->fs_info,
4038                                     extent_cache, bytenr, size);
4039                 goto out;
4040         }
4041
4042         nritems = btrfs_header_nritems(buf);
4043
4044         /*
4045          * FIXME, this only works only if we don't have any full
4046          * backref mode.
4047          */
4048         if (!init_extent_tree) {
4049                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
4050                                        btrfs_header_level(buf), 1, NULL,
4051                                        &flags);
4052                 if (ret < 0)
4053                         flags = 0;
4054         } else {
4055                 flags = 0;
4056         }
4057
4058         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4059                 parent = bytenr;
4060                 owner = 0;
4061         } else {
4062                 parent = 0;
4063                 owner = btrfs_header_owner(buf);
4064         }
4065
4066         ret = check_block(trans, root, extent_cache, buf, flags);
4067         if (ret)
4068                 goto out;
4069
4070         if (btrfs_is_leaf(buf)) {
4071                 btree_space_waste += btrfs_leaf_free_space(root, buf);
4072                 for (i = 0; i < nritems; i++) {
4073                         struct btrfs_file_extent_item *fi;
4074                         btrfs_item_key_to_cpu(buf, &key, i);
4075                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
4076                                 process_extent_item(root, extent_cache, buf,
4077                                                     i);
4078                                 continue;
4079                         }
4080                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4081                                 process_extent_item(root, extent_cache, buf,
4082                                                     i);
4083                                 continue;
4084                         }
4085                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
4086                                 total_csum_bytes +=
4087                                         btrfs_item_size_nr(buf, i);
4088                                 continue;
4089                         }
4090                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
4091                                 process_chunk_item(chunk_cache, &key, buf, i);
4092                                 continue;
4093                         }
4094                         if (key.type == BTRFS_DEV_ITEM_KEY) {
4095                                 process_device_item(dev_cache, &key, buf, i);
4096                                 continue;
4097                         }
4098                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
4099                                 process_block_group_item(block_group_cache,
4100                                         &key, buf, i);
4101                                 continue;
4102                         }
4103                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
4104                                 process_device_extent_item(dev_extent_cache,
4105                                         &key, buf, i);
4106                                 continue;
4107
4108                         }
4109                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
4110 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4111                                 process_extent_ref_v0(extent_cache, buf, i);
4112 #else
4113                                 BUG();
4114 #endif
4115                                 continue;
4116                         }
4117
4118                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
4119                                 add_tree_backref(extent_cache, key.objectid, 0,
4120                                                  key.offset, 0);
4121                                 continue;
4122                         }
4123                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
4124                                 add_tree_backref(extent_cache, key.objectid,
4125                                                  key.offset, 0, 0);
4126                                 continue;
4127                         }
4128                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
4129                                 struct btrfs_extent_data_ref *ref;
4130                                 ref = btrfs_item_ptr(buf, i,
4131                                                 struct btrfs_extent_data_ref);
4132                                 add_data_backref(extent_cache,
4133                                         key.objectid, 0,
4134                                         btrfs_extent_data_ref_root(buf, ref),
4135                                         btrfs_extent_data_ref_objectid(buf,
4136                                                                        ref),
4137                                         btrfs_extent_data_ref_offset(buf, ref),
4138                                         btrfs_extent_data_ref_count(buf, ref),
4139                                         0, root->sectorsize);
4140                                 continue;
4141                         }
4142                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
4143                                 struct btrfs_shared_data_ref *ref;
4144                                 ref = btrfs_item_ptr(buf, i,
4145                                                 struct btrfs_shared_data_ref);
4146                                 add_data_backref(extent_cache,
4147                                         key.objectid, key.offset, 0, 0, 0,
4148                                         btrfs_shared_data_ref_count(buf, ref),
4149                                         0, root->sectorsize);
4150                                 continue;
4151                         }
4152                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
4153                                 struct bad_item *bad;
4154
4155                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
4156                                         continue;
4157                                 if (!owner)
4158                                         continue;
4159                                 bad = malloc(sizeof(struct bad_item));
4160                                 if (!bad)
4161                                         continue;
4162                                 INIT_LIST_HEAD(&bad->list);
4163                                 memcpy(&bad->key, &key,
4164                                        sizeof(struct btrfs_key));
4165                                 bad->root_id = owner;
4166                                 list_add_tail(&bad->list, &delete_items);
4167                                 continue;
4168                         }
4169                         if (key.type != BTRFS_EXTENT_DATA_KEY)
4170                                 continue;
4171                         fi = btrfs_item_ptr(buf, i,
4172                                             struct btrfs_file_extent_item);
4173                         if (btrfs_file_extent_type(buf, fi) ==
4174                             BTRFS_FILE_EXTENT_INLINE)
4175                                 continue;
4176                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
4177                                 continue;
4178
4179                         data_bytes_allocated +=
4180                                 btrfs_file_extent_disk_num_bytes(buf, fi);
4181                         if (data_bytes_allocated < root->sectorsize) {
4182                                 abort();
4183                         }
4184                         data_bytes_referenced +=
4185                                 btrfs_file_extent_num_bytes(buf, fi);
4186                         add_data_backref(extent_cache,
4187                                 btrfs_file_extent_disk_bytenr(buf, fi),
4188                                 parent, owner, key.objectid, key.offset -
4189                                 btrfs_file_extent_offset(buf, fi), 1, 1,
4190                                 btrfs_file_extent_disk_num_bytes(buf, fi));
4191                 }
4192         } else {
4193                 int level;
4194                 struct btrfs_key first_key;
4195
4196                 first_key.objectid = 0;
4197
4198                 if (nritems > 0)
4199                         btrfs_item_key_to_cpu(buf, &first_key, 0);
4200                 level = btrfs_header_level(buf);
4201                 for (i = 0; i < nritems; i++) {
4202                         ptr = btrfs_node_blockptr(buf, i);
4203                         size = btrfs_level_size(root, level - 1);
4204                         btrfs_node_key_to_cpu(buf, &key, i);
4205                         if (ri != NULL) {
4206                                 struct btrfs_key drop_key;
4207                                 btrfs_disk_key_to_cpu(&drop_key,
4208                                                       &ri->drop_progress);
4209                                 if ((level == ri->drop_level)
4210                                     && is_dropped_key(&key, &drop_key)) {
4211                                         continue;
4212                                 }
4213                         }
4214                         ret = add_extent_rec(extent_cache, &key,
4215                                              btrfs_node_ptr_generation(buf, i),
4216                                              ptr, size, 0, 0, 1, 0, 1, 0,
4217                                              size);
4218                         BUG_ON(ret);
4219
4220                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
4221
4222                         if (level > 1) {
4223                                 add_pending(nodes, seen, ptr, size);
4224                         } else {
4225                                 add_pending(pending, seen, ptr, size);
4226                         }
4227                 }
4228                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
4229                                       nritems) * sizeof(struct btrfs_key_ptr);
4230         }
4231         total_btree_bytes += buf->len;
4232         if (fs_root_objectid(btrfs_header_owner(buf)))
4233                 total_fs_tree_bytes += buf->len;
4234         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
4235                 total_extent_tree_bytes += buf->len;
4236         if (!found_old_backref &&
4237             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
4238             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
4239             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
4240                 found_old_backref = 1;
4241 out:
4242         free_extent_buffer(buf);
4243         return ret;
4244 }
4245
4246 static int add_root_to_pending(struct extent_buffer *buf,
4247                                struct cache_tree *extent_cache,
4248                                struct cache_tree *pending,
4249                                struct cache_tree *seen,
4250                                struct cache_tree *nodes,
4251                                struct btrfs_key *root_key)
4252 {
4253         if (btrfs_header_level(buf) > 0)
4254                 add_pending(nodes, seen, buf->start, buf->len);
4255         else
4256                 add_pending(pending, seen, buf->start, buf->len);
4257         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
4258                        0, 1, 1, 0, 1, 0, buf->len);
4259
4260         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
4261             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
4262                 add_tree_backref(extent_cache, buf->start, buf->start,
4263                                  0, 1);
4264         else
4265                 add_tree_backref(extent_cache, buf->start, 0,
4266                                  root_key->objectid, 1);
4267         return 0;
4268 }
4269
4270 /* as we fix the tree, we might be deleting blocks that
4271  * we're tracking for repair.  This hook makes sure we
4272  * remove any backrefs for blocks as we are fixing them.
4273  */
4274 static int free_extent_hook(struct btrfs_trans_handle *trans,
4275                             struct btrfs_root *root,
4276                             u64 bytenr, u64 num_bytes, u64 parent,
4277                             u64 root_objectid, u64 owner, u64 offset,
4278                             int refs_to_drop)
4279 {
4280         struct extent_record *rec;
4281         struct cache_extent *cache;
4282         int is_data;
4283         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
4284
4285         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
4286         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
4287         if (!cache)
4288                 return 0;
4289
4290         rec = container_of(cache, struct extent_record, cache);
4291         if (is_data) {
4292                 struct data_backref *back;
4293                 back = find_data_backref(rec, parent, root_objectid, owner,
4294                                          offset, 1, bytenr, num_bytes);
4295                 if (!back)
4296                         goto out;
4297                 if (back->node.found_ref) {
4298                         back->found_ref -= refs_to_drop;
4299                         if (rec->refs)
4300                                 rec->refs -= refs_to_drop;
4301                 }
4302                 if (back->node.found_extent_tree) {
4303                         back->num_refs -= refs_to_drop;
4304                         if (rec->extent_item_refs)
4305                                 rec->extent_item_refs -= refs_to_drop;
4306                 }
4307                 if (back->found_ref == 0)
4308                         back->node.found_ref = 0;
4309                 if (back->num_refs == 0)
4310                         back->node.found_extent_tree = 0;
4311
4312                 if (!back->node.found_extent_tree && back->node.found_ref) {
4313                         list_del(&back->node.list);
4314                         free(back);
4315                 }
4316         } else {
4317                 struct tree_backref *back;
4318                 back = find_tree_backref(rec, parent, root_objectid);
4319                 if (!back)
4320                         goto out;
4321                 if (back->node.found_ref) {
4322                         if (rec->refs)
4323                                 rec->refs--;
4324                         back->node.found_ref = 0;
4325                 }
4326                 if (back->node.found_extent_tree) {
4327                         if (rec->extent_item_refs)
4328                                 rec->extent_item_refs--;
4329                         back->node.found_extent_tree = 0;
4330                 }
4331                 if (!back->node.found_extent_tree && back->node.found_ref) {
4332                         list_del(&back->node.list);
4333                         free(back);
4334                 }
4335         }
4336         maybe_free_extent_rec(extent_cache, rec);
4337 out:
4338         return 0;
4339 }
4340
4341 static int delete_extent_records(struct btrfs_trans_handle *trans,
4342                                  struct btrfs_root *root,
4343                                  struct btrfs_path *path,
4344                                  u64 bytenr, u64 new_len)
4345 {
4346         struct btrfs_key key;
4347         struct btrfs_key found_key;
4348         struct extent_buffer *leaf;
4349         int ret;
4350         int slot;
4351
4352
4353         key.objectid = bytenr;
4354         key.type = (u8)-1;
4355         key.offset = (u64)-1;
4356
4357         while(1) {
4358                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
4359                                         &key, path, 0, 1);
4360                 if (ret < 0)
4361                         break;
4362
4363                 if (ret > 0) {
4364                         ret = 0;
4365                         if (path->slots[0] == 0)
4366                                 break;
4367                         path->slots[0]--;
4368                 }
4369                 ret = 0;
4370
4371                 leaf = path->nodes[0];
4372                 slot = path->slots[0];
4373
4374                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4375                 if (found_key.objectid != bytenr)
4376                         break;
4377
4378                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
4379                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
4380                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4381                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
4382                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
4383                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
4384                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
4385                         btrfs_release_path(path);
4386                         if (found_key.type == 0) {
4387                                 if (found_key.offset == 0)
4388                                         break;
4389                                 key.offset = found_key.offset - 1;
4390                                 key.type = found_key.type;
4391                         }
4392                         key.type = found_key.type - 1;
4393                         key.offset = (u64)-1;
4394                         continue;
4395                 }
4396
4397                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
4398                         found_key.objectid, found_key.type, found_key.offset);
4399
4400                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
4401                 if (ret)
4402                         break;
4403                 btrfs_release_path(path);
4404
4405                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4406                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
4407                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
4408                                 found_key.offset : root->leafsize;
4409
4410                         ret = btrfs_update_block_group(trans, root, bytenr,
4411                                                        bytes, 0, 0);
4412                         if (ret)
4413                                 break;
4414                 }
4415         }
4416
4417         btrfs_release_path(path);
4418         return ret;
4419 }
4420
4421 /*
4422  * for a single backref, this will allocate a new extent
4423  * and add the backref to it.
4424  */
4425 static int record_extent(struct btrfs_trans_handle *trans,
4426                          struct btrfs_fs_info *info,
4427                          struct btrfs_path *path,
4428                          struct extent_record *rec,
4429                          struct extent_backref *back,
4430                          int allocated, u64 flags)
4431 {
4432         int ret;
4433         struct btrfs_root *extent_root = info->extent_root;
4434         struct extent_buffer *leaf;
4435         struct btrfs_key ins_key;
4436         struct btrfs_extent_item *ei;
4437         struct tree_backref *tback;
4438         struct data_backref *dback;
4439         struct btrfs_tree_block_info *bi;
4440
4441         if (!back->is_data)
4442                 rec->max_size = max_t(u64, rec->max_size,
4443                                     info->extent_root->leafsize);
4444
4445         if (!allocated) {
4446                 u32 item_size = sizeof(*ei);
4447
4448                 if (!back->is_data)
4449                         item_size += sizeof(*bi);
4450
4451                 ins_key.objectid = rec->start;
4452                 ins_key.offset = rec->max_size;
4453                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
4454
4455                 ret = btrfs_insert_empty_item(trans, extent_root, path,
4456                                         &ins_key, item_size);
4457                 if (ret)
4458                         goto fail;
4459
4460                 leaf = path->nodes[0];
4461                 ei = btrfs_item_ptr(leaf, path->slots[0],
4462                                     struct btrfs_extent_item);
4463
4464                 btrfs_set_extent_refs(leaf, ei, 0);
4465                 btrfs_set_extent_generation(leaf, ei, rec->generation);
4466
4467                 if (back->is_data) {
4468                         btrfs_set_extent_flags(leaf, ei,
4469                                                BTRFS_EXTENT_FLAG_DATA);
4470                 } else {
4471                         struct btrfs_disk_key copy_key;;
4472
4473                         tback = (struct tree_backref *)back;
4474                         bi = (struct btrfs_tree_block_info *)(ei + 1);
4475                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
4476                                              sizeof(*bi));
4477
4478                         btrfs_set_disk_key_objectid(&copy_key,
4479                                                     rec->info_objectid);
4480                         btrfs_set_disk_key_type(&copy_key, 0);
4481                         btrfs_set_disk_key_offset(&copy_key, 0);
4482
4483                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
4484                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
4485
4486                         btrfs_set_extent_flags(leaf, ei,
4487                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
4488                 }
4489
4490                 btrfs_mark_buffer_dirty(leaf);
4491                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
4492                                                rec->max_size, 1, 0);
4493                 if (ret)
4494                         goto fail;
4495                 btrfs_release_path(path);
4496         }
4497
4498         if (back->is_data) {
4499                 u64 parent;
4500                 int i;
4501
4502                 dback = (struct data_backref *)back;
4503                 if (back->full_backref)
4504                         parent = dback->parent;
4505                 else
4506                         parent = 0;
4507
4508                 for (i = 0; i < dback->found_ref; i++) {
4509                         /* if parent != 0, we're doing a full backref
4510                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
4511                          * just makes the backref allocator create a data
4512                          * backref
4513                          */
4514                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
4515                                                    rec->start, rec->max_size,
4516                                                    parent,
4517                                                    dback->root,
4518                                                    parent ?
4519                                                    BTRFS_FIRST_FREE_OBJECTID :
4520                                                    dback->owner,
4521                                                    dback->offset);
4522                         if (ret)
4523                                 break;
4524                 }
4525                 fprintf(stderr, "adding new data backref"
4526                                 " on %llu %s %llu owner %llu"
4527                                 " offset %llu found %d\n",
4528                                 (unsigned long long)rec->start,
4529                                 back->full_backref ?
4530                                 "parent" : "root",
4531                                 back->full_backref ?
4532                                 (unsigned long long)parent :
4533                                 (unsigned long long)dback->root,
4534                                 (unsigned long long)dback->owner,
4535                                 (unsigned long long)dback->offset,
4536                                 dback->found_ref);
4537         } else {
4538                 u64 parent;
4539
4540                 tback = (struct tree_backref *)back;
4541                 if (back->full_backref)
4542                         parent = tback->parent;
4543                 else
4544                         parent = 0;
4545
4546                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
4547                                            rec->start, rec->max_size,
4548                                            parent, tback->root, 0, 0);
4549                 fprintf(stderr, "adding new tree backref on "
4550                         "start %llu len %llu parent %llu root %llu\n",
4551                         rec->start, rec->max_size, tback->parent, tback->root);
4552         }
4553         if (ret)
4554                 goto fail;
4555 fail:
4556         btrfs_release_path(path);
4557         return ret;
4558 }
4559
4560 struct extent_entry {
4561         u64 bytenr;
4562         u64 bytes;
4563         int count;
4564         int broken;
4565         struct list_head list;
4566 };
4567
4568 static struct extent_entry *find_entry(struct list_head *entries,
4569                                        u64 bytenr, u64 bytes)
4570 {
4571         struct extent_entry *entry = NULL;
4572
4573         list_for_each_entry(entry, entries, list) {
4574                 if (entry->bytenr == bytenr && entry->bytes == bytes)
4575                         return entry;
4576         }
4577
4578         return NULL;
4579 }
4580
4581 static struct extent_entry *find_most_right_entry(struct list_head *entries)
4582 {
4583         struct extent_entry *entry, *best = NULL, *prev = NULL;
4584
4585         list_for_each_entry(entry, entries, list) {
4586                 if (!prev) {
4587                         prev = entry;
4588                         continue;
4589                 }
4590
4591                 /*
4592                  * If there are as many broken entries as entries then we know
4593                  * not to trust this particular entry.
4594                  */
4595                 if (entry->broken == entry->count)
4596                         continue;
4597
4598                 /*
4599                  * If our current entry == best then we can't be sure our best
4600                  * is really the best, so we need to keep searching.
4601                  */
4602                 if (best && best->count == entry->count) {
4603                         prev = entry;
4604                         best = NULL;
4605                         continue;
4606                 }
4607
4608                 /* Prev == entry, not good enough, have to keep searching */
4609                 if (!prev->broken && prev->count == entry->count)
4610                         continue;
4611
4612                 if (!best)
4613                         best = (prev->count > entry->count) ? prev : entry;
4614                 else if (best->count < entry->count)
4615                         best = entry;
4616                 prev = entry;
4617         }
4618
4619         return best;
4620 }
4621
4622 static int repair_ref(struct btrfs_trans_handle *trans,
4623                       struct btrfs_fs_info *info, struct btrfs_path *path,
4624                       struct data_backref *dback, struct extent_entry *entry)
4625 {
4626         struct btrfs_root *root;
4627         struct btrfs_file_extent_item *fi;
4628         struct extent_buffer *leaf;
4629         struct btrfs_key key;
4630         u64 bytenr, bytes;
4631         int ret;
4632
4633         key.objectid = dback->root;
4634         key.type = BTRFS_ROOT_ITEM_KEY;
4635         key.offset = (u64)-1;
4636         root = btrfs_read_fs_root(info, &key);
4637         if (IS_ERR(root)) {
4638                 fprintf(stderr, "Couldn't find root for our ref\n");
4639                 return -EINVAL;
4640         }
4641
4642         /*
4643          * The backref points to the original offset of the extent if it was
4644          * split, so we need to search down to the offset we have and then walk
4645          * forward until we find the backref we're looking for.
4646          */
4647         key.objectid = dback->owner;
4648         key.type = BTRFS_EXTENT_DATA_KEY;
4649         key.offset = dback->offset;
4650         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4651         if (ret < 0) {
4652                 fprintf(stderr, "Error looking up ref %d\n", ret);
4653                 return ret;
4654         }
4655
4656         while (1) {
4657                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4658                         ret = btrfs_next_leaf(root, path);
4659                         if (ret) {
4660                                 fprintf(stderr, "Couldn't find our ref, next\n");
4661                                 return -EINVAL;
4662                         }
4663                 }
4664                 leaf = path->nodes[0];
4665                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4666                 if (key.objectid != dback->owner ||
4667                     key.type != BTRFS_EXTENT_DATA_KEY) {
4668                         fprintf(stderr, "Couldn't find our ref, search\n");
4669                         return -EINVAL;
4670                 }
4671                 fi = btrfs_item_ptr(leaf, path->slots[0],
4672                                     struct btrfs_file_extent_item);
4673                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4674                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4675
4676                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
4677                         break;
4678                 path->slots[0]++;
4679         }
4680
4681         btrfs_release_path(path);
4682
4683         /*
4684          * Have to make sure that this root gets updated when we commit the
4685          * transaction
4686          */
4687         record_root_in_trans(trans, root);
4688
4689         /*
4690          * Ok we have the key of the file extent we want to fix, now we can cow
4691          * down to the thing and fix it.
4692          */
4693         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4694         if (ret < 0) {
4695                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
4696                         key.objectid, key.type, key.offset, ret);
4697                 return ret;
4698         }
4699         if (ret > 0) {
4700                 fprintf(stderr, "Well that's odd, we just found this key "
4701                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
4702                         key.offset);
4703                 return -EINVAL;
4704         }
4705         leaf = path->nodes[0];
4706         fi = btrfs_item_ptr(leaf, path->slots[0],
4707                             struct btrfs_file_extent_item);
4708
4709         if (btrfs_file_extent_compression(leaf, fi) &&
4710             dback->disk_bytenr != entry->bytenr) {
4711                 fprintf(stderr, "Ref doesn't match the record start and is "
4712                         "compressed, please take a btrfs-image of this file "
4713                         "system and send it to a btrfs developer so they can "
4714                         "complete this functionality for bytenr %Lu\n",
4715                         dback->disk_bytenr);
4716                 return -EINVAL;
4717         }
4718
4719         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
4720                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4721         } else if (dback->disk_bytenr > entry->bytenr) {
4722                 u64 off_diff, offset;
4723
4724                 off_diff = dback->disk_bytenr - entry->bytenr;
4725                 offset = btrfs_file_extent_offset(leaf, fi);
4726                 if (dback->disk_bytenr + offset +
4727                     btrfs_file_extent_num_bytes(leaf, fi) >
4728                     entry->bytenr + entry->bytes) {
4729                         fprintf(stderr, "Ref is past the entry end, please "
4730                                 "take a btrfs-image of this file system and "
4731                                 "send it to a btrfs developer, ref %Lu\n",
4732                                 dback->disk_bytenr);
4733                         return -EINVAL;
4734                 }
4735                 offset += off_diff;
4736                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4737                 btrfs_set_file_extent_offset(leaf, fi, offset);
4738         } else if (dback->disk_bytenr < entry->bytenr) {
4739                 u64 offset;
4740
4741                 offset = btrfs_file_extent_offset(leaf, fi);
4742                 if (dback->disk_bytenr + offset < entry->bytenr) {
4743                         fprintf(stderr, "Ref is before the entry start, please"
4744                                 " take a btrfs-image of this file system and "
4745                                 "send it to a btrfs developer, ref %Lu\n",
4746                                 dback->disk_bytenr);
4747                         return -EINVAL;
4748                 }
4749
4750                 offset += dback->disk_bytenr;
4751                 offset -= entry->bytenr;
4752                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4753                 btrfs_set_file_extent_offset(leaf, fi, offset);
4754         }
4755
4756         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
4757
4758         /*
4759          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
4760          * only do this if we aren't using compression, otherwise it's a
4761          * trickier case.
4762          */
4763         if (!btrfs_file_extent_compression(leaf, fi))
4764                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
4765         else
4766                 printf("ram bytes may be wrong?\n");
4767         btrfs_mark_buffer_dirty(leaf);
4768         btrfs_release_path(path);
4769         return 0;
4770 }
4771
4772 static int verify_backrefs(struct btrfs_trans_handle *trans,
4773                            struct btrfs_fs_info *info, struct btrfs_path *path,
4774                            struct extent_record *rec)
4775 {
4776         struct extent_backref *back;
4777         struct data_backref *dback;
4778         struct extent_entry *entry, *best = NULL;
4779         LIST_HEAD(entries);
4780         int nr_entries = 0;
4781         int broken_entries = 0;
4782         int ret = 0;
4783         short mismatch = 0;
4784
4785         /*
4786          * Metadata is easy and the backrefs should always agree on bytenr and
4787          * size, if not we've got bigger issues.
4788          */
4789         if (rec->metadata)
4790                 return 0;
4791
4792         list_for_each_entry(back, &rec->backrefs, list) {
4793                 dback = (struct data_backref *)back;
4794                 /*
4795                  * We only pay attention to backrefs that we found a real
4796                  * backref for.
4797                  */
4798                 if (dback->found_ref == 0)
4799                         continue;
4800                 if (back->full_backref)
4801                         continue;
4802
4803                 /*
4804                  * For now we only catch when the bytes don't match, not the
4805                  * bytenr.  We can easily do this at the same time, but I want
4806                  * to have a fs image to test on before we just add repair
4807                  * functionality willy-nilly so we know we won't screw up the
4808                  * repair.
4809                  */
4810
4811                 entry = find_entry(&entries, dback->disk_bytenr,
4812                                    dback->bytes);
4813                 if (!entry) {
4814                         entry = malloc(sizeof(struct extent_entry));
4815                         if (!entry) {
4816                                 ret = -ENOMEM;
4817                                 goto out;
4818                         }
4819                         memset(entry, 0, sizeof(*entry));
4820                         entry->bytenr = dback->disk_bytenr;
4821                         entry->bytes = dback->bytes;
4822                         list_add_tail(&entry->list, &entries);
4823                         nr_entries++;
4824                 }
4825
4826                 /*
4827                  * If we only have on entry we may think the entries agree when
4828                  * in reality they don't so we have to do some extra checking.
4829                  */
4830                 if (dback->disk_bytenr != rec->start ||
4831                     dback->bytes != rec->nr || back->broken)
4832                         mismatch = 1;
4833
4834                 if (back->broken) {
4835                         entry->broken++;
4836                         broken_entries++;
4837                 }
4838
4839                 entry->count++;
4840         }
4841
4842         /* Yay all the backrefs agree, carry on good sir */
4843         if (nr_entries <= 1 && !mismatch)
4844                 goto out;
4845
4846         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
4847                 "%Lu\n", rec->start);
4848
4849         /*
4850          * First we want to see if the backrefs can agree amongst themselves who
4851          * is right, so figure out which one of the entries has the highest
4852          * count.
4853          */
4854         best = find_most_right_entry(&entries);
4855
4856         /*
4857          * Ok so we may have an even split between what the backrefs think, so
4858          * this is where we use the extent ref to see what it thinks.
4859          */
4860         if (!best) {
4861                 entry = find_entry(&entries, rec->start, rec->nr);
4862                 if (!entry && (!broken_entries || !rec->found_rec)) {
4863                         fprintf(stderr, "Backrefs don't agree with each other "
4864                                 "and extent record doesn't agree with anybody,"
4865                                 " so we can't fix bytenr %Lu bytes %Lu\n",
4866                                 rec->start, rec->nr);
4867                         ret = -EINVAL;
4868                         goto out;
4869                 } else if (!entry) {
4870                         /*
4871                          * Ok our backrefs were broken, we'll assume this is the
4872                          * correct value and add an entry for this range.
4873                          */
4874                         entry = malloc(sizeof(struct extent_entry));
4875                         if (!entry) {
4876                                 ret = -ENOMEM;
4877                                 goto out;
4878                         }
4879                         memset(entry, 0, sizeof(*entry));
4880                         entry->bytenr = rec->start;
4881                         entry->bytes = rec->nr;
4882                         list_add_tail(&entry->list, &entries);
4883                         nr_entries++;
4884                 }
4885                 entry->count++;
4886                 best = find_most_right_entry(&entries);
4887                 if (!best) {
4888                         fprintf(stderr, "Backrefs and extent record evenly "
4889                                 "split on who is right, this is going to "
4890                                 "require user input to fix bytenr %Lu bytes "
4891                                 "%Lu\n", rec->start, rec->nr);
4892                         ret = -EINVAL;
4893                         goto out;
4894                 }
4895         }
4896
4897         /*
4898          * I don't think this can happen currently as we'll abort() if we catch
4899          * this case higher up, but in case somebody removes that we still can't
4900          * deal with it properly here yet, so just bail out of that's the case.
4901          */
4902         if (best->bytenr != rec->start) {
4903                 fprintf(stderr, "Extent start and backref starts don't match, "
4904                         "please use btrfs-image on this file system and send "
4905                         "it to a btrfs developer so they can make fsck fix "
4906                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
4907                         rec->start, rec->nr);
4908                 ret = -EINVAL;
4909                 goto out;
4910         }
4911
4912         /*
4913          * Ok great we all agreed on an extent record, let's go find the real
4914          * references and fix up the ones that don't match.
4915          */
4916         list_for_each_entry(back, &rec->backrefs, list) {
4917                 dback = (struct data_backref *)back;
4918
4919                 /*
4920                  * Still ignoring backrefs that don't have a real ref attached
4921                  * to them.
4922                  */
4923                 if (dback->found_ref == 0)
4924                         continue;
4925                 if (back->full_backref)
4926                         continue;
4927
4928                 if (dback->bytes == best->bytes &&
4929                     dback->disk_bytenr == best->bytenr)
4930                         continue;
4931
4932                 ret = repair_ref(trans, info, path, dback, best);
4933                 if (ret)
4934                         goto out;
4935         }
4936
4937         /*
4938          * Ok we messed with the actual refs, which means we need to drop our
4939          * entire cache and go back and rescan.  I know this is a huge pain and
4940          * adds a lot of extra work, but it's the only way to be safe.  Once all
4941          * the backrefs agree we may not need to do anything to the extent
4942          * record itself.
4943          */
4944         ret = -EAGAIN;
4945 out:
4946         while (!list_empty(&entries)) {
4947                 entry = list_entry(entries.next, struct extent_entry, list);
4948                 list_del_init(&entry->list);
4949                 free(entry);
4950         }
4951         return ret;
4952 }
4953
4954 static int process_duplicates(struct btrfs_root *root,
4955                               struct cache_tree *extent_cache,
4956                               struct extent_record *rec)
4957 {
4958         struct extent_record *good, *tmp;
4959         struct cache_extent *cache;
4960         int ret;
4961
4962         /*
4963          * If we found a extent record for this extent then return, or if we
4964          * have more than one duplicate we are likely going to need to delete
4965          * something.
4966          */
4967         if (rec->found_rec || rec->num_duplicates > 1)
4968                 return 0;
4969
4970         /* Shouldn't happen but just in case */
4971         BUG_ON(!rec->num_duplicates);
4972
4973         /*
4974          * So this happens if we end up with a backref that doesn't match the
4975          * actual extent entry.  So either the backref is bad or the extent
4976          * entry is bad.  Either way we want to have the extent_record actually
4977          * reflect what we found in the extent_tree, so we need to take the
4978          * duplicate out and use that as the extent_record since the only way we
4979          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
4980          */
4981         remove_cache_extent(extent_cache, &rec->cache);
4982
4983         good = list_entry(rec->dups.next, struct extent_record, list);
4984         list_del_init(&good->list);
4985         INIT_LIST_HEAD(&good->backrefs);
4986         INIT_LIST_HEAD(&good->dups);
4987         good->cache.start = good->start;
4988         good->cache.size = good->nr;
4989         good->content_checked = 0;
4990         good->owner_ref_checked = 0;
4991         good->num_duplicates = 0;
4992         good->refs = rec->refs;
4993         list_splice_init(&rec->backrefs, &good->backrefs);
4994         while (1) {
4995                 cache = lookup_cache_extent(extent_cache, good->start,
4996                                             good->nr);
4997                 if (!cache)
4998                         break;
4999                 tmp = container_of(cache, struct extent_record, cache);
5000
5001                 /*
5002                  * If we find another overlapping extent and it's found_rec is
5003                  * set then it's a duplicate and we need to try and delete
5004                  * something.
5005                  */
5006                 if (tmp->found_rec || tmp->num_duplicates > 0) {
5007                         if (list_empty(&good->list))
5008                                 list_add_tail(&good->list,
5009                                               &duplicate_extents);
5010                         good->num_duplicates += tmp->num_duplicates + 1;
5011                         list_splice_init(&tmp->dups, &good->dups);
5012                         list_del_init(&tmp->list);
5013                         list_add_tail(&tmp->list, &good->dups);
5014                         remove_cache_extent(extent_cache, &tmp->cache);
5015                         continue;
5016                 }
5017
5018                 /*
5019                  * Ok we have another non extent item backed extent rec, so lets
5020                  * just add it to this extent and carry on like we did above.
5021                  */
5022                 good->refs += tmp->refs;
5023                 list_splice_init(&tmp->backrefs, &good->backrefs);
5024                 remove_cache_extent(extent_cache, &tmp->cache);
5025                 free(tmp);
5026         }
5027         ret = insert_cache_extent(extent_cache, &good->cache);
5028         BUG_ON(ret);
5029         free(rec);
5030         return good->num_duplicates ? 0 : 1;
5031 }
5032
5033 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
5034                                     struct btrfs_root *root,
5035                                     struct extent_record *rec)
5036 {
5037         LIST_HEAD(delete_list);
5038         struct btrfs_path *path;
5039         struct extent_record *tmp, *good, *n;
5040         int nr_del = 0;
5041         int ret = 0;
5042         struct btrfs_key key;
5043
5044         path = btrfs_alloc_path();
5045         if (!path) {
5046                 ret = -ENOMEM;
5047                 goto out;
5048         }
5049
5050         good = rec;
5051         /* Find the record that covers all of the duplicates. */
5052         list_for_each_entry(tmp, &rec->dups, list) {
5053                 if (good->start < tmp->start)
5054                         continue;
5055                 if (good->nr > tmp->nr)
5056                         continue;
5057
5058                 if (tmp->start + tmp->nr < good->start + good->nr) {
5059                         fprintf(stderr, "Ok we have overlapping extents that "
5060                                 "aren't completely covered by eachother, this "
5061                                 "is going to require more careful thought.  "
5062                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
5063                                 tmp->start, tmp->nr, good->start, good->nr);
5064                         abort();
5065                 }
5066                 good = tmp;
5067         }
5068
5069         if (good != rec)
5070                 list_add_tail(&rec->list, &delete_list);
5071
5072         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
5073                 if (tmp == good)
5074                         continue;
5075                 list_move_tail(&tmp->list, &delete_list);
5076         }
5077
5078         root = root->fs_info->extent_root;
5079         list_for_each_entry(tmp, &delete_list, list) {
5080                 if (tmp->found_rec == 0)
5081                         continue;
5082                 key.objectid = tmp->start;
5083                 key.type = BTRFS_EXTENT_ITEM_KEY;
5084                 key.offset = tmp->nr;
5085
5086                 /* Shouldn't happen but just in case */
5087                 if (tmp->metadata) {
5088                         fprintf(stderr, "Well this shouldn't happen, extent "
5089                                 "record overlaps but is metadata? "
5090                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
5091                         abort();
5092                 }
5093
5094                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5095                 if (ret) {
5096                         if (ret > 0)
5097                                 ret = -EINVAL;
5098                         goto out;
5099                 }
5100                 ret = btrfs_del_item(trans, root, path);
5101                 if (ret)
5102                         goto out;
5103                 btrfs_release_path(path);
5104                 nr_del++;
5105         }
5106
5107 out:
5108         while (!list_empty(&delete_list)) {
5109                 tmp = list_entry(delete_list.next, struct extent_record, list);
5110                 list_del_init(&tmp->list);
5111                 if (tmp == rec)
5112                         continue;
5113                 free(tmp);
5114         }
5115
5116         while (!list_empty(&rec->dups)) {
5117                 tmp = list_entry(rec->dups.next, struct extent_record, list);
5118                 list_del_init(&tmp->list);
5119                 free(tmp);
5120         }
5121
5122         btrfs_free_path(path);
5123
5124         if (!ret && !nr_del)
5125                 rec->num_duplicates = 0;
5126
5127         return ret ? ret : nr_del;
5128 }
5129
5130 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
5131                                   struct btrfs_fs_info *info,
5132                                   struct btrfs_path *path,
5133                                   struct cache_tree *extent_cache,
5134                                   struct extent_record *rec)
5135 {
5136         struct btrfs_root *root;
5137         struct extent_backref *back;
5138         struct data_backref *dback;
5139         struct cache_extent *cache;
5140         struct btrfs_file_extent_item *fi;
5141         struct btrfs_key key;
5142         u64 bytenr, bytes;
5143         int ret;
5144
5145         list_for_each_entry(back, &rec->backrefs, list) {
5146                 dback = (struct data_backref *)back;
5147
5148                 /* We found this one, we don't need to do a lookup */
5149                 if (dback->found_ref)
5150                         continue;
5151                 /* Don't care about full backrefs (poor unloved backrefs) */
5152                 if (back->full_backref)
5153                         continue;
5154                 key.objectid = dback->root;
5155                 key.type = BTRFS_ROOT_ITEM_KEY;
5156                 key.offset = (u64)-1;
5157
5158                 root = btrfs_read_fs_root(info, &key);
5159
5160                 /* No root, definitely a bad ref, skip */
5161                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
5162                         continue;
5163                 /* Other err, exit */
5164                 if (IS_ERR(root))
5165                         return PTR_ERR(root);
5166
5167                 key.objectid = dback->owner;
5168                 key.type = BTRFS_EXTENT_DATA_KEY;
5169                 key.offset = dback->offset;
5170                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5171                 if (ret) {
5172                         btrfs_release_path(path);
5173                         if (ret < 0)
5174                                 return ret;
5175                         /* Didn't find it, we can carry on */
5176                         ret = 0;
5177                         continue;
5178                 }
5179
5180                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5181                                     struct btrfs_file_extent_item);
5182                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
5183                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
5184                 btrfs_release_path(path);
5185                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5186                 if (cache) {
5187                         struct extent_record *tmp;
5188                         tmp = container_of(cache, struct extent_record, cache);
5189
5190                         /*
5191                          * If we found an extent record for the bytenr for this
5192                          * particular backref then we can't add it to our
5193                          * current extent record.  We only want to add backrefs
5194                          * that don't have a corresponding extent item in the
5195                          * extent tree since they likely belong to this record
5196                          * and we need to fix it if it doesn't match bytenrs.
5197                          */
5198                         if  (tmp->found_rec)
5199                                 continue;
5200                 }
5201
5202                 dback->found_ref += 1;
5203                 dback->disk_bytenr = bytenr;
5204                 dback->bytes = bytes;
5205
5206                 /*
5207                  * Set this so the verify backref code knows not to trust the
5208                  * values in this backref.
5209                  */
5210                 back->broken = 1;
5211         }
5212
5213         return 0;
5214 }
5215
5216 /*
5217  * when an incorrect extent item is found, this will delete
5218  * all of the existing entries for it and recreate them
5219  * based on what the tree scan found.
5220  */
5221 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
5222                              struct btrfs_fs_info *info,
5223                              struct cache_tree *extent_cache,
5224                              struct extent_record *rec)
5225 {
5226         int ret;
5227         struct btrfs_path *path;
5228         struct list_head *cur = rec->backrefs.next;
5229         struct cache_extent *cache;
5230         struct extent_backref *back;
5231         int allocated = 0;
5232         u64 flags = 0;
5233
5234         /*
5235          * remember our flags for recreating the extent.
5236          * FIXME, if we have cleared extent tree, we can not
5237          * lookup extent info in extent tree.
5238          */
5239         if (!init_extent_tree) {
5240                 ret = btrfs_lookup_extent_info(NULL, info->extent_root,
5241                                         rec->start, rec->max_size,
5242                                         rec->metadata, NULL, &flags);
5243                 if (ret < 0)
5244                         flags = 0;
5245         } else {
5246                 flags = 0;
5247         }
5248
5249         path = btrfs_alloc_path();
5250         if (!path)
5251                 return -ENOMEM;
5252
5253         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
5254                 /*
5255                  * Sometimes the backrefs themselves are so broken they don't
5256                  * get attached to any meaningful rec, so first go back and
5257                  * check any of our backrefs that we couldn't find and throw
5258                  * them into the list if we find the backref so that
5259                  * verify_backrefs can figure out what to do.
5260                  */
5261                 ret = find_possible_backrefs(trans, info, path, extent_cache,
5262                                              rec);
5263                 if (ret < 0)
5264                         goto out;
5265         }
5266
5267         /* step one, make sure all of the backrefs agree */
5268         ret = verify_backrefs(trans, info, path, rec);
5269         if (ret < 0)
5270                 goto out;
5271
5272         /* step two, delete all the existing records */
5273         ret = delete_extent_records(trans, info->extent_root, path,
5274                                     rec->start, rec->max_size);
5275
5276         if (ret < 0)
5277                 goto out;
5278
5279         /* was this block corrupt?  If so, don't add references to it */
5280         cache = lookup_cache_extent(info->corrupt_blocks,
5281                                     rec->start, rec->max_size);
5282         if (cache) {
5283                 ret = 0;
5284                 goto out;
5285         }
5286
5287         /* step three, recreate all the refs we did find */
5288         while(cur != &rec->backrefs) {
5289                 back = list_entry(cur, struct extent_backref, list);
5290                 cur = cur->next;
5291
5292                 /*
5293                  * if we didn't find any references, don't create a
5294                  * new extent record
5295                  */
5296                 if (!back->found_ref)
5297                         continue;
5298
5299                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
5300                 allocated = 1;
5301
5302                 if (ret)
5303                         goto out;
5304         }
5305 out:
5306         btrfs_free_path(path);
5307         return ret;
5308 }
5309
5310 /* right now we only prune from the extent allocation tree */
5311 static int prune_one_block(struct btrfs_trans_handle *trans,
5312                            struct btrfs_fs_info *info,
5313                            struct btrfs_corrupt_block *corrupt)
5314 {
5315         int ret;
5316         struct btrfs_path path;
5317         struct extent_buffer *eb;
5318         u64 found;
5319         int slot;
5320         int nritems;
5321         int level = corrupt->level + 1;
5322
5323         btrfs_init_path(&path);
5324 again:
5325         /* we want to stop at the parent to our busted block */
5326         path.lowest_level = level;
5327
5328         ret = btrfs_search_slot(trans, info->extent_root,
5329                                 &corrupt->key, &path, -1, 1);
5330
5331         if (ret < 0)
5332                 goto out;
5333
5334         eb = path.nodes[level];
5335         if (!eb) {
5336                 ret = -ENOENT;
5337                 goto out;
5338         }
5339
5340         /*
5341          * hopefully the search gave us the block we want to prune,
5342          * lets try that first
5343          */
5344         slot = path.slots[level];
5345         found =  btrfs_node_blockptr(eb, slot);
5346         if (found == corrupt->cache.start)
5347                 goto del_ptr;
5348
5349         nritems = btrfs_header_nritems(eb);
5350
5351         /* the search failed, lets scan this node and hope we find it */
5352         for (slot = 0; slot < nritems; slot++) {
5353                 found =  btrfs_node_blockptr(eb, slot);
5354                 if (found == corrupt->cache.start)
5355                         goto del_ptr;
5356         }
5357         /*
5358          * we couldn't find the bad block.  TODO, search all the nodes for pointers
5359          * to this block
5360          */
5361         if (eb == info->extent_root->node) {
5362                 ret = -ENOENT;
5363                 goto out;
5364         } else {
5365                 level++;
5366                 btrfs_release_path(&path);
5367                 goto again;
5368         }
5369
5370 del_ptr:
5371         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
5372         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
5373
5374 out:
5375         btrfs_release_path(&path);
5376         return ret;
5377 }
5378
5379 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
5380                                 struct btrfs_fs_info *info)
5381 {
5382         struct cache_extent *cache;
5383         struct btrfs_corrupt_block *corrupt;
5384
5385         cache = search_cache_extent(info->corrupt_blocks, 0);
5386         while (1) {
5387                 if (!cache)
5388                         break;
5389                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5390                 prune_one_block(trans, info, corrupt);
5391                 cache = next_cache_extent(cache);
5392         }
5393         return 0;
5394 }
5395
5396 static void free_corrupt_block(struct cache_extent *cache)
5397 {
5398         struct btrfs_corrupt_block *corrupt;
5399
5400         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5401         free(corrupt);
5402 }
5403
5404 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
5405
5406 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
5407 {
5408         struct btrfs_block_group_cache *cache;
5409         u64 start, end;
5410         int ret;
5411
5412         while (1) {
5413                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
5414                                             &start, &end, EXTENT_DIRTY);
5415                 if (ret)
5416                         break;
5417                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
5418                                    GFP_NOFS);
5419         }
5420
5421         start = 0;
5422         while (1) {
5423                 cache = btrfs_lookup_first_block_group(fs_info, start);
5424                 if (!cache)
5425                         break;
5426                 if (cache->cached)
5427                         cache->cached = 0;
5428                 start = cache->key.objectid + cache->key.offset;
5429         }
5430 }
5431
5432 static int check_extent_refs(struct btrfs_trans_handle *trans,
5433                              struct btrfs_root *root,
5434                              struct cache_tree *extent_cache)
5435 {
5436         struct extent_record *rec;
5437         struct cache_extent *cache;
5438         int err = 0;
5439         int ret = 0;
5440         int fixed = 0;
5441         int had_dups = 0;
5442
5443         if (repair) {
5444                 /*
5445                  * if we're doing a repair, we have to make sure
5446                  * we don't allocate from the problem extents.
5447                  * In the worst case, this will be all the
5448                  * extents in the FS
5449                  */
5450                 cache = search_cache_extent(extent_cache, 0);
5451                 while(cache) {
5452                         rec = container_of(cache, struct extent_record, cache);
5453                         btrfs_pin_extent(root->fs_info,
5454                                          rec->start, rec->max_size);
5455                         cache = next_cache_extent(cache);
5456                 }
5457
5458                 /* pin down all the corrupted blocks too */
5459                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
5460                 while(cache) {
5461                         btrfs_pin_extent(root->fs_info,
5462                                          cache->start, cache->size);
5463                         cache = next_cache_extent(cache);
5464                 }
5465                 prune_corrupt_blocks(trans, root->fs_info);
5466                 reset_cached_block_groups(root->fs_info);
5467         }
5468
5469         /*
5470          * We need to delete any duplicate entries we find first otherwise we
5471          * could mess up the extent tree when we have backrefs that actually
5472          * belong to a different extent item and not the weird duplicate one.
5473          */
5474         while (repair && !list_empty(&duplicate_extents)) {
5475                 rec = list_entry(duplicate_extents.next, struct extent_record,
5476                                  list);
5477                 list_del_init(&rec->list);
5478
5479                 /* Sometimes we can find a backref before we find an actual
5480                  * extent, so we need to process it a little bit to see if there
5481                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
5482                  * if this is a backref screwup.  If we need to delete stuff
5483                  * process_duplicates() will return 0, otherwise it will return
5484                  * 1 and we
5485                  */
5486                 if (process_duplicates(root, extent_cache, rec))
5487                         continue;
5488                 ret = delete_duplicate_records(trans, root, rec);
5489                 if (ret < 0)
5490                         return ret;
5491                 /*
5492                  * delete_duplicate_records will return the number of entries
5493                  * deleted, so if it's greater than 0 then we know we actually
5494                  * did something and we need to remove.
5495                  */
5496                 if (ret)
5497                         had_dups = 1;
5498         }
5499
5500         if (had_dups)
5501                 return -EAGAIN;
5502
5503         while(1) {
5504                 fixed = 0;
5505                 cache = search_cache_extent(extent_cache, 0);
5506                 if (!cache)
5507                         break;
5508                 rec = container_of(cache, struct extent_record, cache);
5509                 if (rec->num_duplicates) {
5510                         fprintf(stderr, "extent item %llu has multiple extent "
5511                                 "items\n", (unsigned long long)rec->start);
5512                         err = 1;
5513                 }
5514
5515                 if (rec->refs != rec->extent_item_refs) {
5516                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
5517                                 (unsigned long long)rec->start,
5518                                 (unsigned long long)rec->nr);
5519                         fprintf(stderr, "extent item %llu, found %llu\n",
5520                                 (unsigned long long)rec->extent_item_refs,
5521                                 (unsigned long long)rec->refs);
5522                         if (!fixed && repair) {
5523                                 ret = fixup_extent_refs(trans, root->fs_info,
5524                                                         extent_cache, rec);
5525                                 if (ret)
5526                                         goto repair_abort;
5527                                 fixed = 1;
5528                         }
5529                         err = 1;
5530
5531                 }
5532                 if (all_backpointers_checked(rec, 1)) {
5533                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
5534                                 (unsigned long long)rec->start,
5535                                 (unsigned long long)rec->nr);
5536
5537                         if (!fixed && repair) {
5538                                 ret = fixup_extent_refs(trans, root->fs_info,
5539                                                         extent_cache, rec);
5540                                 if (ret)
5541                                         goto repair_abort;
5542                                 fixed = 1;
5543                         }
5544
5545                         err = 1;
5546                 }
5547                 if (!rec->owner_ref_checked) {
5548                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
5549                                 (unsigned long long)rec->start,
5550                                 (unsigned long long)rec->nr);
5551                         if (!fixed && repair) {
5552                                 ret = fixup_extent_refs(trans, root->fs_info,
5553                                                         extent_cache, rec);
5554                                 if (ret)
5555                                         goto repair_abort;
5556                                 fixed = 1;
5557                         }
5558                         err = 1;
5559                 }
5560
5561                 remove_cache_extent(extent_cache, cache);
5562                 free_all_extent_backrefs(rec);
5563                 free(rec);
5564         }
5565 repair_abort:
5566         if (repair) {
5567                 if (ret && ret != -EAGAIN) {
5568                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
5569                         exit(1);
5570                 } else if (!ret) {
5571                         btrfs_fix_block_accounting(trans, root);
5572                 }
5573                 if (err)
5574                         fprintf(stderr, "repaired damaged extent references\n");
5575                 return ret;
5576         }
5577         return err;
5578 }
5579
5580 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
5581 {
5582         u64 stripe_size;
5583
5584         if (type & BTRFS_BLOCK_GROUP_RAID0) {
5585                 stripe_size = length;
5586                 stripe_size /= num_stripes;
5587         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
5588                 stripe_size = length * 2;
5589                 stripe_size /= num_stripes;
5590         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
5591                 stripe_size = length;
5592                 stripe_size /= (num_stripes - 1);
5593         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
5594                 stripe_size = length;
5595                 stripe_size /= (num_stripes - 2);
5596         } else {
5597                 stripe_size = length;
5598         }
5599         return stripe_size;
5600 }
5601
5602 static int check_chunk_refs(struct chunk_record *chunk_rec,
5603                             struct block_group_tree *block_group_cache,
5604                             struct device_extent_tree *dev_extent_cache,
5605                             int silent)
5606 {
5607         struct cache_extent *block_group_item;
5608         struct block_group_record *block_group_rec;
5609         struct cache_extent *dev_extent_item;
5610         struct device_extent_record *dev_extent_rec;
5611         u64 devid;
5612         u64 offset;
5613         u64 length;
5614         int i;
5615         int ret = 0;
5616
5617         block_group_item = lookup_cache_extent(&block_group_cache->tree,
5618                                                chunk_rec->offset,
5619                                                chunk_rec->length);
5620         if (block_group_item) {
5621                 block_group_rec = container_of(block_group_item,
5622                                                struct block_group_record,
5623                                                cache);
5624                 if (chunk_rec->length != block_group_rec->offset ||
5625                     chunk_rec->offset != block_group_rec->objectid ||
5626                     chunk_rec->type_flags != block_group_rec->flags) {
5627                         if (!silent)
5628                                 fprintf(stderr,
5629                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
5630                                         chunk_rec->objectid,
5631                                         chunk_rec->type,
5632                                         chunk_rec->offset,
5633                                         chunk_rec->length,
5634                                         chunk_rec->offset,
5635                                         chunk_rec->type_flags,
5636                                         block_group_rec->objectid,
5637                                         block_group_rec->type,
5638                                         block_group_rec->offset,
5639                                         block_group_rec->offset,
5640                                         block_group_rec->objectid,
5641                                         block_group_rec->flags);
5642                         ret = -1;
5643                 } else {
5644                         list_del_init(&block_group_rec->list);
5645                         chunk_rec->bg_rec = block_group_rec;
5646                 }
5647         } else {
5648                 if (!silent)
5649                         fprintf(stderr,
5650                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
5651                                 chunk_rec->objectid,
5652                                 chunk_rec->type,
5653                                 chunk_rec->offset,
5654                                 chunk_rec->length,
5655                                 chunk_rec->offset,
5656                                 chunk_rec->type_flags);
5657                 ret = -1;
5658         }
5659
5660         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
5661                                     chunk_rec->num_stripes);
5662         for (i = 0; i < chunk_rec->num_stripes; ++i) {
5663                 devid = chunk_rec->stripes[i].devid;
5664                 offset = chunk_rec->stripes[i].offset;
5665                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
5666                                                        devid, offset, length);
5667                 if (dev_extent_item) {
5668                         dev_extent_rec = container_of(dev_extent_item,
5669                                                 struct device_extent_record,
5670                                                 cache);
5671                         if (dev_extent_rec->objectid != devid ||
5672                             dev_extent_rec->offset != offset ||
5673                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
5674                             dev_extent_rec->length != length) {
5675                                 if (!silent)
5676                                         fprintf(stderr,
5677                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
5678                                                 chunk_rec->objectid,
5679                                                 chunk_rec->type,
5680                                                 chunk_rec->offset,
5681                                                 chunk_rec->stripes[i].devid,
5682                                                 chunk_rec->stripes[i].offset,
5683                                                 dev_extent_rec->objectid,
5684                                                 dev_extent_rec->offset,
5685                                                 dev_extent_rec->length);
5686                                 ret = -1;
5687                         } else {
5688                                 list_move(&dev_extent_rec->chunk_list,
5689                                           &chunk_rec->dextents);
5690                         }
5691                 } else {
5692                         if (!silent)
5693                                 fprintf(stderr,
5694                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
5695                                         chunk_rec->objectid,
5696                                         chunk_rec->type,
5697                                         chunk_rec->offset,
5698                                         chunk_rec->stripes[i].devid,
5699                                         chunk_rec->stripes[i].offset);
5700                         ret = -1;
5701                 }
5702         }
5703         return ret;
5704 }
5705
5706 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
5707 int check_chunks(struct cache_tree *chunk_cache,
5708                  struct block_group_tree *block_group_cache,
5709                  struct device_extent_tree *dev_extent_cache,
5710                  struct list_head *good, struct list_head *bad, int silent)
5711 {
5712         struct cache_extent *chunk_item;
5713         struct chunk_record *chunk_rec;
5714         struct block_group_record *bg_rec;
5715         struct device_extent_record *dext_rec;
5716         int err;
5717         int ret = 0;
5718
5719         chunk_item = first_cache_extent(chunk_cache);
5720         while (chunk_item) {
5721                 chunk_rec = container_of(chunk_item, struct chunk_record,
5722                                          cache);
5723                 err = check_chunk_refs(chunk_rec, block_group_cache,
5724                                        dev_extent_cache, silent);
5725                 if (err) {
5726                         ret = err;
5727                         if (bad)
5728                                 list_add_tail(&chunk_rec->list, bad);
5729                 } else {
5730                         if (good)
5731                                 list_add_tail(&chunk_rec->list, good);
5732                 }
5733
5734                 chunk_item = next_cache_extent(chunk_item);
5735         }
5736
5737         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
5738                 if (!silent)
5739                         fprintf(stderr,
5740                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
5741                                 bg_rec->objectid,
5742                                 bg_rec->offset,
5743                                 bg_rec->flags);
5744                 if (!ret)
5745                         ret = 1;
5746         }
5747
5748         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
5749                             chunk_list) {
5750                 if (!silent)
5751                         fprintf(stderr,
5752                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
5753                                 dext_rec->objectid,
5754                                 dext_rec->offset,
5755                                 dext_rec->length);
5756                 if (!ret)
5757                         ret = 1;
5758         }
5759         return ret;
5760 }
5761
5762
5763 static int check_device_used(struct device_record *dev_rec,
5764                              struct device_extent_tree *dext_cache)
5765 {
5766         struct cache_extent *cache;
5767         struct device_extent_record *dev_extent_rec;
5768         u64 total_byte = 0;
5769
5770         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
5771         while (cache) {
5772                 dev_extent_rec = container_of(cache,
5773                                               struct device_extent_record,
5774                                               cache);
5775                 if (dev_extent_rec->objectid != dev_rec->devid)
5776                         break;
5777
5778                 list_del(&dev_extent_rec->device_list);
5779                 total_byte += dev_extent_rec->length;
5780                 cache = next_cache_extent(cache);
5781         }
5782
5783         if (total_byte != dev_rec->byte_used) {
5784                 fprintf(stderr,
5785                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
5786                         total_byte, dev_rec->byte_used, dev_rec->objectid,
5787                         dev_rec->type, dev_rec->offset);
5788                 return -1;
5789         } else {
5790                 return 0;
5791         }
5792 }
5793
5794 /* check btrfs_dev_item -> btrfs_dev_extent */
5795 static int check_devices(struct rb_root *dev_cache,
5796                          struct device_extent_tree *dev_extent_cache)
5797 {
5798         struct rb_node *dev_node;
5799         struct device_record *dev_rec;
5800         struct device_extent_record *dext_rec;
5801         int err;
5802         int ret = 0;
5803
5804         dev_node = rb_first(dev_cache);
5805         while (dev_node) {
5806                 dev_rec = container_of(dev_node, struct device_record, node);
5807                 err = check_device_used(dev_rec, dev_extent_cache);
5808                 if (err)
5809                         ret = err;
5810
5811                 dev_node = rb_next(dev_node);
5812         }
5813         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
5814                             device_list) {
5815                 fprintf(stderr,
5816                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
5817                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
5818                 if (!ret)
5819                         ret = 1;
5820         }
5821         return ret;
5822 }
5823
5824 static int check_chunks_and_extents(struct btrfs_root *root)
5825 {
5826         struct rb_root dev_cache;
5827         struct cache_tree chunk_cache;
5828         struct block_group_tree block_group_cache;
5829         struct device_extent_tree dev_extent_cache;
5830         struct cache_tree extent_cache;
5831         struct cache_tree seen;
5832         struct cache_tree pending;
5833         struct cache_tree reada;
5834         struct cache_tree nodes;
5835         struct cache_tree corrupt_blocks;
5836         struct btrfs_path path;
5837         struct btrfs_key key;
5838         struct btrfs_key found_key;
5839         int ret, err = 0;
5840         u64 last = 0;
5841         struct block_info *bits;
5842         int bits_nr;
5843         struct extent_buffer *leaf;
5844         struct btrfs_trans_handle *trans = NULL;
5845         int slot;
5846         struct btrfs_root_item ri;
5847         struct list_head dropping_trees;
5848
5849         dev_cache = RB_ROOT;
5850         cache_tree_init(&chunk_cache);
5851         block_group_tree_init(&block_group_cache);
5852         device_extent_tree_init(&dev_extent_cache);
5853
5854         cache_tree_init(&extent_cache);
5855         cache_tree_init(&seen);
5856         cache_tree_init(&pending);
5857         cache_tree_init(&nodes);
5858         cache_tree_init(&reada);
5859         cache_tree_init(&corrupt_blocks);
5860         INIT_LIST_HEAD(&dropping_trees);
5861
5862         if (repair) {
5863                 trans = btrfs_start_transaction(root, 1);
5864                 if (IS_ERR(trans)) {
5865                         fprintf(stderr, "Error starting transaction\n");
5866                         return PTR_ERR(trans);
5867                 }
5868                 root->fs_info->fsck_extent_cache = &extent_cache;
5869                 root->fs_info->free_extent_hook = free_extent_hook;
5870                 root->fs_info->corrupt_blocks = &corrupt_blocks;
5871         }
5872
5873         bits_nr = 1024;
5874         bits = malloc(bits_nr * sizeof(struct block_info));
5875         if (!bits) {
5876                 perror("malloc");
5877                 exit(1);
5878         }
5879
5880 again:
5881         add_root_to_pending(root->fs_info->tree_root->node,
5882                             &extent_cache, &pending, &seen, &nodes,
5883                             &root->fs_info->tree_root->root_key);
5884
5885         add_root_to_pending(root->fs_info->chunk_root->node,
5886                             &extent_cache, &pending, &seen, &nodes,
5887                             &root->fs_info->chunk_root->root_key);
5888
5889         btrfs_init_path(&path);
5890         key.offset = 0;
5891         key.objectid = 0;
5892         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
5893         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
5894                                         &key, &path, 0, 0);
5895         BUG_ON(ret < 0);
5896         while(1) {
5897                 leaf = path.nodes[0];
5898                 slot = path.slots[0];
5899                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
5900                         ret = btrfs_next_leaf(root, &path);
5901                         if (ret != 0)
5902                                 break;
5903                         leaf = path.nodes[0];
5904                         slot = path.slots[0];
5905                 }
5906                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
5907                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
5908                         unsigned long offset;
5909                         struct extent_buffer *buf;
5910
5911                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5912                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
5913                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
5914                                 buf = read_tree_block(root->fs_info->tree_root,
5915                                                       btrfs_root_bytenr(&ri),
5916                                                       btrfs_level_size(root,
5917                                                       btrfs_root_level(&ri)),
5918                                                       0);
5919                                 if (!buf) {
5920                                         ret = -EIO;
5921                                         goto out;
5922                                 }
5923                                 add_root_to_pending(buf, &extent_cache,
5924                                                     &pending, &seen, &nodes,
5925                                                     &found_key);
5926                                 free_extent_buffer(buf);
5927                         } else {
5928                                 struct dropping_root_item_record *dri_rec;
5929                                 dri_rec = malloc(sizeof(*dri_rec));
5930                                 if (!dri_rec) {
5931                                         perror("malloc");
5932                                         exit(1);
5933                                 }
5934                                 memcpy(&dri_rec->ri, &ri, sizeof(ri));
5935                                 memcpy(&dri_rec->found_key, &found_key,
5936                                        sizeof(found_key));
5937                                 list_add_tail(&dri_rec->list, &dropping_trees);
5938                         }
5939                 }
5940                 path.slots[0]++;
5941         }
5942         btrfs_release_path(&path);
5943         while (1) {
5944                 ret = run_next_block(trans, root, bits, bits_nr, &last,
5945                                      &pending, &seen, &reada, &nodes,
5946                                      &extent_cache, &chunk_cache, &dev_cache,
5947                                      &block_group_cache, &dev_extent_cache,
5948                                      NULL);
5949                 if (ret != 0)
5950                         break;
5951         }
5952
5953         while (!list_empty(&dropping_trees)) {
5954                 struct dropping_root_item_record *rec;
5955                 struct extent_buffer *buf;
5956                 rec = list_entry(dropping_trees.next,
5957                                  struct dropping_root_item_record, list);
5958                 last = 0;
5959                 if (!bits) {
5960                         perror("realloc");
5961                         exit(1);
5962                 }
5963                 buf = read_tree_block(root->fs_info->tree_root,
5964                                       btrfs_root_bytenr(&rec->ri),
5965                                       btrfs_level_size(root,
5966                                       btrfs_root_level(&rec->ri)), 0);
5967                 if (!buf) {
5968                         ret = -EIO;
5969                         goto out;
5970                 }
5971                 add_root_to_pending(buf, &extent_cache, &pending,
5972                                     &seen, &nodes, &rec->found_key);
5973                 while (1) {
5974                         ret = run_next_block(trans, root, bits, bits_nr, &last,
5975                                              &pending, &seen, &reada,
5976                                              &nodes, &extent_cache,
5977                                              &chunk_cache, &dev_cache,
5978                                              &block_group_cache,
5979                                              &dev_extent_cache,
5980                                              &rec->ri);
5981                         if (ret != 0)
5982                                 break;
5983                 }
5984                 free_extent_buffer(buf);
5985                 list_del(&rec->list);
5986                 free(rec);
5987         }
5988
5989         if (ret >= 0)
5990                 ret = check_extent_refs(trans, root, &extent_cache);
5991         if (ret == -EAGAIN) {
5992                 ret = btrfs_commit_transaction(trans, root);
5993                 if (ret)
5994                         goto out;
5995
5996                 trans = btrfs_start_transaction(root, 1);
5997                 if (IS_ERR(trans)) {
5998                         ret = PTR_ERR(trans);
5999                         goto out;
6000                 }
6001
6002                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6003                 free_extent_cache_tree(&seen);
6004                 free_extent_cache_tree(&pending);
6005                 free_extent_cache_tree(&reada);
6006                 free_extent_cache_tree(&nodes);
6007                 free_extent_record_cache(root->fs_info, &extent_cache);
6008                 goto again;
6009         }
6010
6011         err = check_chunks(&chunk_cache, &block_group_cache,
6012                            &dev_extent_cache, NULL, NULL, 0);
6013         if (err && !ret)
6014                 ret = err;
6015
6016         err = check_devices(&dev_cache, &dev_extent_cache);
6017         if (err && !ret)
6018                 ret = err;
6019
6020         if (trans) {
6021                 err = btrfs_commit_transaction(trans, root);
6022                 if (!ret)
6023                         ret = err;
6024         }
6025 out:
6026         if (repair) {
6027                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6028                 root->fs_info->fsck_extent_cache = NULL;
6029                 root->fs_info->free_extent_hook = NULL;
6030                 root->fs_info->corrupt_blocks = NULL;
6031         }
6032         free(bits);
6033         free_chunk_cache_tree(&chunk_cache);
6034         free_device_cache_tree(&dev_cache);
6035         free_block_group_tree(&block_group_cache);
6036         free_device_extent_tree(&dev_extent_cache);
6037         free_extent_cache_tree(&seen);
6038         free_extent_cache_tree(&pending);
6039         free_extent_cache_tree(&reada);
6040         free_extent_cache_tree(&nodes);
6041         return ret;
6042 }
6043
6044 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
6045                            struct btrfs_root *root, int overwrite)
6046 {
6047         struct extent_buffer *c;
6048         struct extent_buffer *old = root->node;
6049         int level;
6050         int ret;
6051         struct btrfs_disk_key disk_key = {0,0,0};
6052
6053         level = 0;
6054
6055         if (overwrite) {
6056                 c = old;
6057                 extent_buffer_get(c);
6058                 goto init;
6059         }
6060         c = btrfs_alloc_free_block(trans, root,
6061                                    btrfs_level_size(root, 0),
6062                                    root->root_key.objectid,
6063                                    &disk_key, level, 0, 0);
6064         if (IS_ERR(c)) {
6065                 c = old;
6066                 extent_buffer_get(c);
6067                 overwrite = 1;
6068         }
6069 init:
6070         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
6071         btrfs_set_header_level(c, level);
6072         btrfs_set_header_bytenr(c, c->start);
6073         btrfs_set_header_generation(c, trans->transid);
6074         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
6075         btrfs_set_header_owner(c, root->root_key.objectid);
6076
6077         write_extent_buffer(c, root->fs_info->fsid,
6078                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
6079
6080         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
6081                             btrfs_header_chunk_tree_uuid(c),
6082                             BTRFS_UUID_SIZE);
6083
6084         btrfs_mark_buffer_dirty(c);
6085         /*
6086          * this case can happen in the following case:
6087          *
6088          * 1.overwrite previous root.
6089          *
6090          * 2.reinit reloc data root, this is because we skip pin
6091          * down reloc data tree before which means we can allocate
6092          * same block bytenr here.
6093          */
6094         if (old->start == c->start) {
6095                 btrfs_set_root_generation(&root->root_item,
6096                                           trans->transid);
6097                 root->root_item.level = btrfs_header_level(root->node);
6098                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
6099                                         &root->root_key, &root->root_item);
6100                 if (ret) {
6101                         free_extent_buffer(c);
6102                         return ret;
6103                 }
6104         }
6105         free_extent_buffer(old);
6106         root->node = c;
6107         add_root_to_dirty_list(root);
6108         return 0;
6109 }
6110
6111 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
6112                                 struct extent_buffer *eb, int tree_root)
6113 {
6114         struct extent_buffer *tmp;
6115         struct btrfs_root_item *ri;
6116         struct btrfs_key key;
6117         u64 bytenr;
6118         u32 leafsize;
6119         int level = btrfs_header_level(eb);
6120         int nritems;
6121         int ret;
6122         int i;
6123
6124         /*
6125          * If we have pinned this block before, don't pin it again.
6126          * This can not only avoid forever loop with broken filesystem
6127          * but also give us some speedups.
6128          */
6129         if (test_range_bit(&fs_info->pinned_extents, eb->start,
6130                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
6131                 return 0;
6132
6133         btrfs_pin_extent(fs_info, eb->start, eb->len);
6134
6135         leafsize = btrfs_super_leafsize(fs_info->super_copy);
6136         nritems = btrfs_header_nritems(eb);
6137         for (i = 0; i < nritems; i++) {
6138                 if (level == 0) {
6139                         btrfs_item_key_to_cpu(eb, &key, i);
6140                         if (key.type != BTRFS_ROOT_ITEM_KEY)
6141                                 continue;
6142                         /* Skip the extent root and reloc roots */
6143                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6144                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
6145                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
6146                                 continue;
6147                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
6148                         bytenr = btrfs_disk_root_bytenr(eb, ri);
6149
6150                         /*
6151                          * If at any point we start needing the real root we
6152                          * will have to build a stump root for the root we are
6153                          * in, but for now this doesn't actually use the root so
6154                          * just pass in extent_root.
6155                          */
6156                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6157                                               leafsize, 0);
6158                         if (!tmp) {
6159                                 fprintf(stderr, "Error reading root block\n");
6160                                 return -EIO;
6161                         }
6162                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
6163                         free_extent_buffer(tmp);
6164                         if (ret)
6165                                 return ret;
6166                 } else {
6167                         bytenr = btrfs_node_blockptr(eb, i);
6168
6169                         /* If we aren't the tree root don't read the block */
6170                         if (level == 1 && !tree_root) {
6171                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
6172                                 continue;
6173                         }
6174
6175                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6176                                               leafsize, 0);
6177                         if (!tmp) {
6178                                 fprintf(stderr, "Error reading tree block\n");
6179                                 return -EIO;
6180                         }
6181                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
6182                         free_extent_buffer(tmp);
6183                         if (ret)
6184                                 return ret;
6185                 }
6186         }
6187
6188         return 0;
6189 }
6190
6191 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
6192 {
6193         int ret;
6194
6195         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
6196         if (ret)
6197                 return ret;
6198
6199         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
6200 }
6201
6202 static int reset_block_groups(struct btrfs_fs_info *fs_info)
6203 {
6204         struct btrfs_block_group_cache *cache;
6205         struct btrfs_path *path;
6206         struct extent_buffer *leaf;
6207         struct btrfs_chunk *chunk;
6208         struct btrfs_key key;
6209         int ret;
6210         u64 start;
6211
6212         path = btrfs_alloc_path();
6213         if (!path)
6214                 return -ENOMEM;
6215
6216         key.objectid = 0;
6217         key.type = BTRFS_CHUNK_ITEM_KEY;
6218         key.offset = 0;
6219
6220         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
6221         if (ret < 0) {
6222                 btrfs_free_path(path);
6223                 return ret;
6224         }
6225
6226         /*
6227          * We do this in case the block groups were screwed up and had alloc
6228          * bits that aren't actually set on the chunks.  This happens with
6229          * restored images every time and could happen in real life I guess.
6230          */
6231         fs_info->avail_data_alloc_bits = 0;
6232         fs_info->avail_metadata_alloc_bits = 0;
6233         fs_info->avail_system_alloc_bits = 0;
6234
6235         /* First we need to create the in-memory block groups */
6236         while (1) {
6237                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6238                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
6239                         if (ret < 0) {
6240                                 btrfs_free_path(path);
6241                                 return ret;
6242                         }
6243                         if (ret) {
6244                                 ret = 0;
6245                                 break;
6246                         }
6247                 }
6248                 leaf = path->nodes[0];
6249                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6250                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
6251                         path->slots[0]++;
6252                         continue;
6253                 }
6254
6255                 chunk = btrfs_item_ptr(leaf, path->slots[0],
6256                                        struct btrfs_chunk);
6257                 btrfs_add_block_group(fs_info, 0,
6258                                       btrfs_chunk_type(leaf, chunk),
6259                                       key.objectid, key.offset,
6260                                       btrfs_chunk_length(leaf, chunk));
6261                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
6262                                  key.offset + btrfs_chunk_length(leaf, chunk),
6263                                  GFP_NOFS);
6264                 path->slots[0]++;
6265         }
6266         start = 0;
6267         while (1) {
6268                 cache = btrfs_lookup_first_block_group(fs_info, start);
6269                 if (!cache)
6270                         break;
6271                 cache->cached = 1;
6272                 start = cache->key.objectid + cache->key.offset;
6273         }
6274
6275         btrfs_free_path(path);
6276         return 0;
6277 }
6278
6279 static int reset_balance(struct btrfs_trans_handle *trans,
6280                          struct btrfs_fs_info *fs_info)
6281 {
6282         struct btrfs_root *root = fs_info->tree_root;
6283         struct btrfs_path *path;
6284         struct extent_buffer *leaf;
6285         struct btrfs_key key;
6286         int del_slot, del_nr = 0;
6287         int ret;
6288         int found = 0;
6289
6290         path = btrfs_alloc_path();
6291         if (!path)
6292                 return -ENOMEM;
6293
6294         key.objectid = BTRFS_BALANCE_OBJECTID;
6295         key.type = BTRFS_BALANCE_ITEM_KEY;
6296         key.offset = 0;
6297
6298         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6299         if (ret) {
6300                 if (ret > 0)
6301                         ret = 0;
6302                 if (!ret)
6303                         goto reinit_data_reloc;
6304                 else
6305                         goto out;
6306         }
6307
6308         ret = btrfs_del_item(trans, root, path);
6309         if (ret)
6310                 goto out;
6311         btrfs_release_path(path);
6312
6313         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
6314         key.type = BTRFS_ROOT_ITEM_KEY;
6315         key.offset = 0;
6316
6317         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6318         if (ret < 0)
6319                 goto out;
6320         while (1) {
6321                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6322                         if (!found)
6323                                 break;
6324
6325                         if (del_nr) {
6326                                 ret = btrfs_del_items(trans, root, path,
6327                                                       del_slot, del_nr);
6328                                 del_nr = 0;
6329                                 if (ret)
6330                                         goto out;
6331                         }
6332                         key.offset++;
6333                         btrfs_release_path(path);
6334
6335                         found = 0;
6336                         ret = btrfs_search_slot(trans, root, &key, path,
6337                                                 -1, 1);
6338                         if (ret < 0)
6339                                 goto out;
6340                         continue;
6341                 }
6342                 found = 1;
6343                 leaf = path->nodes[0];
6344                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6345                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
6346                         break;
6347                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6348                         path->slots[0]++;
6349                         continue;
6350                 }
6351                 if (!del_nr) {
6352                         del_slot = path->slots[0];
6353                         del_nr = 1;
6354                 } else {
6355                         del_nr++;
6356                 }
6357                 path->slots[0]++;
6358         }
6359
6360         if (del_nr) {
6361                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
6362                 if (ret)
6363                         goto out;
6364         }
6365         btrfs_release_path(path);
6366
6367 reinit_data_reloc:
6368         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
6369         key.type = BTRFS_ROOT_ITEM_KEY;
6370         key.offset = (u64)-1;
6371         root = btrfs_read_fs_root(fs_info, &key);
6372         if (IS_ERR(root)) {
6373                 fprintf(stderr, "Error reading data reloc tree\n");
6374                 return PTR_ERR(root);
6375         }
6376         record_root_in_trans(trans, root);
6377         ret = btrfs_fsck_reinit_root(trans, root, 0);
6378         if (ret)
6379                 goto out;
6380         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
6381 out:
6382         btrfs_free_path(path);
6383         return ret;
6384 }
6385
6386 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
6387                               struct btrfs_fs_info *fs_info)
6388 {
6389         u64 start = 0;
6390         int ret;
6391
6392         /*
6393          * The only reason we don't do this is because right now we're just
6394          * walking the trees we find and pinning down their bytes, we don't look
6395          * at any of the leaves.  In order to do mixed groups we'd have to check
6396          * the leaves of any fs roots and pin down the bytes for any file
6397          * extents we find.  Not hard but why do it if we don't have to?
6398          */
6399         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
6400                 fprintf(stderr, "We don't support re-initing the extent tree "
6401                         "for mixed block groups yet, please notify a btrfs "
6402                         "developer you want to do this so they can add this "
6403                         "functionality.\n");
6404                 return -EINVAL;
6405         }
6406
6407         /*
6408          * first we need to walk all of the trees except the extent tree and pin
6409          * down the bytes that are in use so we don't overwrite any existing
6410          * metadata.
6411          */
6412         ret = pin_metadata_blocks(fs_info);
6413         if (ret) {
6414                 fprintf(stderr, "error pinning down used bytes\n");
6415                 return ret;
6416         }
6417
6418         /*
6419          * Need to drop all the block groups since we're going to recreate all
6420          * of them again.
6421          */
6422         btrfs_free_block_groups(fs_info);
6423         ret = reset_block_groups(fs_info);
6424         if (ret) {
6425                 fprintf(stderr, "error resetting the block groups\n");
6426                 return ret;
6427         }
6428
6429         /* Ok we can allocate now, reinit the extent root */
6430         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
6431         if (ret) {
6432                 fprintf(stderr, "extent root initialization failed\n");
6433                 /*
6434                  * When the transaction code is updated we should end the
6435                  * transaction, but for now progs only knows about commit so
6436                  * just return an error.
6437                  */
6438                 return ret;
6439         }
6440
6441         /*
6442          * Now we have all the in-memory block groups setup so we can make
6443          * allocations properly, and the metadata we care about is safe since we
6444          * pinned all of it above.
6445          */
6446         while (1) {
6447                 struct btrfs_block_group_cache *cache;
6448
6449                 cache = btrfs_lookup_first_block_group(fs_info, start);
6450                 if (!cache)
6451                         break;
6452                 start = cache->key.objectid + cache->key.offset;
6453                 ret = btrfs_insert_item(trans, fs_info->extent_root,
6454                                         &cache->key, &cache->item,
6455                                         sizeof(cache->item));
6456                 if (ret) {
6457                         fprintf(stderr, "Error adding block group\n");
6458                         return ret;
6459                 }
6460                 btrfs_extent_post_op(trans, fs_info->extent_root);
6461         }
6462
6463         ret = reset_balance(trans, fs_info);
6464         if (ret)
6465                 fprintf(stderr, "error reseting the pending balance\n");
6466
6467         return ret;
6468 }
6469
6470 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
6471 {
6472         struct btrfs_path *path;
6473         struct btrfs_trans_handle *trans;
6474         struct btrfs_key key;
6475         int ret;
6476
6477         printf("Recowing metadata block %llu\n", eb->start);
6478         key.objectid = btrfs_header_owner(eb);
6479         key.type = BTRFS_ROOT_ITEM_KEY;
6480         key.offset = (u64)-1;
6481
6482         root = btrfs_read_fs_root(root->fs_info, &key);
6483         if (IS_ERR(root)) {
6484                 fprintf(stderr, "Couldn't find owner root %llu\n",
6485                         key.objectid);
6486                 return PTR_ERR(root);
6487         }
6488
6489         path = btrfs_alloc_path();
6490         if (!path)
6491                 return -ENOMEM;
6492
6493         trans = btrfs_start_transaction(root, 1);
6494         if (IS_ERR(trans)) {
6495                 btrfs_free_path(path);
6496                 return PTR_ERR(trans);
6497         }
6498
6499         path->lowest_level = btrfs_header_level(eb);
6500         if (path->lowest_level)
6501                 btrfs_node_key_to_cpu(eb, &key, 0);
6502         else
6503                 btrfs_item_key_to_cpu(eb, &key, 0);
6504
6505         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6506         btrfs_commit_transaction(trans, root);
6507         btrfs_free_path(path);
6508         return ret;
6509 }
6510
6511 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
6512 {
6513         struct btrfs_path *path;
6514         struct btrfs_trans_handle *trans;
6515         struct btrfs_key key;
6516         int ret;
6517
6518         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
6519                bad->key.type, bad->key.offset);
6520         key.objectid = bad->root_id;
6521         key.type = BTRFS_ROOT_ITEM_KEY;
6522         key.offset = (u64)-1;
6523
6524         root = btrfs_read_fs_root(root->fs_info, &key);
6525         if (IS_ERR(root)) {
6526                 fprintf(stderr, "Couldn't find owner root %llu\n",
6527                         key.objectid);
6528                 return PTR_ERR(root);
6529         }
6530
6531         path = btrfs_alloc_path();
6532         if (!path)
6533                 return -ENOMEM;
6534
6535         trans = btrfs_start_transaction(root, 1);
6536         if (IS_ERR(trans)) {
6537                 btrfs_free_path(path);
6538                 return PTR_ERR(trans);
6539         }
6540
6541         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
6542         if (ret) {
6543                 if (ret > 0)
6544                         ret = 0;
6545                 goto out;
6546         }
6547         ret = btrfs_del_item(trans, root, path);
6548 out:
6549         btrfs_commit_transaction(trans, root);
6550         btrfs_free_path(path);
6551         return ret;
6552 }
6553
6554 static int zero_log_tree(struct btrfs_root *root)
6555 {
6556         struct btrfs_trans_handle *trans;
6557         int ret;
6558
6559         trans = btrfs_start_transaction(root, 1);
6560         if (IS_ERR(trans)) {
6561                 ret = PTR_ERR(trans);
6562                 return ret;
6563         }
6564         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
6565         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
6566         ret = btrfs_commit_transaction(trans, root);
6567         return ret;
6568 }
6569
6570 static struct option long_options[] = {
6571         { "super", 1, NULL, 's' },
6572         { "repair", 0, NULL, 0 },
6573         { "init-csum-tree", 0, NULL, 0 },
6574         { "init-extent-tree", 0, NULL, 0 },
6575         { "check-data-csum", 0, NULL, 0 },
6576         { "backup", 0, NULL, 0 },
6577         { "subvol-extents", no_argument, NULL, 'E' },
6578         { "qgroup-report", 0, NULL, 'Q' },
6579         { NULL, 0, NULL, 0}
6580 };
6581
6582 const char * const cmd_check_usage[] = {
6583         "btrfs check [options] <device>",
6584         "Check an unmounted btrfs filesystem.",
6585         "",
6586         "-s|--super <superblock>     use this superblock copy",
6587         "-b|--backup                 use the backup root copy",
6588         "--repair                    try to repair the filesystem",
6589         "--init-csum-tree            create a new CRC tree",
6590         "--init-extent-tree          create a new extent tree",
6591         "--check-data-csum           verify checkums of data blocks",
6592         "--qgroup-report             print a report on qgroup consistency",
6593         "--subvol-extents            print subvolume extents and sharing state",
6594         NULL
6595 };
6596
6597 int cmd_check(int argc, char **argv)
6598 {
6599         struct cache_tree root_cache;
6600         struct btrfs_root *root;
6601         struct btrfs_fs_info *info;
6602         u64 bytenr = 0;
6603         u64 subvolid = 0;
6604         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
6605         int ret;
6606         u64 num;
6607         int option_index = 0;
6608         int init_csum_tree = 0;
6609         int qgroup_report = 0;
6610         enum btrfs_open_ctree_flags ctree_flags =
6611                 OPEN_CTREE_PARTIAL | OPEN_CTREE_EXCLUSIVE;
6612
6613         while(1) {
6614                 int c;
6615                 c = getopt_long(argc, argv, "as:b", long_options,
6616                                 &option_index);
6617                 if (c < 0)
6618                         break;
6619                 switch(c) {
6620                         case 'a': /* ignored */ break;
6621                         case 'b':
6622                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
6623                                 break;
6624                         case 's':
6625                                 num = arg_strtou64(optarg);
6626                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
6627                                         fprintf(stderr,
6628                                                 "ERROR: super mirror should be less than: %d\n",
6629                                                 BTRFS_SUPER_MIRROR_MAX);
6630                                         exit(1);
6631                                 }
6632                                 bytenr = btrfs_sb_offset(((int)num));
6633                                 printf("using SB copy %llu, bytenr %llu\n", num,
6634                                        (unsigned long long)bytenr);
6635                                 break;
6636                         case 'Q':
6637                                 qgroup_report = 1;
6638                                 break;
6639                         case 'E':
6640                                 subvolid = arg_strtou64(optarg);
6641                                 break;
6642                         case '?':
6643                         case 'h':
6644                                 usage(cmd_check_usage);
6645                 }
6646                 if (option_index == 1) {
6647                         printf("enabling repair mode\n");
6648                         repair = 1;
6649                         ctree_flags |= OPEN_CTREE_WRITES;
6650                 } else if (option_index == 2) {
6651                         printf("Creating a new CRC tree\n");
6652                         init_csum_tree = 1;
6653                         repair = 1;
6654                         ctree_flags |= OPEN_CTREE_WRITES;
6655                 } else if (option_index == 3) {
6656                         init_extent_tree = 1;
6657                         ctree_flags |= (OPEN_CTREE_WRITES |
6658                                         OPEN_CTREE_NO_BLOCK_GROUPS);
6659                         repair = 1;
6660                 } else if (option_index == 4) {
6661                         check_data_csum = 1;
6662                 }
6663         }
6664         argc = argc - optind;
6665
6666         if (check_argc_exact(argc, 1))
6667                 usage(cmd_check_usage);
6668
6669         radix_tree_init();
6670         cache_tree_init(&root_cache);
6671
6672         if((ret = check_mounted(argv[optind])) < 0) {
6673                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
6674                 goto err_out;
6675         } else if(ret) {
6676                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
6677                 ret = -EBUSY;
6678                 goto err_out;
6679         }
6680
6681         info = open_ctree_fs_info(argv[optind], bytenr, 0, ctree_flags);
6682         if (!info) {
6683                 fprintf(stderr, "Couldn't open file system\n");
6684                 ret = -EIO;
6685                 goto err_out;
6686         }
6687
6688         root = info->fs_root;
6689         /*
6690          * repair mode will force us to commit transaction which
6691          * will make us fail to load log tree when mounting.
6692          */
6693         if (repair && btrfs_super_log_root(info->super_copy)) {
6694                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
6695                 if (!ret) {
6696                         ret = 1;
6697                         goto close_out;
6698                 }
6699                 ret = zero_log_tree(root);
6700                 if (ret) {
6701                         fprintf(stderr, "fail to zero log tree\n");
6702                         goto close_out;
6703                 }
6704         }
6705
6706         uuid_unparse(info->super_copy->fsid, uuidbuf);
6707         if (qgroup_report) {
6708                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
6709                        uuidbuf);
6710                 ret = qgroup_verify_all(info);
6711                 if (ret == 0)
6712                         print_qgroup_report(1);
6713                 goto close_out;
6714         }
6715         if (subvolid) {
6716                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
6717                        subvolid, argv[optind], uuidbuf);
6718                 ret = print_extent_state(info, subvolid);
6719                 goto close_out;
6720         }
6721         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
6722
6723         if (!extent_buffer_uptodate(info->tree_root->node) ||
6724             !extent_buffer_uptodate(info->dev_root->node) ||
6725             !extent_buffer_uptodate(info->chunk_root->node)) {
6726                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6727                 ret = -EIO;
6728                 goto close_out;
6729         }
6730
6731         if (init_extent_tree || init_csum_tree) {
6732                 struct btrfs_trans_handle *trans;
6733
6734                 trans = btrfs_start_transaction(info->extent_root, 0);
6735                 if (IS_ERR(trans)) {
6736                         fprintf(stderr, "Error starting transaction\n");
6737                         ret = PTR_ERR(trans);
6738                         goto close_out;
6739                 }
6740
6741                 if (init_extent_tree) {
6742                         printf("Creating a new extent tree\n");
6743                         ret = reinit_extent_tree(trans, info);
6744                         if (ret)
6745                                 goto close_out;
6746                 }
6747
6748                 if (init_csum_tree) {
6749                         fprintf(stderr, "Reinit crc root\n");
6750                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
6751                         if (ret) {
6752                                 fprintf(stderr, "crc root initialization failed\n");
6753                                 ret = -EIO;
6754                                 goto close_out;
6755                         }
6756                 }
6757                 /*
6758                  * Ok now we commit and run the normal fsck, which will add
6759                  * extent entries for all of the items it finds.
6760                  */
6761                 ret = btrfs_commit_transaction(trans, info->extent_root);
6762                 if (ret)
6763                         goto close_out;
6764         }
6765         if (!extent_buffer_uptodate(info->extent_root->node)) {
6766                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6767                 ret = -EIO;
6768                 goto close_out;
6769         }
6770
6771         fprintf(stderr, "checking extents\n");
6772         ret = check_chunks_and_extents(root);
6773         if (ret)
6774                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
6775
6776         fprintf(stderr, "checking free space cache\n");
6777         ret = check_space_cache(root);
6778         if (ret)
6779                 goto out;
6780
6781         /*
6782          * We used to have to have these hole extents in between our real
6783          * extents so if we don't have this flag set we need to make sure there
6784          * are no gaps in the file extents for inodes, otherwise we can just
6785          * ignore it when this happens.
6786          */
6787         no_holes = btrfs_fs_incompat(root->fs_info,
6788                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
6789         fprintf(stderr, "checking fs roots\n");
6790         ret = check_fs_roots(root, &root_cache);
6791         if (ret)
6792                 goto out;
6793
6794         fprintf(stderr, "checking csums\n");
6795         ret = check_csums(root);
6796         if (ret)
6797                 goto out;
6798
6799         fprintf(stderr, "checking root refs\n");
6800         ret = check_root_refs(root, &root_cache);
6801         if (ret)
6802                 goto out;
6803
6804         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
6805                 struct extent_buffer *eb;
6806
6807                 eb = list_first_entry(&root->fs_info->recow_ebs,
6808                                       struct extent_buffer, recow);
6809                 ret = recow_extent_buffer(root, eb);
6810                 if (ret)
6811                         break;
6812         }
6813
6814         while (!list_empty(&delete_items)) {
6815                 struct bad_item *bad;
6816
6817                 bad = list_first_entry(&delete_items, struct bad_item, list);
6818                 list_del_init(&bad->list);
6819                 if (repair)
6820                         ret = delete_bad_item(root, bad);
6821                 free(bad);
6822         }
6823
6824         if (info->quota_enabled) {
6825                 int err;
6826                 fprintf(stderr, "checking quota groups\n");
6827                 err = qgroup_verify_all(info);
6828                 if (err)
6829                         goto out;
6830         }
6831
6832         if (!list_empty(&root->fs_info->recow_ebs)) {
6833                 fprintf(stderr, "Transid errors in file system\n");
6834                 ret = 1;
6835         }
6836 out:
6837         print_qgroup_report(0);
6838         if (found_old_backref) { /*
6839                  * there was a disk format change when mixed
6840                  * backref was in testing tree. The old format
6841                  * existed about one week.
6842                  */
6843                 printf("\n * Found old mixed backref format. "
6844                        "The old format is not supported! *"
6845                        "\n * Please mount the FS in readonly mode, "
6846                        "backup data and re-format the FS. *\n\n");
6847                 ret = 1;
6848         }
6849         printf("found %llu bytes used err is %d\n",
6850                (unsigned long long)bytes_used, ret);
6851         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
6852         printf("total tree bytes: %llu\n",
6853                (unsigned long long)total_btree_bytes);
6854         printf("total fs tree bytes: %llu\n",
6855                (unsigned long long)total_fs_tree_bytes);
6856         printf("total extent tree bytes: %llu\n",
6857                (unsigned long long)total_extent_tree_bytes);
6858         printf("btree space waste bytes: %llu\n",
6859                (unsigned long long)btree_space_waste);
6860         printf("file data blocks allocated: %llu\n referenced %llu\n",
6861                 (unsigned long long)data_bytes_allocated,
6862                 (unsigned long long)data_bytes_referenced);
6863         printf("%s\n", BTRFS_BUILD_VERSION);
6864
6865         free_root_recs_tree(&root_cache);
6866 close_out:
6867         close_ctree(root);
6868 err_out:
6869         return ret;
6870 }