btrfs-progs: Fix the argument requirement for '--subvol-extents'
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "ctree.h"
31 #include "volumes.h"
32 #include "repair.h"
33 #include "disk-io.h"
34 #include "print-tree.h"
35 #include "transaction.h"
36 #include "version.h"
37 #include "utils.h"
38 #include "commands.h"
39 #include "free-space-cache.h"
40 #include "btrfsck.h"
41 #include "qgroup-verify.h"
42 #include "rbtree-utils.h"
43 #include "backref.h"
44 #include "ulist.h"
45
46 static u64 bytes_used = 0;
47 static u64 total_csum_bytes = 0;
48 static u64 total_btree_bytes = 0;
49 static u64 total_fs_tree_bytes = 0;
50 static u64 total_extent_tree_bytes = 0;
51 static u64 btree_space_waste = 0;
52 static u64 data_bytes_allocated = 0;
53 static u64 data_bytes_referenced = 0;
54 static int found_old_backref = 0;
55 static LIST_HEAD(duplicate_extents);
56 static LIST_HEAD(delete_items);
57 static int repair = 0;
58 static int no_holes = 0;
59 static int init_extent_tree = 0;
60 static int check_data_csum = 0;
61
62 struct extent_backref {
63         struct list_head list;
64         unsigned int is_data:1;
65         unsigned int found_extent_tree:1;
66         unsigned int full_backref:1;
67         unsigned int found_ref:1;
68         unsigned int broken:1;
69 };
70
71 struct data_backref {
72         struct extent_backref node;
73         union {
74                 u64 parent;
75                 u64 root;
76         };
77         u64 owner;
78         u64 offset;
79         u64 disk_bytenr;
80         u64 bytes;
81         u64 ram_bytes;
82         u32 num_refs;
83         u32 found_ref;
84 };
85
86 struct tree_backref {
87         struct extent_backref node;
88         union {
89                 u64 parent;
90                 u64 root;
91         };
92 };
93
94 struct extent_record {
95         struct list_head backrefs;
96         struct list_head dups;
97         struct list_head list;
98         struct cache_extent cache;
99         struct btrfs_disk_key parent_key;
100         u64 start;
101         u64 max_size;
102         u64 nr;
103         u64 refs;
104         u64 extent_item_refs;
105         u64 generation;
106         u64 parent_generation;
107         u64 info_objectid;
108         u32 num_duplicates;
109         u8 info_level;
110         unsigned int found_rec:1;
111         unsigned int content_checked:1;
112         unsigned int owner_ref_checked:1;
113         unsigned int is_root:1;
114         unsigned int metadata:1;
115 };
116
117 struct inode_backref {
118         struct list_head list;
119         unsigned int found_dir_item:1;
120         unsigned int found_dir_index:1;
121         unsigned int found_inode_ref:1;
122         unsigned int filetype:8;
123         int errors;
124         unsigned int ref_type;
125         u64 dir;
126         u64 index;
127         u16 namelen;
128         char name[0];
129 };
130
131 struct dropping_root_item_record {
132         struct list_head list;
133         struct btrfs_root_item ri;
134         struct btrfs_key found_key;
135 };
136
/*
 * Error bits stored in inode_backref.errors and root_backref.errors;
 * print_ref_error() turns them into text.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
150
151 struct inode_record {
152         struct list_head backrefs;
153         unsigned int checked:1;
154         unsigned int merging:1;
155         unsigned int found_inode_item:1;
156         unsigned int found_dir_item:1;
157         unsigned int found_file_extent:1;
158         unsigned int found_csum_item:1;
159         unsigned int some_csum_missing:1;
160         unsigned int nodatasum:1;
161         int errors;
162
163         u64 ino;
164         u32 nlink;
165         u32 imode;
166         u64 isize;
167         u64 nbytes;
168
169         u32 found_link;
170         u64 found_size;
171         u64 extent_start;
172         u64 extent_end;
173         u64 first_extent_gap;
174
175         u32 refs;
176 };
177
/*
 * Error bits stored in inode_record.errors; print_inode_error() turns them
 * into text.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
192
193 struct root_backref {
194         struct list_head list;
195         unsigned int found_dir_item:1;
196         unsigned int found_dir_index:1;
197         unsigned int found_back_ref:1;
198         unsigned int found_forward_ref:1;
199         unsigned int reachable:1;
200         int errors;
201         u64 ref_root;
202         u64 dir;
203         u64 index;
204         u16 namelen;
205         char name[0];
206 };
207
208 struct root_record {
209         struct list_head backrefs;
210         struct cache_extent cache;
211         unsigned int found_root_item:1;
212         u64 objectid;
213         u32 found_ref;
214 };
215
216 struct ptr_node {
217         struct cache_extent cache;
218         void *data;
219 };
220
221 struct shared_node {
222         struct cache_extent cache;
223         struct cache_tree root_cache;
224         struct cache_tree inode_cache;
225         struct inode_record *current;
226         u32 refs;
227 };
228
229 struct block_info {
230         u64 start;
231         u32 size;
232 };
233
234 struct walk_control {
235         struct cache_tree shared;
236         struct shared_node *nodes[BTRFS_MAX_LEVEL];
237         int active_node;
238         int root_level;
239 };
240
241 struct bad_item {
242         struct btrfs_key key;
243         u64 root_id;
244         struct list_head list;
245 };
246
247 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
248
249 static void record_root_in_trans(struct btrfs_trans_handle *trans,
250                                  struct btrfs_root *root)
251 {
252         if (root->last_trans != trans->transid) {
253                 root->track_dirty = 1;
254                 root->last_trans = trans->transid;
255                 root->commit_root = root->node;
256                 extent_buffer_get(root->node);
257         }
258 }
259
260 static u8 imode_to_type(u32 imode)
261 {
262 #define S_SHIFT 12
263         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
264                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
265                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
266                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
267                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
268                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
269                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
270                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
271         };
272
273         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
274 #undef S_SHIFT
275 }
276
277 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
278 {
279         struct device_record *rec1;
280         struct device_record *rec2;
281
282         rec1 = rb_entry(node1, struct device_record, node);
283         rec2 = rb_entry(node2, struct device_record, node);
284         if (rec1->devid > rec2->devid)
285                 return -1;
286         else if (rec1->devid < rec2->devid)
287                 return 1;
288         else
289                 return 0;
290 }
291
292 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
293 {
294         struct inode_record *rec;
295         struct inode_backref *backref;
296         struct inode_backref *orig;
297         size_t size;
298
299         rec = malloc(sizeof(*rec));
300         memcpy(rec, orig_rec, sizeof(*rec));
301         rec->refs = 1;
302         INIT_LIST_HEAD(&rec->backrefs);
303
304         list_for_each_entry(orig, &orig_rec->backrefs, list) {
305                 size = sizeof(*orig) + orig->namelen + 1;
306                 backref = malloc(size);
307                 memcpy(backref, orig, size);
308                 list_add_tail(&backref->list, &rec->backrefs);
309         }
310         return rec;
311 }
312
313 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
314 {
315         u64 root_objectid = root->root_key.objectid;
316         int errors = rec->errors;
317
318         if (!errors)
319                 return;
320         /* reloc root errors, we print its corresponding fs root objectid*/
321         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
322                 root_objectid = root->root_key.offset;
323                 fprintf(stderr, "reloc");
324         }
325         fprintf(stderr, "root %llu inode %llu errors %x",
326                 (unsigned long long) root_objectid,
327                 (unsigned long long) rec->ino, rec->errors);
328
329         if (errors & I_ERR_NO_INODE_ITEM)
330                 fprintf(stderr, ", no inode item");
331         if (errors & I_ERR_NO_ORPHAN_ITEM)
332                 fprintf(stderr, ", no orphan item");
333         if (errors & I_ERR_DUP_INODE_ITEM)
334                 fprintf(stderr, ", dup inode item");
335         if (errors & I_ERR_DUP_DIR_INDEX)
336                 fprintf(stderr, ", dup dir index");
337         if (errors & I_ERR_ODD_DIR_ITEM)
338                 fprintf(stderr, ", odd dir item");
339         if (errors & I_ERR_ODD_FILE_EXTENT)
340                 fprintf(stderr, ", odd file extent");
341         if (errors & I_ERR_BAD_FILE_EXTENT)
342                 fprintf(stderr, ", bad file extent");
343         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
344                 fprintf(stderr, ", file extent overlap");
345         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
346                 fprintf(stderr, ", file extent discount");
347         if (errors & I_ERR_DIR_ISIZE_WRONG)
348                 fprintf(stderr, ", dir isize wrong");
349         if (errors & I_ERR_FILE_NBYTES_WRONG)
350                 fprintf(stderr, ", nbytes wrong");
351         if (errors & I_ERR_ODD_CSUM_ITEM)
352                 fprintf(stderr, ", odd csum item");
353         if (errors & I_ERR_SOME_CSUM_MISSING)
354                 fprintf(stderr, ", some csum missing");
355         if (errors & I_ERR_LINK_COUNT_WRONG)
356                 fprintf(stderr, ", link count wrong");
357         fprintf(stderr, "\n");
358 }
359
360 static void print_ref_error(int errors)
361 {
362         if (errors & REF_ERR_NO_DIR_ITEM)
363                 fprintf(stderr, ", no dir item");
364         if (errors & REF_ERR_NO_DIR_INDEX)
365                 fprintf(stderr, ", no dir index");
366         if (errors & REF_ERR_NO_INODE_REF)
367                 fprintf(stderr, ", no inode ref");
368         if (errors & REF_ERR_DUP_DIR_ITEM)
369                 fprintf(stderr, ", dup dir item");
370         if (errors & REF_ERR_DUP_DIR_INDEX)
371                 fprintf(stderr, ", dup dir index");
372         if (errors & REF_ERR_DUP_INODE_REF)
373                 fprintf(stderr, ", dup inode ref");
374         if (errors & REF_ERR_INDEX_UNMATCH)
375                 fprintf(stderr, ", index unmatch");
376         if (errors & REF_ERR_FILETYPE_UNMATCH)
377                 fprintf(stderr, ", filetype unmatch");
378         if (errors & REF_ERR_NAME_TOO_LONG)
379                 fprintf(stderr, ", name too long");
380         if (errors & REF_ERR_NO_ROOT_REF)
381                 fprintf(stderr, ", no root ref");
382         if (errors & REF_ERR_NO_ROOT_BACKREF)
383                 fprintf(stderr, ", no root backref");
384         if (errors & REF_ERR_DUP_ROOT_REF)
385                 fprintf(stderr, ", dup root ref");
386         if (errors & REF_ERR_DUP_ROOT_BACKREF)
387                 fprintf(stderr, ", dup root backref");
388         fprintf(stderr, "\n");
389 }
390
391 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
392                                           u64 ino, int mod)
393 {
394         struct ptr_node *node;
395         struct cache_extent *cache;
396         struct inode_record *rec = NULL;
397         int ret;
398
399         cache = lookup_cache_extent(inode_cache, ino, 1);
400         if (cache) {
401                 node = container_of(cache, struct ptr_node, cache);
402                 rec = node->data;
403                 if (mod && rec->refs > 1) {
404                         node->data = clone_inode_rec(rec);
405                         rec->refs--;
406                         rec = node->data;
407                 }
408         } else if (mod) {
409                 rec = calloc(1, sizeof(*rec));
410                 rec->ino = ino;
411                 rec->extent_start = (u64)-1;
412                 rec->first_extent_gap = (u64)-1;
413                 rec->refs = 1;
414                 INIT_LIST_HEAD(&rec->backrefs);
415
416                 node = malloc(sizeof(*node));
417                 node->cache.start = ino;
418                 node->cache.size = 1;
419                 node->data = rec;
420
421                 if (ino == BTRFS_FREE_INO_OBJECTID)
422                         rec->found_link = 1;
423
424                 ret = insert_cache_extent(inode_cache, &node->cache);
425                 BUG_ON(ret);
426         }
427         return rec;
428 }
429
430 static void free_inode_rec(struct inode_record *rec)
431 {
432         struct inode_backref *backref;
433
434         if (--rec->refs > 0)
435                 return;
436
437         while (!list_empty(&rec->backrefs)) {
438                 backref = list_entry(rec->backrefs.next,
439                                      struct inode_backref, list);
440                 list_del(&backref->list);
441                 free(backref);
442         }
443         free(rec);
444 }
445
446 static int can_free_inode_rec(struct inode_record *rec)
447 {
448         if (!rec->errors && rec->checked && rec->found_inode_item &&
449             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
450                 return 1;
451         return 0;
452 }
453
454 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
455                                  struct inode_record *rec)
456 {
457         struct cache_extent *cache;
458         struct inode_backref *tmp, *backref;
459         struct ptr_node *node;
460         unsigned char filetype;
461
462         if (!rec->found_inode_item)
463                 return;
464
465         filetype = imode_to_type(rec->imode);
466         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
467                 if (backref->found_dir_item && backref->found_dir_index) {
468                         if (backref->filetype != filetype)
469                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
470                         if (!backref->errors && backref->found_inode_ref) {
471                                 list_del(&backref->list);
472                                 free(backref);
473                         }
474                 }
475         }
476
477         if (!rec->checked || rec->merging)
478                 return;
479
480         if (S_ISDIR(rec->imode)) {
481                 if (rec->found_size != rec->isize)
482                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
483                 if (rec->found_file_extent)
484                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
485         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
486                 if (rec->found_dir_item)
487                         rec->errors |= I_ERR_ODD_DIR_ITEM;
488                 if (rec->found_size != rec->nbytes)
489                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
490                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
491                         rec->first_extent_gap = 0;
492                 if (rec->nlink > 0 && !no_holes &&
493                     (rec->extent_end < rec->isize ||
494                      rec->first_extent_gap < rec->isize))
495                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
496         }
497
498         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
499                 if (rec->found_csum_item && rec->nodatasum)
500                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
501                 if (rec->some_csum_missing && !rec->nodatasum)
502                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
503         }
504
505         BUG_ON(rec->refs != 1);
506         if (can_free_inode_rec(rec)) {
507                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
508                 node = container_of(cache, struct ptr_node, cache);
509                 BUG_ON(node->data != rec);
510                 remove_cache_extent(inode_cache, &node->cache);
511                 free(node);
512                 free_inode_rec(rec);
513         }
514 }
515
516 static int check_orphan_item(struct btrfs_root *root, u64 ino)
517 {
518         struct btrfs_path path;
519         struct btrfs_key key;
520         int ret;
521
522         key.objectid = BTRFS_ORPHAN_OBJECTID;
523         key.type = BTRFS_ORPHAN_ITEM_KEY;
524         key.offset = ino;
525
526         btrfs_init_path(&path);
527         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
528         btrfs_release_path(&path);
529         if (ret > 0)
530                 ret = -ENOENT;
531         return ret;
532 }
533
534 static int process_inode_item(struct extent_buffer *eb,
535                               int slot, struct btrfs_key *key,
536                               struct shared_node *active_node)
537 {
538         struct inode_record *rec;
539         struct btrfs_inode_item *item;
540
541         rec = active_node->current;
542         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
543         if (rec->found_inode_item) {
544                 rec->errors |= I_ERR_DUP_INODE_ITEM;
545                 return 1;
546         }
547         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
548         rec->nlink = btrfs_inode_nlink(eb, item);
549         rec->isize = btrfs_inode_size(eb, item);
550         rec->nbytes = btrfs_inode_nbytes(eb, item);
551         rec->imode = btrfs_inode_mode(eb, item);
552         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
553                 rec->nodatasum = 1;
554         rec->found_inode_item = 1;
555         if (rec->nlink == 0)
556                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
557         maybe_free_inode_rec(&active_node->inode_cache, rec);
558         return 0;
559 }
560
561 static struct inode_backref *get_inode_backref(struct inode_record *rec,
562                                                 const char *name,
563                                                 int namelen, u64 dir)
564 {
565         struct inode_backref *backref;
566
567         list_for_each_entry(backref, &rec->backrefs, list) {
568                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
569                         break;
570                 if (backref->dir != dir || backref->namelen != namelen)
571                         continue;
572                 if (memcmp(name, backref->name, namelen))
573                         continue;
574                 return backref;
575         }
576
577         backref = malloc(sizeof(*backref) + namelen + 1);
578         memset(backref, 0, sizeof(*backref));
579         backref->dir = dir;
580         backref->namelen = namelen;
581         memcpy(backref->name, name, namelen);
582         backref->name[namelen] = '\0';
583         list_add_tail(&backref->list, &rec->backrefs);
584         return backref;
585 }
586
587 static int add_inode_backref(struct cache_tree *inode_cache,
588                              u64 ino, u64 dir, u64 index,
589                              const char *name, int namelen,
590                              int filetype, int itemtype, int errors)
591 {
592         struct inode_record *rec;
593         struct inode_backref *backref;
594
595         rec = get_inode_rec(inode_cache, ino, 1);
596         backref = get_inode_backref(rec, name, namelen, dir);
597         if (errors)
598                 backref->errors |= errors;
599         if (itemtype == BTRFS_DIR_INDEX_KEY) {
600                 if (backref->found_dir_index)
601                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
602                 if (backref->found_inode_ref && backref->index != index)
603                         backref->errors |= REF_ERR_INDEX_UNMATCH;
604                 if (backref->found_dir_item && backref->filetype != filetype)
605                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
606
607                 backref->index = index;
608                 backref->filetype = filetype;
609                 backref->found_dir_index = 1;
610         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
611                 rec->found_link++;
612                 if (backref->found_dir_item)
613                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
614                 if (backref->found_dir_index && backref->filetype != filetype)
615                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
616
617                 backref->filetype = filetype;
618                 backref->found_dir_item = 1;
619         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
620                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
621                 if (backref->found_inode_ref)
622                         backref->errors |= REF_ERR_DUP_INODE_REF;
623                 if (backref->found_dir_index && backref->index != index)
624                         backref->errors |= REF_ERR_INDEX_UNMATCH;
625                 else
626                         backref->index = index;
627
628                 backref->ref_type = itemtype;
629                 backref->found_inode_ref = 1;
630         } else {
631                 BUG_ON(1);
632         }
633
634         maybe_free_inode_rec(inode_cache, rec);
635         return 0;
636 }
637
638 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
639                             struct cache_tree *dst_cache)
640 {
641         struct inode_backref *backref;
642         u32 dir_count = 0;
643
644         dst->merging = 1;
645         list_for_each_entry(backref, &src->backrefs, list) {
646                 if (backref->found_dir_index) {
647                         add_inode_backref(dst_cache, dst->ino, backref->dir,
648                                         backref->index, backref->name,
649                                         backref->namelen, backref->filetype,
650                                         BTRFS_DIR_INDEX_KEY, backref->errors);
651                 }
652                 if (backref->found_dir_item) {
653                         dir_count++;
654                         add_inode_backref(dst_cache, dst->ino,
655                                         backref->dir, 0, backref->name,
656                                         backref->namelen, backref->filetype,
657                                         BTRFS_DIR_ITEM_KEY, backref->errors);
658                 }
659                 if (backref->found_inode_ref) {
660                         add_inode_backref(dst_cache, dst->ino,
661                                         backref->dir, backref->index,
662                                         backref->name, backref->namelen, 0,
663                                         backref->ref_type, backref->errors);
664                 }
665         }
666
667         if (src->found_dir_item)
668                 dst->found_dir_item = 1;
669         if (src->found_file_extent)
670                 dst->found_file_extent = 1;
671         if (src->found_csum_item)
672                 dst->found_csum_item = 1;
673         if (src->some_csum_missing)
674                 dst->some_csum_missing = 1;
675         if (dst->first_extent_gap > src->first_extent_gap)
676                 dst->first_extent_gap = src->first_extent_gap;
677
678         BUG_ON(src->found_link < dir_count);
679         dst->found_link += src->found_link - dir_count;
680         dst->found_size += src->found_size;
681         if (src->extent_start != (u64)-1) {
682                 if (dst->extent_start == (u64)-1) {
683                         dst->extent_start = src->extent_start;
684                         dst->extent_end = src->extent_end;
685                 } else {
686                         if (dst->extent_end > src->extent_start)
687                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
688                         else if (dst->extent_end < src->extent_start &&
689                                  dst->extent_end < dst->first_extent_gap)
690                                 dst->first_extent_gap = dst->extent_end;
691                         if (dst->extent_end < src->extent_end)
692                                 dst->extent_end = src->extent_end;
693                 }
694         }
695
696         dst->errors |= src->errors;
697         if (src->found_inode_item) {
698                 if (!dst->found_inode_item) {
699                         dst->nlink = src->nlink;
700                         dst->isize = src->isize;
701                         dst->nbytes = src->nbytes;
702                         dst->imode = src->imode;
703                         dst->nodatasum = src->nodatasum;
704                         dst->found_inode_item = 1;
705                 } else {
706                         dst->errors |= I_ERR_DUP_INODE_ITEM;
707                 }
708         }
709         dst->merging = 0;
710
711         return 0;
712 }
713
714 static int splice_shared_node(struct shared_node *src_node,
715                               struct shared_node *dst_node)
716 {
717         struct cache_extent *cache;
718         struct ptr_node *node, *ins;
719         struct cache_tree *src, *dst;
720         struct inode_record *rec, *conflict;
721         u64 current_ino = 0;
722         int splice = 0;
723         int ret;
724
725         if (--src_node->refs == 0)
726                 splice = 1;
727         if (src_node->current)
728                 current_ino = src_node->current->ino;
729
730         src = &src_node->root_cache;
731         dst = &dst_node->root_cache;
732 again:
733         cache = search_cache_extent(src, 0);
734         while (cache) {
735                 node = container_of(cache, struct ptr_node, cache);
736                 rec = node->data;
737                 cache = next_cache_extent(cache);
738
739                 if (splice) {
740                         remove_cache_extent(src, &node->cache);
741                         ins = node;
742                 } else {
743                         ins = malloc(sizeof(*ins));
744                         ins->cache.start = node->cache.start;
745                         ins->cache.size = node->cache.size;
746                         ins->data = rec;
747                         rec->refs++;
748                 }
749                 ret = insert_cache_extent(dst, &ins->cache);
750                 if (ret == -EEXIST) {
751                         conflict = get_inode_rec(dst, rec->ino, 1);
752                         merge_inode_recs(rec, conflict, dst);
753                         if (rec->checked) {
754                                 conflict->checked = 1;
755                                 if (dst_node->current == conflict)
756                                         dst_node->current = NULL;
757                         }
758                         maybe_free_inode_rec(dst, conflict);
759                         free_inode_rec(rec);
760                         free(ins);
761                 } else {
762                         BUG_ON(ret);
763                 }
764         }
765
766         if (src == &src_node->root_cache) {
767                 src = &src_node->inode_cache;
768                 dst = &dst_node->inode_cache;
769                 goto again;
770         }
771
772         if (current_ino > 0 && (!dst_node->current ||
773             current_ino > dst_node->current->ino)) {
774                 if (dst_node->current) {
775                         dst_node->current->checked = 1;
776                         maybe_free_inode_rec(dst, dst_node->current);
777                 }
778                 dst_node->current = get_inode_rec(dst, current_ino, 1);
779         }
780         return 0;
781 }
782
783 static void free_inode_ptr(struct cache_extent *cache)
784 {
785         struct ptr_node *node;
786         struct inode_record *rec;
787
788         node = container_of(cache, struct ptr_node, cache);
789         rec = node->data;
790         free_inode_rec(rec);
791         free(node);
792 }
793
794 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
795
796 static struct shared_node *find_shared_node(struct cache_tree *shared,
797                                             u64 bytenr)
798 {
799         struct cache_extent *cache;
800         struct shared_node *node;
801
802         cache = lookup_cache_extent(shared, bytenr, 1);
803         if (cache) {
804                 node = container_of(cache, struct shared_node, cache);
805                 return node;
806         }
807         return NULL;
808 }
809
810 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
811 {
812         int ret;
813         struct shared_node *node;
814
815         node = calloc(1, sizeof(*node));
816         node->cache.start = bytenr;
817         node->cache.size = 1;
818         cache_tree_init(&node->root_cache);
819         cache_tree_init(&node->inode_cache);
820         node->refs = refs;
821
822         ret = insert_cache_extent(shared, &node->cache);
823         BUG_ON(ret);
824         return 0;
825 }
826
827 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
828                              struct walk_control *wc, int level)
829 {
830         struct shared_node *node;
831         struct shared_node *dest;
832
833         if (level == wc->active_node)
834                 return 0;
835
836         BUG_ON(wc->active_node <= level);
837         node = find_shared_node(&wc->shared, bytenr);
838         if (!node) {
839                 add_shared_node(&wc->shared, bytenr, refs);
840                 node = find_shared_node(&wc->shared, bytenr);
841                 wc->nodes[level] = node;
842                 wc->active_node = level;
843                 return 0;
844         }
845
846         if (wc->root_level == wc->active_node &&
847             btrfs_root_refs(&root->root_item) == 0) {
848                 if (--node->refs == 0) {
849                         free_inode_recs_tree(&node->root_cache);
850                         free_inode_recs_tree(&node->inode_cache);
851                         remove_cache_extent(&wc->shared, &node->cache);
852                         free(node);
853                 }
854                 return 1;
855         }
856
857         dest = wc->nodes[wc->active_node];
858         splice_shared_node(node, dest);
859         if (node->refs == 0) {
860                 remove_cache_extent(&wc->shared, &node->cache);
861                 free(node);
862         }
863         return 1;
864 }
865
866 static int leave_shared_node(struct btrfs_root *root,
867                              struct walk_control *wc, int level)
868 {
869         struct shared_node *node;
870         struct shared_node *dest;
871         int i;
872
873         if (level == wc->root_level)
874                 return 0;
875
876         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
877                 if (wc->nodes[i])
878                         break;
879         }
880         BUG_ON(i >= BTRFS_MAX_LEVEL);
881
882         node = wc->nodes[wc->active_node];
883         wc->nodes[wc->active_node] = NULL;
884         wc->active_node = i;
885
886         dest = wc->nodes[wc->active_node];
887         if (wc->active_node < wc->root_level ||
888             btrfs_root_refs(&root->root_item) > 0) {
889                 BUG_ON(node->refs <= 1);
890                 splice_shared_node(node, dest);
891         } else {
892                 BUG_ON(node->refs < 2);
893                 node->refs--;
894         }
895         return 0;
896 }
897
898 /*
899  * Returns:
900  * < 0 - on error
901  * 1   - if the root with id child_root_id is a child of root parent_root_id
902  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
903  *       has other root(s) as parent(s)
904  * 2   - if the root child_root_id doesn't have any parent roots
905  */
906 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
907                          u64 child_root_id)
908 {
909         struct btrfs_path path;
910         struct btrfs_key key;
911         struct extent_buffer *leaf;
912         int has_parent = 0;
913         int ret;
914
915         btrfs_init_path(&path);
916
917         key.objectid = parent_root_id;
918         key.type = BTRFS_ROOT_REF_KEY;
919         key.offset = child_root_id;
920         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
921                                 0, 0);
922         if (ret < 0)
923                 return ret;
924         btrfs_release_path(&path);
925         if (!ret)
926                 return 1;
927
928         key.objectid = child_root_id;
929         key.type = BTRFS_ROOT_BACKREF_KEY;
930         key.offset = 0;
931         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
932                                 0, 0);
933         if (ret < 0)
934                 goto out;
935
936         while (1) {
937                 leaf = path.nodes[0];
938                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
939                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
940                         if (ret)
941                                 break;
942                         leaf = path.nodes[0];
943                 }
944
945                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
946                 if (key.objectid != child_root_id ||
947                     key.type != BTRFS_ROOT_BACKREF_KEY)
948                         break;
949
950                 has_parent = 1;
951
952                 if (key.offset == parent_root_id) {
953                         btrfs_release_path(&path);
954                         return 1;
955                 }
956
957                 path.slots[0]++;
958         }
959 out:
960         btrfs_release_path(&path);
961         if (ret < 0)
962                 return ret;
963         return has_parent ? 0 : 2;
964 }
965
966 static int process_dir_item(struct btrfs_root *root,
967                             struct extent_buffer *eb,
968                             int slot, struct btrfs_key *key,
969                             struct shared_node *active_node)
970 {
971         u32 total;
972         u32 cur = 0;
973         u32 len;
974         u32 name_len;
975         u32 data_len;
976         int error;
977         int nritems = 0;
978         int filetype;
979         struct btrfs_dir_item *di;
980         struct inode_record *rec;
981         struct cache_tree *root_cache;
982         struct cache_tree *inode_cache;
983         struct btrfs_key location;
984         char namebuf[BTRFS_NAME_LEN];
985
986         root_cache = &active_node->root_cache;
987         inode_cache = &active_node->inode_cache;
988         rec = active_node->current;
989         rec->found_dir_item = 1;
990
991         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
992         total = btrfs_item_size_nr(eb, slot);
993         while (cur < total) {
994                 nritems++;
995                 btrfs_dir_item_key_to_cpu(eb, di, &location);
996                 name_len = btrfs_dir_name_len(eb, di);
997                 data_len = btrfs_dir_data_len(eb, di);
998                 filetype = btrfs_dir_type(eb, di);
999
1000                 rec->found_size += name_len;
1001                 if (name_len <= BTRFS_NAME_LEN) {
1002                         len = name_len;
1003                         error = 0;
1004                 } else {
1005                         len = BTRFS_NAME_LEN;
1006                         error = REF_ERR_NAME_TOO_LONG;
1007                 }
1008                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1009
1010                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1011                         add_inode_backref(inode_cache, location.objectid,
1012                                           key->objectid, key->offset, namebuf,
1013                                           len, filetype, key->type, error);
1014                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1015                         add_inode_backref(root_cache, location.objectid,
1016                                           key->objectid, key->offset,
1017                                           namebuf, len, filetype,
1018                                           key->type, error);
1019                 } else {
1020                         fprintf(stderr, "invalid location in dir item %u\n",
1021                                 location.type);
1022                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1023                                           key->objectid, key->offset, namebuf,
1024                                           len, filetype, key->type, error);
1025                 }
1026
1027                 len = sizeof(*di) + name_len + data_len;
1028                 di = (struct btrfs_dir_item *)((char *)di + len);
1029                 cur += len;
1030         }
1031         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1032                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1033
1034         return 0;
1035 }
1036
1037 static int process_inode_ref(struct extent_buffer *eb,
1038                              int slot, struct btrfs_key *key,
1039                              struct shared_node *active_node)
1040 {
1041         u32 total;
1042         u32 cur = 0;
1043         u32 len;
1044         u32 name_len;
1045         u64 index;
1046         int error;
1047         struct cache_tree *inode_cache;
1048         struct btrfs_inode_ref *ref;
1049         char namebuf[BTRFS_NAME_LEN];
1050
1051         inode_cache = &active_node->inode_cache;
1052
1053         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1054         total = btrfs_item_size_nr(eb, slot);
1055         while (cur < total) {
1056                 name_len = btrfs_inode_ref_name_len(eb, ref);
1057                 index = btrfs_inode_ref_index(eb, ref);
1058                 if (name_len <= BTRFS_NAME_LEN) {
1059                         len = name_len;
1060                         error = 0;
1061                 } else {
1062                         len = BTRFS_NAME_LEN;
1063                         error = REF_ERR_NAME_TOO_LONG;
1064                 }
1065                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1066                 add_inode_backref(inode_cache, key->objectid, key->offset,
1067                                   index, namebuf, len, 0, key->type, error);
1068
1069                 len = sizeof(*ref) + name_len;
1070                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1071                 cur += len;
1072         }
1073         return 0;
1074 }
1075
1076 static int process_inode_extref(struct extent_buffer *eb,
1077                                 int slot, struct btrfs_key *key,
1078                                 struct shared_node *active_node)
1079 {
1080         u32 total;
1081         u32 cur = 0;
1082         u32 len;
1083         u32 name_len;
1084         u64 index;
1085         u64 parent;
1086         int error;
1087         struct cache_tree *inode_cache;
1088         struct btrfs_inode_extref *extref;
1089         char namebuf[BTRFS_NAME_LEN];
1090
1091         inode_cache = &active_node->inode_cache;
1092
1093         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1094         total = btrfs_item_size_nr(eb, slot);
1095         while (cur < total) {
1096                 name_len = btrfs_inode_extref_name_len(eb, extref);
1097                 index = btrfs_inode_extref_index(eb, extref);
1098                 parent = btrfs_inode_extref_parent(eb, extref);
1099                 if (name_len <= BTRFS_NAME_LEN) {
1100                         len = name_len;
1101                         error = 0;
1102                 } else {
1103                         len = BTRFS_NAME_LEN;
1104                         error = REF_ERR_NAME_TOO_LONG;
1105                 }
1106                 read_extent_buffer(eb, namebuf,
1107                                    (unsigned long)(extref + 1), len);
1108                 add_inode_backref(inode_cache, key->objectid, parent,
1109                                   index, namebuf, len, 0, key->type, error);
1110
1111                 len = sizeof(*extref) + name_len;
1112                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1113                 cur += len;
1114         }
1115         return 0;
1116
1117 }
1118
1119 static int count_csum_range(struct btrfs_root *root, u64 start,
1120                             u64 len, u64 *found)
1121 {
1122         struct btrfs_key key;
1123         struct btrfs_path path;
1124         struct extent_buffer *leaf;
1125         int ret;
1126         size_t size;
1127         *found = 0;
1128         u64 csum_end;
1129         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1130
1131         btrfs_init_path(&path);
1132
1133         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1134         key.offset = start;
1135         key.type = BTRFS_EXTENT_CSUM_KEY;
1136
1137         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1138                                 &key, &path, 0, 0);
1139         if (ret < 0)
1140                 goto out;
1141         if (ret > 0 && path.slots[0] > 0) {
1142                 leaf = path.nodes[0];
1143                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1144                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1145                     key.type == BTRFS_EXTENT_CSUM_KEY)
1146                         path.slots[0]--;
1147         }
1148
1149         while (len > 0) {
1150                 leaf = path.nodes[0];
1151                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1152                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1153                         if (ret > 0)
1154                                 break;
1155                         else if (ret < 0)
1156                                 goto out;
1157                         leaf = path.nodes[0];
1158                 }
1159
1160                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1161                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1162                     key.type != BTRFS_EXTENT_CSUM_KEY)
1163                         break;
1164
1165                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1166                 if (key.offset >= start + len)
1167                         break;
1168
1169                 if (key.offset > start)
1170                         start = key.offset;
1171
1172                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1173                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1174                 if (csum_end > start) {
1175                         size = min(csum_end - start, len);
1176                         len -= size;
1177                         start += size;
1178                         *found += size;
1179                 }
1180
1181                 path.slots[0]++;
1182         }
1183 out:
1184         if (ret < 0)
1185                 return ret;
1186         btrfs_release_path(&path);
1187         return 0;
1188 }
1189
1190 static int process_file_extent(struct btrfs_root *root,
1191                                 struct extent_buffer *eb,
1192                                 int slot, struct btrfs_key *key,
1193                                 struct shared_node *active_node)
1194 {
1195         struct inode_record *rec;
1196         struct btrfs_file_extent_item *fi;
1197         u64 num_bytes = 0;
1198         u64 disk_bytenr = 0;
1199         u64 extent_offset = 0;
1200         u64 mask = root->sectorsize - 1;
1201         int extent_type;
1202         int ret;
1203
1204         rec = active_node->current;
1205         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1206         rec->found_file_extent = 1;
1207
1208         if (rec->extent_start == (u64)-1) {
1209                 rec->extent_start = key->offset;
1210                 rec->extent_end = key->offset;
1211         }
1212
1213         if (rec->extent_end > key->offset)
1214                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1215         else if (rec->extent_end < key->offset &&
1216                  rec->extent_end < rec->first_extent_gap)
1217                 rec->first_extent_gap = rec->extent_end;
1218
1219         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1220         extent_type = btrfs_file_extent_type(eb, fi);
1221
1222         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1223                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1224                 if (num_bytes == 0)
1225                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1226                 rec->found_size += num_bytes;
1227                 num_bytes = (num_bytes + mask) & ~mask;
1228         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1229                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1230                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1231                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1232                 extent_offset = btrfs_file_extent_offset(eb, fi);
1233                 if (num_bytes == 0 || (num_bytes & mask))
1234                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1235                 if (num_bytes + extent_offset >
1236                     btrfs_file_extent_ram_bytes(eb, fi))
1237                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1238                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1239                     (btrfs_file_extent_compression(eb, fi) ||
1240                      btrfs_file_extent_encryption(eb, fi) ||
1241                      btrfs_file_extent_other_encoding(eb, fi)))
1242                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1243                 if (disk_bytenr > 0)
1244                         rec->found_size += num_bytes;
1245         } else {
1246                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1247         }
1248         rec->extent_end = key->offset + num_bytes;
1249
1250         if (disk_bytenr > 0) {
1251                 u64 found;
1252                 if (btrfs_file_extent_compression(eb, fi))
1253                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1254                 else
1255                         disk_bytenr += extent_offset;
1256
1257                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1258                 if (ret < 0)
1259                         return ret;
1260                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1261                         if (found > 0)
1262                                 rec->found_csum_item = 1;
1263                         if (found < num_bytes)
1264                                 rec->some_csum_missing = 1;
1265                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1266                         if (found > 0)
1267                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1268                 }
1269         }
1270         return 0;
1271 }
1272
1273 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1274                             struct walk_control *wc)
1275 {
1276         struct btrfs_key key;
1277         u32 nritems;
1278         int i;
1279         int ret = 0;
1280         struct cache_tree *inode_cache;
1281         struct shared_node *active_node;
1282
1283         if (wc->root_level == wc->active_node &&
1284             btrfs_root_refs(&root->root_item) == 0)
1285                 return 0;
1286
1287         active_node = wc->nodes[wc->active_node];
1288         inode_cache = &active_node->inode_cache;
1289         nritems = btrfs_header_nritems(eb);
1290         for (i = 0; i < nritems; i++) {
1291                 btrfs_item_key_to_cpu(eb, &key, i);
1292
1293                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1294                         continue;
1295                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1296                         continue;
1297
1298                 if (active_node->current == NULL ||
1299                     active_node->current->ino < key.objectid) {
1300                         if (active_node->current) {
1301                                 active_node->current->checked = 1;
1302                                 maybe_free_inode_rec(inode_cache,
1303                                                      active_node->current);
1304                         }
1305                         active_node->current = get_inode_rec(inode_cache,
1306                                                              key.objectid, 1);
1307                 }
1308                 switch (key.type) {
1309                 case BTRFS_DIR_ITEM_KEY:
1310                 case BTRFS_DIR_INDEX_KEY:
1311                         ret = process_dir_item(root, eb, i, &key, active_node);
1312                         break;
1313                 case BTRFS_INODE_REF_KEY:
1314                         ret = process_inode_ref(eb, i, &key, active_node);
1315                         break;
1316                 case BTRFS_INODE_EXTREF_KEY:
1317                         ret = process_inode_extref(eb, i, &key, active_node);
1318                         break;
1319                 case BTRFS_INODE_ITEM_KEY:
1320                         ret = process_inode_item(eb, i, &key, active_node);
1321                         break;
1322                 case BTRFS_EXTENT_DATA_KEY:
1323                         ret = process_file_extent(root, eb, i, &key,
1324                                                   active_node);
1325                         break;
1326                 default:
1327                         break;
1328                 };
1329         }
1330         return ret;
1331 }
1332
1333 static void reada_walk_down(struct btrfs_root *root,
1334                             struct extent_buffer *node, int slot)
1335 {
1336         u64 bytenr;
1337         u64 ptr_gen;
1338         u32 nritems;
1339         u32 blocksize;
1340         int i;
1341         int level;
1342
1343         level = btrfs_header_level(node);
1344         if (level != 1)
1345                 return;
1346
1347         nritems = btrfs_header_nritems(node);
1348         blocksize = btrfs_level_size(root, level - 1);
1349         for (i = slot; i < nritems; i++) {
1350                 bytenr = btrfs_node_blockptr(node, i);
1351                 ptr_gen = btrfs_node_ptr_generation(node, i);
1352                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1353         }
1354 }
1355
1356 /*
1357  * Check the child node/leaf by the following condition:
1358  * 1. the first item key of the node/leaf should be the same with the one
1359  *    in parent.
1360  * 2. block in parent node should match the child node/leaf.
1361  * 3. generation of parent node and child's header should be consistent.
1362  *
1363  * Or the child node/leaf pointed by the key in parent is not valid.
1364  *
1365  * We hope to check leaf owner too, but since subvol may share leaves,
1366  * which makes leaf owner check not so strong, key check should be
1367  * sufficient enough for that case.
1368  */
1369 static int check_child_node(struct btrfs_root *root,
1370                             struct extent_buffer *parent, int slot,
1371                             struct extent_buffer *child)
1372 {
1373         struct btrfs_key parent_key;
1374         struct btrfs_key child_key;
1375         int ret = 0;
1376
1377         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1378         if (btrfs_header_level(child) == 0)
1379                 btrfs_item_key_to_cpu(child, &child_key, 0);
1380         else
1381                 btrfs_node_key_to_cpu(child, &child_key, 0);
1382
1383         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1384                 ret = -EINVAL;
1385                 fprintf(stderr,
1386                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1387                         parent_key.objectid, parent_key.type, parent_key.offset,
1388                         child_key.objectid, child_key.type, child_key.offset);
1389         }
1390         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1391                 ret = -EINVAL;
1392                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1393                         btrfs_node_blockptr(parent, slot),
1394                         btrfs_header_bytenr(child));
1395         }
1396         if (btrfs_node_ptr_generation(parent, slot) !=
1397             btrfs_header_generation(child)) {
1398                 ret = -EINVAL;
1399                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1400                         btrfs_header_generation(child),
1401                         btrfs_node_ptr_generation(parent, slot));
1402         }
1403         return ret;
1404 }
1405
1406 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1407                           struct walk_control *wc, int *level)
1408 {
1409         enum btrfs_tree_block_status status;
1410         u64 bytenr;
1411         u64 ptr_gen;
1412         struct extent_buffer *next;
1413         struct extent_buffer *cur;
1414         u32 blocksize;
1415         int ret, err = 0;
1416         u64 refs;
1417
1418         WARN_ON(*level < 0);
1419         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1420         ret = btrfs_lookup_extent_info(NULL, root,
1421                                        path->nodes[*level]->start,
1422                                        *level, 1, &refs, NULL);
1423         if (ret < 0) {
1424                 err = ret;
1425                 goto out;
1426         }
1427
1428         if (refs > 1) {
1429                 ret = enter_shared_node(root, path->nodes[*level]->start,
1430                                         refs, wc, *level);
1431                 if (ret > 0) {
1432                         err = ret;
1433                         goto out;
1434                 }
1435         }
1436
1437         while (*level >= 0) {
1438                 WARN_ON(*level < 0);
1439                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1440                 cur = path->nodes[*level];
1441
1442                 if (btrfs_header_level(cur) != *level)
1443                         WARN_ON(1);
1444
1445                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1446                         break;
1447                 if (*level == 0) {
1448                         ret = process_one_leaf(root, cur, wc);
1449                         if (ret < 0)
1450                                 err = ret;
1451                         break;
1452                 }
1453                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1454                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1455                 blocksize = btrfs_level_size(root, *level - 1);
1456                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1457                                                1, &refs, NULL);
1458                 if (ret < 0)
1459                         refs = 0;
1460
1461                 if (refs > 1) {
1462                         ret = enter_shared_node(root, bytenr, refs,
1463                                                 wc, *level - 1);
1464                         if (ret > 0) {
1465                                 path->slots[*level]++;
1466                                 continue;
1467                         }
1468                 }
1469
1470                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1471                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1472                         free_extent_buffer(next);
1473                         reada_walk_down(root, cur, path->slots[*level]);
1474                         next = read_tree_block(root, bytenr, blocksize,
1475                                                ptr_gen);
1476                         if (!next) {
1477                                 err = -EIO;
1478                                 goto out;
1479                         }
1480                 }
1481
1482                 ret = check_child_node(root, cur, path->slots[*level], next);
1483                 if (ret) {
1484                         err = ret;
1485                         goto out;
1486                 }
1487
1488                 if (btrfs_is_leaf(next))
1489                         status = btrfs_check_leaf(root, NULL, next);
1490                 else
1491                         status = btrfs_check_node(root, NULL, next);
1492                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1493                         free_extent_buffer(next);
1494                         err = -EIO;
1495                         goto out;
1496                 }
1497
1498                 *level = *level - 1;
1499                 free_extent_buffer(path->nodes[*level]);
1500                 path->nodes[*level] = next;
1501                 path->slots[*level] = 0;
1502         }
1503 out:
1504         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1505         return err;
1506 }
1507
1508 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1509                         struct walk_control *wc, int *level)
1510 {
1511         int i;
1512         struct extent_buffer *leaf;
1513
1514         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1515                 leaf = path->nodes[i];
1516                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1517                         path->slots[i]++;
1518                         *level = i;
1519                         return 0;
1520                 } else {
1521                         free_extent_buffer(path->nodes[*level]);
1522                         path->nodes[*level] = NULL;
1523                         BUG_ON(*level > wc->active_node);
1524                         if (*level == wc->active_node)
1525                                 leave_shared_node(root, wc, *level);
1526                         *level = i + 1;
1527                 }
1528         }
1529         return 1;
1530 }
1531
1532 static int check_root_dir(struct inode_record *rec)
1533 {
1534         struct inode_backref *backref;
1535         int ret = -1;
1536
1537         if (!rec->found_inode_item || rec->errors)
1538                 goto out;
1539         if (rec->nlink != 1 || rec->found_link != 0)
1540                 goto out;
1541         if (list_empty(&rec->backrefs))
1542                 goto out;
1543         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1544         if (!backref->found_inode_ref)
1545                 goto out;
1546         if (backref->index != 0 || backref->namelen != 2 ||
1547             memcmp(backref->name, "..", 2))
1548                 goto out;
1549         if (backref->found_dir_index || backref->found_dir_item)
1550                 goto out;
1551         ret = 0;
1552 out:
1553         return ret;
1554 }
1555
1556 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1557                               struct btrfs_root *root, struct btrfs_path *path,
1558                               struct inode_record *rec)
1559 {
1560         struct btrfs_inode_item *ei;
1561         struct btrfs_key key;
1562         int ret;
1563
1564         key.objectid = rec->ino;
1565         key.type = BTRFS_INODE_ITEM_KEY;
1566         key.offset = (u64)-1;
1567
1568         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1569         if (ret < 0)
1570                 goto out;
1571         if (ret) {
1572                 if (!path->slots[0]) {
1573                         ret = -ENOENT;
1574                         goto out;
1575                 }
1576                 path->slots[0]--;
1577                 ret = 0;
1578         }
1579         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1580         if (key.objectid != rec->ino) {
1581                 ret = -ENOENT;
1582                 goto out;
1583         }
1584
1585         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1586                             struct btrfs_inode_item);
1587         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1588         btrfs_mark_buffer_dirty(path->nodes[0]);
1589         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1590         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1591                root->root_key.objectid);
1592 out:
1593         btrfs_release_path(path);
1594         return ret;
1595 }
1596
1597 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1598                                     struct btrfs_root *root,
1599                                     struct btrfs_path *path,
1600                                     struct inode_record *rec)
1601 {
1602         struct btrfs_key key;
1603         int ret;
1604
1605         key.objectid = BTRFS_ORPHAN_OBJECTID;
1606         key.type = BTRFS_ORPHAN_ITEM_KEY;
1607         key.offset = rec->ino;
1608
1609         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1610         btrfs_release_path(path);
1611         if (!ret)
1612                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1613         return ret;
1614 }
1615
1616 static int add_missing_dir_index(struct btrfs_root *root,
1617                                  struct cache_tree *inode_cache,
1618                                  struct inode_record *rec,
1619                                  struct inode_backref *backref)
1620 {
1621         struct btrfs_path *path;
1622         struct btrfs_trans_handle *trans;
1623         struct btrfs_dir_item *dir_item;
1624         struct extent_buffer *leaf;
1625         struct btrfs_key key;
1626         struct btrfs_disk_key disk_key;
1627         struct inode_record *dir_rec;
1628         unsigned long name_ptr;
1629         u32 data_size = sizeof(*dir_item) + backref->namelen;
1630         int ret;
1631
1632         path = btrfs_alloc_path();
1633         if (!path)
1634                 return -ENOMEM;
1635
1636         trans = btrfs_start_transaction(root, 1);
1637         if (IS_ERR(trans)) {
1638                 btrfs_free_path(path);
1639                 return PTR_ERR(trans);
1640         }
1641
1642         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1643                 (unsigned long long)rec->ino);
1644         key.objectid = backref->dir;
1645         key.type = BTRFS_DIR_INDEX_KEY;
1646         key.offset = backref->index;
1647
1648         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
1649         BUG_ON(ret);
1650
1651         leaf = path->nodes[0];
1652         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
1653
1654         disk_key.objectid = cpu_to_le64(rec->ino);
1655         disk_key.type = BTRFS_INODE_ITEM_KEY;
1656         disk_key.offset = 0;
1657
1658         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1659         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1660         btrfs_set_dir_data_len(leaf, dir_item, 0);
1661         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1662         name_ptr = (unsigned long)(dir_item + 1);
1663         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1664         btrfs_mark_buffer_dirty(leaf);
1665         btrfs_free_path(path);
1666         btrfs_commit_transaction(trans, root);
1667
1668         backref->found_dir_index = 1;
1669         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1670         if (!dir_rec)
1671                 return 0;
1672         dir_rec->found_size += backref->namelen;
1673         if (dir_rec->found_size == dir_rec->isize &&
1674             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1675                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1676         if (dir_rec->found_size != dir_rec->isize)
1677                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1678
1679         return 0;
1680 }
1681
1682 static int delete_dir_index(struct btrfs_root *root,
1683                             struct cache_tree *inode_cache,
1684                             struct inode_record *rec,
1685                             struct inode_backref *backref)
1686 {
1687         struct btrfs_trans_handle *trans;
1688         struct btrfs_dir_item *di;
1689         struct btrfs_path *path;
1690         int ret = 0;
1691
1692         path = btrfs_alloc_path();
1693         if (!path)
1694                 return -ENOMEM;
1695
1696         trans = btrfs_start_transaction(root, 1);
1697         if (IS_ERR(trans)) {
1698                 btrfs_free_path(path);
1699                 return PTR_ERR(trans);
1700         }
1701
1702
1703         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1704                 (unsigned long long)backref->dir,
1705                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1706                 (unsigned long long)root->objectid);
1707
1708         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
1709                                     backref->name, backref->namelen,
1710                                     backref->index, -1);
1711         if (IS_ERR(di)) {
1712                 ret = PTR_ERR(di);
1713                 btrfs_free_path(path);
1714                 btrfs_commit_transaction(trans, root);
1715                 if (ret == -ENOENT)
1716                         return 0;
1717                 return ret;
1718         }
1719
1720         if (!di)
1721                 ret = btrfs_del_item(trans, root, path);
1722         else
1723                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
1724         BUG_ON(ret);
1725         btrfs_free_path(path);
1726         btrfs_commit_transaction(trans, root);
1727         return ret;
1728 }
1729
1730 static int repair_inode_backrefs(struct btrfs_root *root,
1731                                  struct inode_record *rec,
1732                                  struct cache_tree *inode_cache,
1733                                  int delete)
1734 {
1735         struct inode_backref *tmp, *backref;
1736         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1737         int ret = 0;
1738         int repaired = 0;
1739
1740         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1741                 /* Index 0 for root dir's are special, don't mess with it */
1742                 if (rec->ino == root_dirid && backref->index == 0)
1743                         continue;
1744
1745                 if (delete &&
1746                     ((backref->found_dir_index && !backref->found_inode_ref) ||
1747                      (backref->found_dir_index && backref->found_inode_ref &&
1748                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
1749                         ret = delete_dir_index(root, inode_cache, rec, backref);
1750                         if (ret)
1751                                 break;
1752                         repaired++;
1753                         list_del(&backref->list);
1754                         free(backref);
1755                 }
1756
1757                 if (!delete && !backref->found_dir_index &&
1758                     backref->found_dir_item && backref->found_inode_ref) {
1759                         ret = add_missing_dir_index(root, inode_cache, rec,
1760                                                     backref);
1761                         if (ret)
1762                                 break;
1763                         repaired++;
1764                         if (backref->found_dir_item &&
1765                             backref->found_dir_index &&
1766                             backref->found_dir_index) {
1767                                 if (!backref->errors &&
1768                                     backref->found_inode_ref) {
1769                                         list_del(&backref->list);
1770                                         free(backref);
1771                                 }
1772                         }
1773                 }
1774
1775         }
1776         return ret ? ret : repaired;
1777 }
1778
1779 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
1780 {
1781         struct btrfs_trans_handle *trans;
1782         struct btrfs_path *path;
1783         int ret = 0;
1784
1785         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM)))
1786                 return rec->errors;
1787
1788         path = btrfs_alloc_path();
1789         if (!path)
1790                 return -ENOMEM;
1791
1792         trans = btrfs_start_transaction(root, 1);
1793         if (IS_ERR(trans)) {
1794                 btrfs_free_path(path);
1795                 return PTR_ERR(trans);
1796         }
1797
1798         if (rec->errors & I_ERR_DIR_ISIZE_WRONG)
1799                 ret = repair_inode_isize(trans, root, path, rec);
1800         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
1801                 ret = repair_inode_orphan_item(trans, root, path, rec);
1802         btrfs_commit_transaction(trans, root);
1803         btrfs_free_path(path);
1804         return ret;
1805 }
1806
1807 static int check_inode_recs(struct btrfs_root *root,
1808                             struct cache_tree *inode_cache)
1809 {
1810         struct cache_extent *cache;
1811         struct ptr_node *node;
1812         struct inode_record *rec;
1813         struct inode_backref *backref;
1814         int stage = 0;
1815         int ret;
1816         int err = 0;
1817         u64 error = 0;
1818         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1819
1820         if (btrfs_root_refs(&root->root_item) == 0) {
1821                 if (!cache_tree_empty(inode_cache))
1822                         fprintf(stderr, "warning line %d\n", __LINE__);
1823                 return 0;
1824         }
1825
1826         /*
1827          * We need to repair backrefs first because we could change some of the
1828          * errors in the inode recs.
1829          *
1830          * We also need to go through and delete invalid backrefs first and then
1831          * add the correct ones second.  We do this because we may get EEXIST
1832          * when adding back the correct index because we hadn't yet deleted the
1833          * invalid index.
1834          *
1835          * For example, if we were missing a dir index then the directories
1836          * isize would be wrong, so if we fixed the isize to what we thought it
1837          * would be and then fixed the backref we'd still have a invalid fs, so
1838          * we need to add back the dir index and then check to see if the isize
1839          * is still wrong.
1840          */
1841         while (stage < 3) {
1842                 stage++;
1843                 if (stage == 3 && !err)
1844                         break;
1845
1846                 cache = search_cache_extent(inode_cache, 0);
1847                 while (repair && cache) {
1848                         node = container_of(cache, struct ptr_node, cache);
1849                         rec = node->data;
1850                         cache = next_cache_extent(cache);
1851
1852                         /* Need to free everything up and rescan */
1853                         if (stage == 3) {
1854                                 remove_cache_extent(inode_cache, &node->cache);
1855                                 free(node);
1856                                 free_inode_rec(rec);
1857                                 continue;
1858                         }
1859
1860                         if (list_empty(&rec->backrefs))
1861                                 continue;
1862
1863                         ret = repair_inode_backrefs(root, rec, inode_cache,
1864                                                     stage == 1);
1865                         if (ret < 0) {
1866                                 err = ret;
1867                                 stage = 2;
1868                                 break;
1869                         } if (ret > 0) {
1870                                 err = -EAGAIN;
1871                         }
1872                 }
1873         }
1874         if (err)
1875                 return err;
1876
1877         rec = get_inode_rec(inode_cache, root_dirid, 0);
1878         if (rec) {
1879                 ret = check_root_dir(rec);
1880                 if (ret) {
1881                         fprintf(stderr, "root %llu root dir %llu error\n",
1882                                 (unsigned long long)root->root_key.objectid,
1883                                 (unsigned long long)root_dirid);
1884                         error++;
1885                 }
1886         } else {
1887                 fprintf(stderr, "root %llu root dir %llu not found\n",
1888                         (unsigned long long)root->root_key.objectid,
1889                         (unsigned long long)root_dirid);
1890         }
1891
1892         while (1) {
1893                 cache = search_cache_extent(inode_cache, 0);
1894                 if (!cache)
1895                         break;
1896                 node = container_of(cache, struct ptr_node, cache);
1897                 rec = node->data;
1898                 remove_cache_extent(inode_cache, &node->cache);
1899                 free(node);
1900                 if (rec->ino == root_dirid ||
1901                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1902                         free_inode_rec(rec);
1903                         continue;
1904                 }
1905
1906                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1907                         ret = check_orphan_item(root, rec->ino);
1908                         if (ret == 0)
1909                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1910                         if (can_free_inode_rec(rec)) {
1911                                 free_inode_rec(rec);
1912                                 continue;
1913                         }
1914                 }
1915
1916                 if (repair) {
1917                         ret = try_repair_inode(root, rec);
1918                         if (ret == 0 && can_free_inode_rec(rec)) {
1919                                 free_inode_rec(rec);
1920                                 continue;
1921                         }
1922                         ret = 0;
1923                 }
1924
1925                 error++;
1926                 if (!rec->found_inode_item)
1927                         rec->errors |= I_ERR_NO_INODE_ITEM;
1928                 if (rec->found_link != rec->nlink)
1929                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1930                 print_inode_error(root, rec);
1931                 list_for_each_entry(backref, &rec->backrefs, list) {
1932                         if (!backref->found_dir_item)
1933                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1934                         if (!backref->found_dir_index)
1935                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1936                         if (!backref->found_inode_ref)
1937                                 backref->errors |= REF_ERR_NO_INODE_REF;
1938                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1939                                 " namelen %u name %s filetype %d errors %x",
1940                                 (unsigned long long)backref->dir,
1941                                 (unsigned long long)backref->index,
1942                                 backref->namelen, backref->name,
1943                                 backref->filetype, backref->errors);
1944                         print_ref_error(backref->errors);
1945                 }
1946                 free_inode_rec(rec);
1947         }
1948         return (error > 0) ? -1 : 0;
1949 }
1950
1951 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1952                                         u64 objectid)
1953 {
1954         struct cache_extent *cache;
1955         struct root_record *rec = NULL;
1956         int ret;
1957
1958         cache = lookup_cache_extent(root_cache, objectid, 1);
1959         if (cache) {
1960                 rec = container_of(cache, struct root_record, cache);
1961         } else {
1962                 rec = calloc(1, sizeof(*rec));
1963                 rec->objectid = objectid;
1964                 INIT_LIST_HEAD(&rec->backrefs);
1965                 rec->cache.start = objectid;
1966                 rec->cache.size = 1;
1967
1968                 ret = insert_cache_extent(root_cache, &rec->cache);
1969                 BUG_ON(ret);
1970         }
1971         return rec;
1972 }
1973
1974 static struct root_backref *get_root_backref(struct root_record *rec,
1975                                              u64 ref_root, u64 dir, u64 index,
1976                                              const char *name, int namelen)
1977 {
1978         struct root_backref *backref;
1979
1980         list_for_each_entry(backref, &rec->backrefs, list) {
1981                 if (backref->ref_root != ref_root || backref->dir != dir ||
1982                     backref->namelen != namelen)
1983                         continue;
1984                 if (memcmp(name, backref->name, namelen))
1985                         continue;
1986                 return backref;
1987         }
1988
1989         backref = malloc(sizeof(*backref) + namelen + 1);
1990         memset(backref, 0, sizeof(*backref));
1991         backref->ref_root = ref_root;
1992         backref->dir = dir;
1993         backref->index = index;
1994         backref->namelen = namelen;
1995         memcpy(backref->name, name, namelen);
1996         backref->name[namelen] = '\0';
1997         list_add_tail(&backref->list, &rec->backrefs);
1998         return backref;
1999 }
2000
2001 static void free_root_record(struct cache_extent *cache)
2002 {
2003         struct root_record *rec;
2004         struct root_backref *backref;
2005
2006         rec = container_of(cache, struct root_record, cache);
2007         while (!list_empty(&rec->backrefs)) {
2008                 backref = list_entry(rec->backrefs.next,
2009                                      struct root_backref, list);
2010                 list_del(&backref->list);
2011                 free(backref);
2012         }
2013
2014         kfree(rec);
2015 }
2016
2017 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2018
2019 static int add_root_backref(struct cache_tree *root_cache,
2020                             u64 root_id, u64 ref_root, u64 dir, u64 index,
2021                             const char *name, int namelen,
2022                             int item_type, int errors)
2023 {
2024         struct root_record *rec;
2025         struct root_backref *backref;
2026
2027         rec = get_root_rec(root_cache, root_id);
2028         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2029
2030         backref->errors |= errors;
2031
2032         if (item_type != BTRFS_DIR_ITEM_KEY) {
2033                 if (backref->found_dir_index || backref->found_back_ref ||
2034                     backref->found_forward_ref) {
2035                         if (backref->index != index)
2036                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
2037                 } else {
2038                         backref->index = index;
2039                 }
2040         }
2041
2042         if (item_type == BTRFS_DIR_ITEM_KEY) {
2043                 if (backref->found_forward_ref)
2044                         rec->found_ref++;
2045                 backref->found_dir_item = 1;
2046         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2047                 backref->found_dir_index = 1;
2048         } else if (item_type == BTRFS_ROOT_REF_KEY) {
2049                 if (backref->found_forward_ref)
2050                         backref->errors |= REF_ERR_DUP_ROOT_REF;
2051                 else if (backref->found_dir_item)
2052                         rec->found_ref++;
2053                 backref->found_forward_ref = 1;
2054         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2055                 if (backref->found_back_ref)
2056                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2057                 backref->found_back_ref = 1;
2058         } else {
2059                 BUG_ON(1);
2060         }
2061
2062         if (backref->found_forward_ref && backref->found_dir_item)
2063                 backref->reachable = 1;
2064         return 0;
2065 }
2066
2067 static int merge_root_recs(struct btrfs_root *root,
2068                            struct cache_tree *src_cache,
2069                            struct cache_tree *dst_cache)
2070 {
2071         struct cache_extent *cache;
2072         struct ptr_node *node;
2073         struct inode_record *rec;
2074         struct inode_backref *backref;
2075         int ret = 0;
2076
2077         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2078                 free_inode_recs_tree(src_cache);
2079                 return 0;
2080         }
2081
2082         while (1) {
2083                 cache = search_cache_extent(src_cache, 0);
2084                 if (!cache)
2085                         break;
2086                 node = container_of(cache, struct ptr_node, cache);
2087                 rec = node->data;
2088                 remove_cache_extent(src_cache, &node->cache);
2089                 free(node);
2090
2091                 ret = is_child_root(root, root->objectid, rec->ino);
2092                 if (ret < 0)
2093                         break;
2094                 else if (ret == 0)
2095                         goto skip;
2096
2097                 list_for_each_entry(backref, &rec->backrefs, list) {
2098                         BUG_ON(backref->found_inode_ref);
2099                         if (backref->found_dir_item)
2100                                 add_root_backref(dst_cache, rec->ino,
2101                                         root->root_key.objectid, backref->dir,
2102                                         backref->index, backref->name,
2103                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
2104                                         backref->errors);
2105                         if (backref->found_dir_index)
2106                                 add_root_backref(dst_cache, rec->ino,
2107                                         root->root_key.objectid, backref->dir,
2108                                         backref->index, backref->name,
2109                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
2110                                         backref->errors);
2111                 }
2112 skip:
2113                 free_inode_rec(rec);
2114         }
2115         if (ret < 0)
2116                 return ret;
2117         return 0;
2118 }
2119
2120 static int check_root_refs(struct btrfs_root *root,
2121                            struct cache_tree *root_cache)
2122 {
2123         struct root_record *rec;
2124         struct root_record *ref_root;
2125         struct root_backref *backref;
2126         struct cache_extent *cache;
2127         int loop = 1;
2128         int ret;
2129         int error;
2130         int errors = 0;
2131
2132         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2133         rec->found_ref = 1;
2134
2135         /* fixme: this can not detect circular references */
2136         while (loop) {
2137                 loop = 0;
2138                 cache = search_cache_extent(root_cache, 0);
2139                 while (1) {
2140                         if (!cache)
2141                                 break;
2142                         rec = container_of(cache, struct root_record, cache);
2143                         cache = next_cache_extent(cache);
2144
2145                         if (rec->found_ref == 0)
2146                                 continue;
2147
2148                         list_for_each_entry(backref, &rec->backrefs, list) {
2149                                 if (!backref->reachable)
2150                                         continue;
2151
2152                                 ref_root = get_root_rec(root_cache,
2153                                                         backref->ref_root);
2154                                 if (ref_root->found_ref > 0)
2155                                         continue;
2156
2157                                 backref->reachable = 0;
2158                                 rec->found_ref--;
2159                                 if (rec->found_ref == 0)
2160                                         loop = 1;
2161                         }
2162                 }
2163         }
2164
2165         cache = search_cache_extent(root_cache, 0);
2166         while (1) {
2167                 if (!cache)
2168                         break;
2169                 rec = container_of(cache, struct root_record, cache);
2170                 cache = next_cache_extent(cache);
2171
2172                 if (rec->found_ref == 0 &&
2173                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2174                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2175                         ret = check_orphan_item(root->fs_info->tree_root,
2176                                                 rec->objectid);
2177                         if (ret == 0)
2178                                 continue;
2179
2180                         /*
2181                          * If we don't have a root item then we likely just have
2182                          * a dir item in a snapshot for this root but no actual
2183                          * ref key or anything so it's meaningless.
2184                          */
2185                         if (!rec->found_root_item)
2186                                 continue;
2187                         errors++;
2188                         fprintf(stderr, "fs tree %llu not referenced\n",
2189                                 (unsigned long long)rec->objectid);
2190                 }
2191
2192                 error = 0;
2193                 if (rec->found_ref > 0 && !rec->found_root_item)
2194                         error = 1;
2195                 list_for_each_entry(backref, &rec->backrefs, list) {
2196                         if (!backref->found_dir_item)
2197                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2198                         if (!backref->found_dir_index)
2199                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2200                         if (!backref->found_back_ref)
2201                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
2202                         if (!backref->found_forward_ref)
2203                                 backref->errors |= REF_ERR_NO_ROOT_REF;
2204                         if (backref->reachable && backref->errors)
2205                                 error = 1;
2206                 }
2207                 if (!error)
2208                         continue;
2209
2210                 errors++;
2211                 fprintf(stderr, "fs tree %llu refs %u %s\n",
2212                         (unsigned long long)rec->objectid, rec->found_ref,
2213                          rec->found_root_item ? "" : "not found");
2214
2215                 list_for_each_entry(backref, &rec->backrefs, list) {
2216                         if (!backref->reachable)
2217                                 continue;
2218                         if (!backref->errors && rec->found_root_item)
2219                                 continue;
2220                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
2221                                 " index %llu namelen %u name %s errors %x\n",
2222                                 (unsigned long long)backref->ref_root,
2223                                 (unsigned long long)backref->dir,
2224                                 (unsigned long long)backref->index,
2225                                 backref->namelen, backref->name,
2226                                 backref->errors);
2227                         print_ref_error(backref->errors);
2228                 }
2229         }
2230         return errors > 0 ? 1 : 0;
2231 }
2232
2233 static int process_root_ref(struct extent_buffer *eb, int slot,
2234                             struct btrfs_key *key,
2235                             struct cache_tree *root_cache)
2236 {
2237         u64 dirid;
2238         u64 index;
2239         u32 len;
2240         u32 name_len;
2241         struct btrfs_root_ref *ref;
2242         char namebuf[BTRFS_NAME_LEN];
2243         int error;
2244
2245         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
2246
2247         dirid = btrfs_root_ref_dirid(eb, ref);
2248         index = btrfs_root_ref_sequence(eb, ref);
2249         name_len = btrfs_root_ref_name_len(eb, ref);
2250
2251         if (name_len <= BTRFS_NAME_LEN) {
2252                 len = name_len;
2253                 error = 0;
2254         } else {
2255                 len = BTRFS_NAME_LEN;
2256                 error = REF_ERR_NAME_TOO_LONG;
2257         }
2258         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
2259
2260         if (key->type == BTRFS_ROOT_REF_KEY) {
2261                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
2262                                  index, namebuf, len, key->type, error);
2263         } else {
2264                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
2265                                  index, namebuf, len, key->type, error);
2266         }
2267         return 0;
2268 }
2269
2270 static int check_fs_root(struct btrfs_root *root,
2271                          struct cache_tree *root_cache,
2272                          struct walk_control *wc)
2273 {
2274         int ret = 0;
2275         int err = 0;
2276         int wret;
2277         int level;
2278         struct btrfs_path path;
2279         struct shared_node root_node;
2280         struct root_record *rec;
2281         struct btrfs_root_item *root_item = &root->root_item;
2282         enum btrfs_tree_block_status status;
2283
2284         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2285                 rec = get_root_rec(root_cache, root->root_key.objectid);
2286                 if (btrfs_root_refs(root_item) > 0)
2287                         rec->found_root_item = 1;
2288         }
2289
2290         btrfs_init_path(&path);
2291         memset(&root_node, 0, sizeof(root_node));
2292         cache_tree_init(&root_node.root_cache);
2293         cache_tree_init(&root_node.inode_cache);
2294
2295         level = btrfs_header_level(root->node);
2296         memset(wc->nodes, 0, sizeof(wc->nodes));
2297         wc->nodes[level] = &root_node;
2298         wc->active_node = level;
2299         wc->root_level = level;
2300
2301         /* We may not have checked the root block, lets do that now */
2302         if (btrfs_is_leaf(root->node))
2303                 status = btrfs_check_leaf(root, NULL, root->node);
2304         else
2305                 status = btrfs_check_node(root, NULL, root->node);
2306         if (status != BTRFS_TREE_BLOCK_CLEAN)
2307                 return -EIO;
2308
2309         if (btrfs_root_refs(root_item) > 0 ||
2310             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2311                 path.nodes[level] = root->node;
2312                 extent_buffer_get(root->node);
2313                 path.slots[level] = 0;
2314         } else {
2315                 struct btrfs_key key;
2316                 struct btrfs_disk_key found_key;
2317
2318                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2319                 level = root_item->drop_level;
2320                 path.lowest_level = level;
2321                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2322                 if (wret < 0)
2323                         goto skip_walking;
2324                 btrfs_node_key(path.nodes[level], &found_key,
2325                                 path.slots[level]);
2326                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2327                                         sizeof(found_key)));
2328         }
2329
2330         while (1) {
2331                 wret = walk_down_tree(root, &path, wc, &level);
2332                 if (wret < 0)
2333                         ret = wret;
2334                 if (wret != 0)
2335                         break;
2336
2337                 wret = walk_up_tree(root, &path, wc, &level);
2338                 if (wret < 0)
2339                         ret = wret;
2340                 if (wret != 0)
2341                         break;
2342         }
2343 skip_walking:
2344         btrfs_release_path(&path);
2345
2346         err = merge_root_recs(root, &root_node.root_cache, root_cache);
2347         if (err < 0)
2348                 ret = err;
2349
2350         if (root_node.current) {
2351                 root_node.current->checked = 1;
2352                 maybe_free_inode_rec(&root_node.inode_cache,
2353                                 root_node.current);
2354         }
2355
2356         err = check_inode_recs(root, &root_node.inode_cache);
2357         if (!ret)
2358                 ret = err;
2359         return ret;
2360 }
2361
2362 static int fs_root_objectid(u64 objectid)
2363 {
2364         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
2365             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2366                 return 1;
2367         return is_fstree(objectid);
2368 }
2369
2370 static int check_fs_roots(struct btrfs_root *root,
2371                           struct cache_tree *root_cache)
2372 {
2373         struct btrfs_path path;
2374         struct btrfs_key key;
2375         struct walk_control wc;
2376         struct extent_buffer *leaf, *tree_node;
2377         struct btrfs_root *tmp_root;
2378         struct btrfs_root *tree_root = root->fs_info->tree_root;
2379         int ret;
2380         int err = 0;
2381
2382         /*
2383          * Just in case we made any changes to the extent tree that weren't
2384          * reflected into the free space cache yet.
2385          */
2386         if (repair)
2387                 reset_cached_block_groups(root->fs_info);
2388         memset(&wc, 0, sizeof(wc));
2389         cache_tree_init(&wc.shared);
2390         btrfs_init_path(&path);
2391
2392 again:
2393         key.offset = 0;
2394         key.objectid = 0;
2395         key.type = BTRFS_ROOT_ITEM_KEY;
2396         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
2397         if (ret < 0) {
2398                 err = 1;
2399                 goto out;
2400         }
2401         tree_node = tree_root->node;
2402         while (1) {
2403                 if (tree_node != tree_root->node) {
2404                         free_root_recs_tree(root_cache);
2405                         btrfs_release_path(&path);
2406                         goto again;
2407                 }
2408                 leaf = path.nodes[0];
2409                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2410                         ret = btrfs_next_leaf(tree_root, &path);
2411                         if (ret) {
2412                                 if (ret < 0)
2413                                         err = 1;
2414                                 break;
2415                         }
2416                         leaf = path.nodes[0];
2417                 }
2418                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2419                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
2420                     fs_root_objectid(key.objectid)) {
2421                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2422                                 tmp_root = btrfs_read_fs_root_no_cache(
2423                                                 root->fs_info, &key);
2424                         } else {
2425                                 key.offset = (u64)-1;
2426                                 tmp_root = btrfs_read_fs_root(
2427                                                 root->fs_info, &key);
2428                         }
2429                         if (IS_ERR(tmp_root)) {
2430                                 err = 1;
2431                                 goto next;
2432                         }
2433                         ret = check_fs_root(tmp_root, root_cache, &wc);
2434                         if (ret == -EAGAIN) {
2435                                 free_root_recs_tree(root_cache);
2436                                 btrfs_release_path(&path);
2437                                 goto again;
2438                         }
2439                         if (ret)
2440                                 err = 1;
2441                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
2442                                 btrfs_free_fs_root(tmp_root);
2443                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
2444                            key.type == BTRFS_ROOT_BACKREF_KEY) {
2445                         process_root_ref(leaf, path.slots[0], &key,
2446                                          root_cache);
2447                 }
2448 next:
2449                 path.slots[0]++;
2450         }
2451 out:
2452         btrfs_release_path(&path);
2453         if (err)
2454                 free_extent_cache_tree(&wc.shared);
2455         if (!cache_tree_empty(&wc.shared))
2456                 fprintf(stderr, "warning line %d\n", __LINE__);
2457
2458         return err;
2459 }
2460
2461 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
2462 {
2463         struct list_head *cur = rec->backrefs.next;
2464         struct extent_backref *back;
2465         struct tree_backref *tback;
2466         struct data_backref *dback;
2467         u64 found = 0;
2468         int err = 0;
2469
2470         while(cur != &rec->backrefs) {
2471                 back = list_entry(cur, struct extent_backref, list);
2472                 cur = cur->next;
2473                 if (!back->found_extent_tree) {
2474                         err = 1;
2475                         if (!print_errs)
2476                                 goto out;
2477                         if (back->is_data) {
2478                                 dback = (struct data_backref *)back;
2479                                 fprintf(stderr, "Backref %llu %s %llu"
2480                                         " owner %llu offset %llu num_refs %lu"
2481                                         " not found in extent tree\n",
2482                                         (unsigned long long)rec->start,
2483                                         back->full_backref ?
2484                                         "parent" : "root",
2485                                         back->full_backref ?
2486                                         (unsigned long long)dback->parent:
2487                                         (unsigned long long)dback->root,
2488                                         (unsigned long long)dback->owner,
2489                                         (unsigned long long)dback->offset,
2490                                         (unsigned long)dback->num_refs);
2491                         } else {
2492                                 tback = (struct tree_backref *)back;
2493                                 fprintf(stderr, "Backref %llu parent %llu"
2494                                         " root %llu not found in extent tree\n",
2495                                         (unsigned long long)rec->start,
2496                                         (unsigned long long)tback->parent,
2497                                         (unsigned long long)tback->root);
2498                         }
2499                 }
2500                 if (!back->is_data && !back->found_ref) {
2501                         err = 1;
2502                         if (!print_errs)
2503                                 goto out;
2504                         tback = (struct tree_backref *)back;
2505                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
2506                                 (unsigned long long)rec->start,
2507                                 back->full_backref ? "parent" : "root",
2508                                 back->full_backref ?
2509                                 (unsigned long long)tback->parent :
2510                                 (unsigned long long)tback->root, back);
2511                 }
2512                 if (back->is_data) {
2513                         dback = (struct data_backref *)back;
2514                         if (dback->found_ref != dback->num_refs) {
2515                                 err = 1;
2516                                 if (!print_errs)
2517                                         goto out;
2518                                 fprintf(stderr, "Incorrect local backref count"
2519                                         " on %llu %s %llu owner %llu"
2520                                         " offset %llu found %u wanted %u back %p\n",
2521                                         (unsigned long long)rec->start,
2522                                         back->full_backref ?
2523                                         "parent" : "root",
2524                                         back->full_backref ?
2525                                         (unsigned long long)dback->parent:
2526                                         (unsigned long long)dback->root,
2527                                         (unsigned long long)dback->owner,
2528                                         (unsigned long long)dback->offset,
2529                                         dback->found_ref, dback->num_refs, back);
2530                         }
2531                         if (dback->disk_bytenr != rec->start) {
2532                                 err = 1;
2533                                 if (!print_errs)
2534                                         goto out;
2535                                 fprintf(stderr, "Backref disk bytenr does not"
2536                                         " match extent record, bytenr=%llu, "
2537                                         "ref bytenr=%llu\n",
2538                                         (unsigned long long)rec->start,
2539                                         (unsigned long long)dback->disk_bytenr);
2540                         }
2541
2542                         if (dback->bytes != rec->nr) {
2543                                 err = 1;
2544                                 if (!print_errs)
2545                                         goto out;
2546                                 fprintf(stderr, "Backref bytes do not match "
2547                                         "extent backref, bytenr=%llu, ref "
2548                                         "bytes=%llu, backref bytes=%llu\n",
2549                                         (unsigned long long)rec->start,
2550                                         (unsigned long long)rec->nr,
2551                                         (unsigned long long)dback->bytes);
2552                         }
2553                 }
2554                 if (!back->is_data) {
2555                         found += 1;
2556                 } else {
2557                         dback = (struct data_backref *)back;
2558                         found += dback->found_ref;
2559                 }
2560         }
2561         if (found != rec->refs) {
2562                 err = 1;
2563                 if (!print_errs)
2564                         goto out;
2565                 fprintf(stderr, "Incorrect global backref count "
2566                         "on %llu found %llu wanted %llu\n",
2567                         (unsigned long long)rec->start,
2568                         (unsigned long long)found,
2569                         (unsigned long long)rec->refs);
2570         }
2571 out:
2572         return err;
2573 }
2574
2575 static int free_all_extent_backrefs(struct extent_record *rec)
2576 {
2577         struct extent_backref *back;
2578         struct list_head *cur;
2579         while (!list_empty(&rec->backrefs)) {
2580                 cur = rec->backrefs.next;
2581                 back = list_entry(cur, struct extent_backref, list);
2582                 list_del(cur);
2583                 free(back);
2584         }
2585         return 0;
2586 }
2587
2588 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
2589                                      struct cache_tree *extent_cache)
2590 {
2591         struct cache_extent *cache;
2592         struct extent_record *rec;
2593
2594         while (1) {
2595                 cache = first_cache_extent(extent_cache);
2596                 if (!cache)
2597                         break;
2598                 rec = container_of(cache, struct extent_record, cache);
2599                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2600                 remove_cache_extent(extent_cache, cache);
2601                 free_all_extent_backrefs(rec);
2602                 free(rec);
2603         }
2604 }
2605
2606 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2607                                  struct extent_record *rec)
2608 {
2609         if (rec->content_checked && rec->owner_ref_checked &&
2610             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2611             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2612                 remove_cache_extent(extent_cache, &rec->cache);
2613                 free_all_extent_backrefs(rec);
2614                 list_del_init(&rec->list);
2615                 free(rec);
2616         }
2617         return 0;
2618 }
2619
2620 static int check_owner_ref(struct btrfs_root *root,
2621                             struct extent_record *rec,
2622                             struct extent_buffer *buf)
2623 {
2624         struct extent_backref *node;
2625         struct tree_backref *back;
2626         struct btrfs_root *ref_root;
2627         struct btrfs_key key;
2628         struct btrfs_path path;
2629         struct extent_buffer *parent;
2630         int level;
2631         int found = 0;
2632         int ret;
2633
2634         list_for_each_entry(node, &rec->backrefs, list) {
2635                 if (node->is_data)
2636                         continue;
2637                 if (!node->found_ref)
2638                         continue;
2639                 if (node->full_backref)
2640                         continue;
2641                 back = (struct tree_backref *)node;
2642                 if (btrfs_header_owner(buf) == back->root)
2643                         return 0;
2644         }
2645         BUG_ON(rec->is_root);
2646
2647         /* try to find the block by search corresponding fs tree */
2648         key.objectid = btrfs_header_owner(buf);
2649         key.type = BTRFS_ROOT_ITEM_KEY;
2650         key.offset = (u64)-1;
2651
2652         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2653         if (IS_ERR(ref_root))
2654                 return 1;
2655
2656         level = btrfs_header_level(buf);
2657         if (level == 0)
2658                 btrfs_item_key_to_cpu(buf, &key, 0);
2659         else
2660                 btrfs_node_key_to_cpu(buf, &key, 0);
2661
2662         btrfs_init_path(&path);
2663         path.lowest_level = level + 1;
2664         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2665         if (ret < 0)
2666                 return 0;
2667
2668         parent = path.nodes[level + 1];
2669         if (parent && buf->start == btrfs_node_blockptr(parent,
2670                                                         path.slots[level + 1]))
2671                 found = 1;
2672
2673         btrfs_release_path(&path);
2674         return found ? 0 : 1;
2675 }
2676
2677 static int is_extent_tree_record(struct extent_record *rec)
2678 {
2679         struct list_head *cur = rec->backrefs.next;
2680         struct extent_backref *node;
2681         struct tree_backref *back;
2682         int is_extent = 0;
2683
2684         while(cur != &rec->backrefs) {
2685                 node = list_entry(cur, struct extent_backref, list);
2686                 cur = cur->next;
2687                 if (node->is_data)
2688                         return 0;
2689                 back = (struct tree_backref *)node;
2690                 if (node->full_backref)
2691                         return 0;
2692                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2693                         is_extent = 1;
2694         }
2695         return is_extent;
2696 }
2697
2698
2699 static int record_bad_block_io(struct btrfs_fs_info *info,
2700                                struct cache_tree *extent_cache,
2701                                u64 start, u64 len)
2702 {
2703         struct extent_record *rec;
2704         struct cache_extent *cache;
2705         struct btrfs_key key;
2706
2707         cache = lookup_cache_extent(extent_cache, start, len);
2708         if (!cache)
2709                 return 0;
2710
2711         rec = container_of(cache, struct extent_record, cache);
2712         if (!is_extent_tree_record(rec))
2713                 return 0;
2714
2715         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2716         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2717 }
2718
2719 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
2720                        struct extent_buffer *buf, int slot)
2721 {
2722         if (btrfs_header_level(buf)) {
2723                 struct btrfs_key_ptr ptr1, ptr2;
2724
2725                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
2726                                    sizeof(struct btrfs_key_ptr));
2727                 read_extent_buffer(buf, &ptr2,
2728                                    btrfs_node_key_ptr_offset(slot + 1),
2729                                    sizeof(struct btrfs_key_ptr));
2730                 write_extent_buffer(buf, &ptr1,
2731                                     btrfs_node_key_ptr_offset(slot + 1),
2732                                     sizeof(struct btrfs_key_ptr));
2733                 write_extent_buffer(buf, &ptr2,
2734                                     btrfs_node_key_ptr_offset(slot),
2735                                     sizeof(struct btrfs_key_ptr));
2736                 if (slot == 0) {
2737                         struct btrfs_disk_key key;
2738                         btrfs_node_key(buf, &key, 0);
2739                         btrfs_fixup_low_keys(root, path, &key,
2740                                              btrfs_header_level(buf) + 1);
2741                 }
2742         } else {
2743                 struct btrfs_item *item1, *item2;
2744                 struct btrfs_key k1, k2;
2745                 char *item1_data, *item2_data;
2746                 u32 item1_offset, item2_offset, item1_size, item2_size;
2747
2748                 item1 = btrfs_item_nr(slot);
2749                 item2 = btrfs_item_nr(slot + 1);
2750                 btrfs_item_key_to_cpu(buf, &k1, slot);
2751                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
2752                 item1_offset = btrfs_item_offset(buf, item1);
2753                 item2_offset = btrfs_item_offset(buf, item2);
2754                 item1_size = btrfs_item_size(buf, item1);
2755                 item2_size = btrfs_item_size(buf, item2);
2756
2757                 item1_data = malloc(item1_size);
2758                 if (!item1_data)
2759                         return -ENOMEM;
2760                 item2_data = malloc(item2_size);
2761                 if (!item2_data) {
2762                         free(item1_data);
2763                         return -ENOMEM;
2764                 }
2765
2766                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
2767                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
2768
2769                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
2770                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
2771                 free(item1_data);
2772                 free(item2_data);
2773
2774                 btrfs_set_item_offset(buf, item1, item2_offset);
2775                 btrfs_set_item_offset(buf, item2, item1_offset);
2776                 btrfs_set_item_size(buf, item1, item2_size);
2777                 btrfs_set_item_size(buf, item2, item1_size);
2778
2779                 path->slots[0] = slot;
2780                 btrfs_set_item_key_unsafe(root, path, &k2);
2781                 path->slots[0] = slot + 1;
2782                 btrfs_set_item_key_unsafe(root, path, &k1);
2783         }
2784         return 0;
2785 }
2786
2787 static int fix_key_order(struct btrfs_trans_handle *trans,
2788                          struct btrfs_root *root,
2789                          struct btrfs_path *path)
2790 {
2791         struct extent_buffer *buf;
2792         struct btrfs_key k1, k2;
2793         int i;
2794         int level = path->lowest_level;
2795         int ret;
2796
2797         buf = path->nodes[level];
2798         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
2799                 if (level) {
2800                         btrfs_node_key_to_cpu(buf, &k1, i);
2801                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
2802                 } else {
2803                         btrfs_item_key_to_cpu(buf, &k1, i);
2804                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
2805                 }
2806                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
2807                         continue;
2808                 ret = swap_values(root, path, buf, i);
2809                 if (ret)
2810                         break;
2811                 btrfs_mark_buffer_dirty(buf);
2812                 i = 0;
2813         }
2814         return ret;
2815 }
2816
2817 static int delete_bogus_item(struct btrfs_trans_handle *trans,
2818                              struct btrfs_root *root,
2819                              struct btrfs_path *path,
2820                              struct extent_buffer *buf, int slot)
2821 {
2822         struct btrfs_key key;
2823         int nritems = btrfs_header_nritems(buf);
2824
2825         btrfs_item_key_to_cpu(buf, &key, slot);
2826
2827         /* These are all the keys we can deal with missing. */
2828         if (key.type != BTRFS_DIR_INDEX_KEY &&
2829             key.type != BTRFS_EXTENT_ITEM_KEY &&
2830             key.type != BTRFS_METADATA_ITEM_KEY &&
2831             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
2832             key.type != BTRFS_EXTENT_DATA_REF_KEY)
2833                 return -1;
2834
2835         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
2836                (unsigned long long)key.objectid, key.type,
2837                (unsigned long long)key.offset, slot, buf->start);
2838         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
2839                               btrfs_item_nr_offset(slot + 1),
2840                               sizeof(struct btrfs_item) *
2841                               (nritems - slot - 1));
2842         btrfs_set_header_nritems(buf, nritems - 1);
2843         if (slot == 0) {
2844                 struct btrfs_disk_key disk_key;
2845
2846                 btrfs_item_key(buf, &disk_key, 0);
2847                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
2848         }
2849         btrfs_mark_buffer_dirty(buf);
2850         return 0;
2851 }
2852
2853 static int fix_item_offset(struct btrfs_trans_handle *trans,
2854                            struct btrfs_root *root,
2855                            struct btrfs_path *path)
2856 {
2857         struct extent_buffer *buf;
2858         int i;
2859         int ret = 0;
2860
2861         /* We should only get this for leaves */
2862         BUG_ON(path->lowest_level);
2863         buf = path->nodes[0];
2864 again:
2865         for (i = 0; i < btrfs_header_nritems(buf); i++) {
2866                 unsigned int shift = 0, offset;
2867
2868                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
2869                     BTRFS_LEAF_DATA_SIZE(root)) {
2870                         if (btrfs_item_end_nr(buf, i) >
2871                             BTRFS_LEAF_DATA_SIZE(root)) {
2872                                 ret = delete_bogus_item(trans, root, path,
2873                                                         buf, i);
2874                                 if (!ret)
2875                                         goto again;
2876                                 fprintf(stderr, "item is off the end of the "
2877                                         "leaf, can't fix\n");
2878                                 ret = -EIO;
2879                                 break;
2880                         }
2881                         shift = BTRFS_LEAF_DATA_SIZE(root) -
2882                                 btrfs_item_end_nr(buf, i);
2883                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
2884                            btrfs_item_offset_nr(buf, i - 1)) {
2885                         if (btrfs_item_end_nr(buf, i) >
2886                             btrfs_item_offset_nr(buf, i - 1)) {
2887                                 ret = delete_bogus_item(trans, root, path,
2888                                                         buf, i);
2889                                 if (!ret)
2890                                         goto again;
2891                                 fprintf(stderr, "items overlap, can't fix\n");
2892                                 ret = -EIO;
2893                                 break;
2894                         }
2895                         shift = btrfs_item_offset_nr(buf, i - 1) -
2896                                 btrfs_item_end_nr(buf, i);
2897                 }
2898                 if (!shift)
2899                         continue;
2900
2901                 printf("Shifting item nr %d by %u bytes in block %llu\n",
2902                        i, shift, (unsigned long long)buf->start);
2903                 offset = btrfs_item_offset_nr(buf, i);
2904                 memmove_extent_buffer(buf,
2905                                       btrfs_leaf_data(buf) + offset + shift,
2906                                       btrfs_leaf_data(buf) + offset,
2907                                       btrfs_item_size_nr(buf, i));
2908                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
2909                                       offset + shift);
2910                 btrfs_mark_buffer_dirty(buf);
2911         }
2912
2913         /*
2914          * We may have moved things, in which case we want to exit so we don't
2915          * write those changes out.  Once we have proper abort functionality in
2916          * progs this can be changed to something nicer.
2917          */
2918         BUG_ON(ret);
2919         return ret;
2920 }
2921
2922 /*
2923  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
2924  * then just return -EIO.
2925  */
2926 static int try_to_fix_bad_block(struct btrfs_trans_handle *trans,
2927                                 struct btrfs_root *root,
2928                                 struct extent_buffer *buf,
2929                                 enum btrfs_tree_block_status status)
2930 {
2931         struct ulist *roots;
2932         struct ulist_node *node;
2933         struct btrfs_root *search_root;
2934         struct btrfs_path *path;
2935         struct ulist_iterator iter;
2936         struct btrfs_key root_key, key;
2937         int ret;
2938
2939         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
2940             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
2941                 return -EIO;
2942
2943         path = btrfs_alloc_path();
2944         if (!path)
2945                 return -EIO;
2946
2947         ret = btrfs_find_all_roots(trans, root->fs_info, buf->start,
2948                                    0, &roots);
2949         if (ret) {
2950                 btrfs_free_path(path);
2951                 return -EIO;
2952         }
2953
2954         ULIST_ITER_INIT(&iter);
2955         while ((node = ulist_next(roots, &iter))) {
2956                 root_key.objectid = node->val;
2957                 root_key.type = BTRFS_ROOT_ITEM_KEY;
2958                 root_key.offset = (u64)-1;
2959
2960                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
2961                 if (IS_ERR(root)) {
2962                         ret = -EIO;
2963                         break;
2964                 }
2965
2966                 record_root_in_trans(trans, search_root);
2967
2968                 path->lowest_level = btrfs_header_level(buf);
2969                 path->skip_check_block = 1;
2970                 if (path->lowest_level)
2971                         btrfs_node_key_to_cpu(buf, &key, 0);
2972                 else
2973                         btrfs_item_key_to_cpu(buf, &key, 0);
2974                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
2975                 if (ret) {
2976                         ret = -EIO;
2977                         break;
2978                 }
2979                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
2980                         ret = fix_key_order(trans, search_root, path);
2981                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
2982                         ret = fix_item_offset(trans, search_root, path);
2983                 if (ret)
2984                         break;
2985                 btrfs_release_path(path);
2986         }
2987         ulist_free(roots);
2988         btrfs_free_path(path);
2989         return ret;
2990 }
2991
2992 static int check_block(struct btrfs_trans_handle *trans,
2993                        struct btrfs_root *root,
2994                        struct cache_tree *extent_cache,
2995                        struct extent_buffer *buf, u64 flags)
2996 {
2997         struct extent_record *rec;
2998         struct cache_extent *cache;
2999         struct btrfs_key key;
3000         enum btrfs_tree_block_status status;
3001         int ret = 0;
3002         int level;
3003
3004         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
3005         if (!cache)
3006                 return 1;
3007         rec = container_of(cache, struct extent_record, cache);
3008         rec->generation = btrfs_header_generation(buf);
3009
3010         level = btrfs_header_level(buf);
3011         if (btrfs_header_nritems(buf) > 0) {
3012
3013                 if (level == 0)
3014                         btrfs_item_key_to_cpu(buf, &key, 0);
3015                 else
3016                         btrfs_node_key_to_cpu(buf, &key, 0);
3017
3018                 rec->info_objectid = key.objectid;
3019         }
3020         rec->info_level = level;
3021
3022         if (btrfs_is_leaf(buf))
3023                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
3024         else
3025                 status = btrfs_check_node(root, &rec->parent_key, buf);
3026
3027         if (status != BTRFS_TREE_BLOCK_CLEAN) {
3028                 if (repair)
3029                         status = try_to_fix_bad_block(trans, root, buf,
3030                                                       status);
3031                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
3032                         ret = -EIO;
3033                         fprintf(stderr, "bad block %llu\n",
3034                                 (unsigned long long)buf->start);
3035                 } else {
3036                         /*
3037                          * Signal to callers we need to start the scan over
3038                          * again since we'll have cow'ed blocks.
3039                          */
3040                         ret = -EAGAIN;
3041                 }
3042         } else {
3043                 rec->content_checked = 1;
3044                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
3045                         rec->owner_ref_checked = 1;
3046                 else {
3047                         ret = check_owner_ref(root, rec, buf);
3048                         if (!ret)
3049                                 rec->owner_ref_checked = 1;
3050                 }
3051         }
3052         if (!ret)
3053                 maybe_free_extent_rec(extent_cache, rec);
3054         return ret;
3055 }
3056
3057 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3058                                                 u64 parent, u64 root)
3059 {
3060         struct list_head *cur = rec->backrefs.next;
3061         struct extent_backref *node;
3062         struct tree_backref *back;
3063
3064         while(cur != &rec->backrefs) {
3065                 node = list_entry(cur, struct extent_backref, list);
3066                 cur = cur->next;
3067                 if (node->is_data)
3068                         continue;
3069                 back = (struct tree_backref *)node;
3070                 if (parent > 0) {
3071                         if (!node->full_backref)
3072                                 continue;
3073                         if (parent == back->parent)
3074                                 return back;
3075                 } else {
3076                         if (node->full_backref)
3077                                 continue;
3078                         if (back->root == root)
3079                                 return back;
3080                 }
3081         }
3082         return NULL;
3083 }
3084
3085 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
3086                                                 u64 parent, u64 root)
3087 {
3088         struct tree_backref *ref = malloc(sizeof(*ref));
3089         memset(&ref->node, 0, sizeof(ref->node));
3090         if (parent > 0) {
3091                 ref->parent = parent;
3092                 ref->node.full_backref = 1;
3093         } else {
3094                 ref->root = root;
3095                 ref->node.full_backref = 0;
3096         }
3097         list_add_tail(&ref->node.list, &rec->backrefs);
3098
3099         return ref;
3100 }
3101
3102 static struct data_backref *find_data_backref(struct extent_record *rec,
3103                                                 u64 parent, u64 root,
3104                                                 u64 owner, u64 offset,
3105                                                 int found_ref,
3106                                                 u64 disk_bytenr, u64 bytes)
3107 {
3108         struct list_head *cur = rec->backrefs.next;
3109         struct extent_backref *node;
3110         struct data_backref *back;
3111
3112         while(cur != &rec->backrefs) {
3113                 node = list_entry(cur, struct extent_backref, list);
3114                 cur = cur->next;
3115                 if (!node->is_data)
3116                         continue;
3117                 back = (struct data_backref *)node;
3118                 if (parent > 0) {
3119                         if (!node->full_backref)
3120                                 continue;
3121                         if (parent == back->parent)
3122                                 return back;
3123                 } else {
3124                         if (node->full_backref)
3125                                 continue;
3126                         if (back->root == root && back->owner == owner &&
3127                             back->offset == offset) {
3128                                 if (found_ref && node->found_ref &&
3129                                     (back->bytes != bytes ||
3130                                     back->disk_bytenr != disk_bytenr))
3131                                         continue;
3132                                 return back;
3133                         }
3134                 }
3135         }
3136         return NULL;
3137 }
3138
3139 static struct data_backref *alloc_data_backref(struct extent_record *rec,
3140                                                 u64 parent, u64 root,
3141                                                 u64 owner, u64 offset,
3142                                                 u64 max_size)
3143 {
3144         struct data_backref *ref = malloc(sizeof(*ref));
3145         memset(&ref->node, 0, sizeof(ref->node));
3146         ref->node.is_data = 1;
3147
3148         if (parent > 0) {
3149                 ref->parent = parent;
3150                 ref->owner = 0;
3151                 ref->offset = 0;
3152                 ref->node.full_backref = 1;
3153         } else {
3154                 ref->root = root;
3155                 ref->owner = owner;
3156                 ref->offset = offset;
3157                 ref->node.full_backref = 0;
3158         }
3159         ref->bytes = max_size;
3160         ref->found_ref = 0;
3161         ref->num_refs = 0;
3162         list_add_tail(&ref->node.list, &rec->backrefs);
3163         if (max_size > rec->max_size)
3164                 rec->max_size = max_size;
3165         return ref;
3166 }
3167
3168 static int add_extent_rec(struct cache_tree *extent_cache,
3169                           struct btrfs_key *parent_key, u64 parent_gen,
3170                           u64 start, u64 nr, u64 extent_item_refs,
3171                           int is_root, int inc_ref, int set_checked,
3172                           int metadata, int extent_rec, u64 max_size)
3173 {
3174         struct extent_record *rec;
3175         struct cache_extent *cache;
3176         int ret = 0;
3177         int dup = 0;
3178
3179         cache = lookup_cache_extent(extent_cache, start, nr);
3180         if (cache) {
3181                 rec = container_of(cache, struct extent_record, cache);
3182                 if (inc_ref)
3183                         rec->refs++;
3184                 if (rec->nr == 1)
3185                         rec->nr = max(nr, max_size);
3186
3187                 /*
3188                  * We need to make sure to reset nr to whatever the extent
3189                  * record says was the real size, this way we can compare it to
3190                  * the backrefs.
3191                  */
3192                 if (extent_rec) {
3193                         if (start != rec->start || rec->found_rec) {
3194                                 struct extent_record *tmp;
3195
3196                                 dup = 1;
3197                                 if (list_empty(&rec->list))
3198                                         list_add_tail(&rec->list,
3199                                                       &duplicate_extents);
3200
3201                                 /*
3202                                  * We have to do this song and dance in case we
3203                                  * find an extent record that falls inside of
3204                                  * our current extent record but does not have
3205                                  * the same objectid.
3206                                  */
3207                                 tmp = malloc(sizeof(*tmp));
3208                                 if (!tmp)
3209                                         return -ENOMEM;
3210                                 tmp->start = start;
3211                                 tmp->max_size = max_size;
3212                                 tmp->nr = nr;
3213                                 tmp->found_rec = 1;
3214                                 tmp->metadata = metadata;
3215                                 tmp->extent_item_refs = extent_item_refs;
3216                                 INIT_LIST_HEAD(&tmp->list);
3217                                 list_add_tail(&tmp->list, &rec->dups);
3218                                 rec->num_duplicates++;
3219                         } else {
3220                                 rec->nr = nr;
3221                                 rec->found_rec = 1;
3222                         }
3223                 }
3224
3225                 if (extent_item_refs && !dup) {
3226                         if (rec->extent_item_refs) {
3227                                 fprintf(stderr, "block %llu rec "
3228                                         "extent_item_refs %llu, passed %llu\n",
3229                                         (unsigned long long)start,
3230                                         (unsigned long long)
3231                                                         rec->extent_item_refs,
3232                                         (unsigned long long)extent_item_refs);
3233                         }
3234                         rec->extent_item_refs = extent_item_refs;
3235                 }
3236                 if (is_root)
3237                         rec->is_root = 1;
3238                 if (set_checked) {
3239                         rec->content_checked = 1;
3240                         rec->owner_ref_checked = 1;
3241                 }
3242
3243                 if (parent_key)
3244                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
3245                 if (parent_gen)
3246                         rec->parent_generation = parent_gen;
3247
3248                 if (rec->max_size < max_size)
3249                         rec->max_size = max_size;
3250
3251                 maybe_free_extent_rec(extent_cache, rec);
3252                 return ret;
3253         }
3254         rec = malloc(sizeof(*rec));
3255         rec->start = start;
3256         rec->max_size = max_size;
3257         rec->nr = max(nr, max_size);
3258         rec->found_rec = !!extent_rec;
3259         rec->content_checked = 0;
3260         rec->owner_ref_checked = 0;
3261         rec->num_duplicates = 0;
3262         rec->metadata = metadata;
3263         INIT_LIST_HEAD(&rec->backrefs);
3264         INIT_LIST_HEAD(&rec->dups);
3265         INIT_LIST_HEAD(&rec->list);
3266
3267         if (is_root)
3268                 rec->is_root = 1;
3269         else
3270                 rec->is_root = 0;
3271
3272         if (inc_ref)
3273                 rec->refs = 1;
3274         else
3275                 rec->refs = 0;
3276
3277         if (extent_item_refs)
3278                 rec->extent_item_refs = extent_item_refs;
3279         else
3280                 rec->extent_item_refs = 0;
3281
3282         if (parent_key)
3283                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
3284         else
3285                 memset(&rec->parent_key, 0, sizeof(*parent_key));
3286
3287         if (parent_gen)
3288                 rec->parent_generation = parent_gen;
3289         else
3290                 rec->parent_generation = 0;
3291
3292         rec->cache.start = start;
3293         rec->cache.size = nr;
3294         ret = insert_cache_extent(extent_cache, &rec->cache);
3295         BUG_ON(ret);
3296         bytes_used += nr;
3297         if (set_checked) {
3298                 rec->content_checked = 1;
3299                 rec->owner_ref_checked = 1;
3300         }
3301         return ret;
3302 }
3303
3304 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
3305                             u64 parent, u64 root, int found_ref)
3306 {
3307         struct extent_record *rec;
3308         struct tree_backref *back;
3309         struct cache_extent *cache;
3310
3311         cache = lookup_cache_extent(extent_cache, bytenr, 1);
3312         if (!cache) {
3313                 add_extent_rec(extent_cache, NULL, 0, bytenr,
3314                                1, 0, 0, 0, 0, 1, 0, 0);
3315                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
3316                 if (!cache)
3317                         abort();
3318         }
3319
3320         rec = container_of(cache, struct extent_record, cache);
3321         if (rec->start != bytenr) {
3322                 abort();
3323         }
3324
3325         back = find_tree_backref(rec, parent, root);
3326         if (!back)
3327                 back = alloc_tree_backref(rec, parent, root);
3328
3329         if (found_ref) {
3330                 if (back->node.found_ref) {
3331                         fprintf(stderr, "Extent back ref already exists "
3332                                 "for %llu parent %llu root %llu \n",
3333                                 (unsigned long long)bytenr,
3334                                 (unsigned long long)parent,
3335                                 (unsigned long long)root);
3336                 }
3337                 back->node.found_ref = 1;
3338         } else {
3339                 if (back->node.found_extent_tree) {
3340                         fprintf(stderr, "Extent back ref already exists "
3341                                 "for %llu parent %llu root %llu \n",
3342                                 (unsigned long long)bytenr,
3343                                 (unsigned long long)parent,
3344                                 (unsigned long long)root);
3345                 }
3346                 back->node.found_extent_tree = 1;
3347         }
3348         maybe_free_extent_rec(extent_cache, rec);
3349         return 0;
3350 }
3351
3352 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
3353                             u64 parent, u64 root, u64 owner, u64 offset,
3354                             u32 num_refs, int found_ref, u64 max_size)
3355 {
3356         struct extent_record *rec;
3357         struct data_backref *back;
3358         struct cache_extent *cache;
3359
3360         cache = lookup_cache_extent(extent_cache, bytenr, 1);
3361         if (!cache) {
3362                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
3363                                0, 0, max_size);
3364                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
3365                 if (!cache)
3366                         abort();
3367         }
3368
3369         rec = container_of(cache, struct extent_record, cache);
3370         if (rec->max_size < max_size)
3371                 rec->max_size = max_size;
3372
3373         /*
3374          * If found_ref is set then max_size is the real size and must match the
3375          * existing refs.  So if we have already found a ref then we need to
3376          * make sure that this ref matches the existing one, otherwise we need
3377          * to add a new backref so we can notice that the backrefs don't match
3378          * and we need to figure out who is telling the truth.  This is to
3379          * account for that awful fsync bug I introduced where we'd end up with
3380          * a btrfs_file_extent_item that would have its length include multiple
3381          * prealloc extents or point inside of a prealloc extent.
3382          */
3383         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
3384                                  bytenr, max_size);
3385         if (!back)
3386                 back = alloc_data_backref(rec, parent, root, owner, offset,
3387                                           max_size);
3388
3389         if (found_ref) {
3390                 BUG_ON(num_refs != 1);
3391                 if (back->node.found_ref)
3392                         BUG_ON(back->bytes != max_size);
3393                 back->node.found_ref = 1;
3394                 back->found_ref += 1;
3395                 back->bytes = max_size;
3396                 back->disk_bytenr = bytenr;
3397                 rec->refs += 1;
3398                 rec->content_checked = 1;
3399                 rec->owner_ref_checked = 1;
3400         } else {
3401                 if (back->node.found_extent_tree) {
3402                         fprintf(stderr, "Extent back ref already exists "
3403                                 "for %llu parent %llu root %llu "
3404                                 "owner %llu offset %llu num_refs %lu\n",
3405                                 (unsigned long long)bytenr,
3406                                 (unsigned long long)parent,
3407                                 (unsigned long long)root,
3408                                 (unsigned long long)owner,
3409                                 (unsigned long long)offset,
3410                                 (unsigned long)num_refs);
3411                 }
3412                 back->num_refs = num_refs;
3413                 back->node.found_extent_tree = 1;
3414         }
3415         maybe_free_extent_rec(extent_cache, rec);
3416         return 0;
3417 }
3418
3419 static int add_pending(struct cache_tree *pending,
3420                        struct cache_tree *seen, u64 bytenr, u32 size)
3421 {
3422         int ret;
3423         ret = add_cache_extent(seen, bytenr, size);
3424         if (ret)
3425                 return ret;
3426         add_cache_extent(pending, bytenr, size);
3427         return 0;
3428 }
3429
3430 static int pick_next_pending(struct cache_tree *pending,
3431                         struct cache_tree *reada,
3432                         struct cache_tree *nodes,
3433                         u64 last, struct block_info *bits, int bits_nr,
3434                         int *reada_bits)
3435 {
3436         unsigned long node_start = last;
3437         struct cache_extent *cache;
3438         int ret;
3439
3440         cache = search_cache_extent(reada, 0);
3441         if (cache) {
3442                 bits[0].start = cache->start;
3443                 bits[0].size = cache->size;
3444                 *reada_bits = 1;
3445                 return 1;
3446         }
3447         *reada_bits = 0;
3448         if (node_start > 32768)
3449                 node_start -= 32768;
3450
3451         cache = search_cache_extent(nodes, node_start);
3452         if (!cache)
3453                 cache = search_cache_extent(nodes, 0);
3454
3455         if (!cache) {
3456                  cache = search_cache_extent(pending, 0);
3457                  if (!cache)
3458                          return 0;
3459                  ret = 0;
3460                  do {
3461                          bits[ret].start = cache->start;
3462                          bits[ret].size = cache->size;
3463                          cache = next_cache_extent(cache);
3464                          ret++;
3465                  } while (cache && ret < bits_nr);
3466                  return ret;
3467         }
3468
3469         ret = 0;
3470         do {
3471                 bits[ret].start = cache->start;
3472                 bits[ret].size = cache->size;
3473                 cache = next_cache_extent(cache);
3474                 ret++;
3475         } while (cache && ret < bits_nr);
3476
3477         if (bits_nr - ret > 8) {
3478                 u64 lookup = bits[0].start + bits[0].size;
3479                 struct cache_extent *next;
3480                 next = search_cache_extent(pending, lookup);
3481                 while(next) {
3482                         if (next->start - lookup > 32768)
3483                                 break;
3484                         bits[ret].start = next->start;
3485                         bits[ret].size = next->size;
3486                         lookup = next->start + next->size;
3487                         ret++;
3488                         if (ret == bits_nr)
3489                                 break;
3490                         next = next_cache_extent(next);
3491                         if (!next)
3492                                 break;
3493                 }
3494         }
3495         return ret;
3496 }
3497
3498 static void free_chunk_record(struct cache_extent *cache)
3499 {
3500         struct chunk_record *rec;
3501
3502         rec = container_of(cache, struct chunk_record, cache);
3503         list_del_init(&rec->list);
3504         list_del_init(&rec->dextents);
3505         free(rec);
3506 }
3507
3508 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
3509 {
3510         cache_tree_free_extents(chunk_cache, free_chunk_record);
3511 }
3512
3513 static void free_device_record(struct rb_node *node)
3514 {
3515         struct device_record *rec;
3516
3517         rec = container_of(node, struct device_record, node);
3518         free(rec);
3519 }
3520
3521 FREE_RB_BASED_TREE(device_cache, free_device_record);
3522
3523 int insert_block_group_record(struct block_group_tree *tree,
3524                               struct block_group_record *bg_rec)
3525 {
3526         int ret;
3527
3528         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
3529         if (ret)
3530                 return ret;
3531
3532         list_add_tail(&bg_rec->list, &tree->block_groups);
3533         return 0;
3534 }
3535
3536 static void free_block_group_record(struct cache_extent *cache)
3537 {
3538         struct block_group_record *rec;
3539
3540         rec = container_of(cache, struct block_group_record, cache);
3541         list_del_init(&rec->list);
3542         free(rec);
3543 }
3544
3545 void free_block_group_tree(struct block_group_tree *tree)
3546 {
3547         cache_tree_free_extents(&tree->tree, free_block_group_record);
3548 }
3549
3550 int insert_device_extent_record(struct device_extent_tree *tree,
3551                                 struct device_extent_record *de_rec)
3552 {
3553         int ret;
3554
3555         /*
3556          * Device extent is a bit different from the other extents, because
3557          * the extents which belong to the different devices may have the
3558          * same start and size, so we need use the special extent cache
3559          * search/insert functions.
3560          */
3561         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
3562         if (ret)
3563                 return ret;
3564
3565         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
3566         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
3567         return 0;
3568 }
3569
3570 static void free_device_extent_record(struct cache_extent *cache)
3571 {
3572         struct device_extent_record *rec;
3573
3574         rec = container_of(cache, struct device_extent_record, cache);
3575         if (!list_empty(&rec->chunk_list))
3576                 list_del_init(&rec->chunk_list);
3577         if (!list_empty(&rec->device_list))
3578                 list_del_init(&rec->device_list);
3579         free(rec);
3580 }
3581
3582 void free_device_extent_tree(struct device_extent_tree *tree)
3583 {
3584         cache_tree_free_extents(&tree->tree, free_device_extent_record);
3585 }
3586
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref; anything else is recorded as a data backref carrying the item's
 * ref count.  In both cases the item key's objectid is passed as the bytenr
 * and its offset as the parent.  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	else
		add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);

	return 0;
}
#endif
3605
3606 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
3607                                             struct btrfs_key *key,
3608                                             int slot)
3609 {
3610         struct btrfs_chunk *ptr;
3611         struct chunk_record *rec;
3612         int num_stripes, i;
3613
3614         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3615         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
3616
3617         rec = malloc(btrfs_chunk_record_size(num_stripes));
3618         if (!rec) {
3619                 fprintf(stderr, "memory allocation failed\n");
3620                 exit(-1);
3621         }
3622
3623         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
3624
3625         INIT_LIST_HEAD(&rec->list);
3626         INIT_LIST_HEAD(&rec->dextents);
3627         rec->bg_rec = NULL;
3628
3629         rec->cache.start = key->offset;
3630         rec->cache.size = btrfs_chunk_length(leaf, ptr);
3631
3632         rec->generation = btrfs_header_generation(leaf);
3633
3634         rec->objectid = key->objectid;
3635         rec->type = key->type;
3636         rec->offset = key->offset;
3637
3638         rec->length = rec->cache.size;
3639         rec->owner = btrfs_chunk_owner(leaf, ptr);
3640         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
3641         rec->type_flags = btrfs_chunk_type(leaf, ptr);
3642         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
3643         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
3644         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
3645         rec->num_stripes = num_stripes;
3646         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
3647
3648         for (i = 0; i < rec->num_stripes; ++i) {
3649                 rec->stripes[i].devid =
3650                         btrfs_stripe_devid_nr(leaf, ptr, i);
3651                 rec->stripes[i].offset =
3652                         btrfs_stripe_offset_nr(leaf, ptr, i);
3653                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
3654                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
3655                                 BTRFS_UUID_SIZE);
3656         }
3657
3658         return rec;
3659 }
3660
3661 static int process_chunk_item(struct cache_tree *chunk_cache,
3662                               struct btrfs_key *key, struct extent_buffer *eb,
3663                               int slot)
3664 {
3665         struct chunk_record *rec;
3666         int ret = 0;
3667
3668         rec = btrfs_new_chunk_record(eb, key, slot);
3669         ret = insert_cache_extent(chunk_cache, &rec->cache);
3670         if (ret) {
3671                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
3672                         rec->offset, rec->length);
3673                 free(rec);
3674         }
3675
3676         return ret;
3677 }
3678
3679 static int process_device_item(struct rb_root *dev_cache,
3680                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
3681 {
3682         struct btrfs_dev_item *ptr;
3683         struct device_record *rec;
3684         int ret = 0;
3685
3686         ptr = btrfs_item_ptr(eb,
3687                 slot, struct btrfs_dev_item);
3688
3689         rec = malloc(sizeof(*rec));
3690         if (!rec) {
3691                 fprintf(stderr, "memory allocation failed\n");
3692                 return -ENOMEM;
3693         }
3694
3695         rec->devid = key->offset;
3696         rec->generation = btrfs_header_generation(eb);
3697
3698         rec->objectid = key->objectid;
3699         rec->type = key->type;
3700         rec->offset = key->offset;
3701
3702         rec->devid = btrfs_device_id(eb, ptr);
3703         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
3704         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
3705
3706         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
3707         if (ret) {
3708                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
3709                 free(rec);
3710         }
3711
3712         return ret;
3713 }
3714
3715 struct block_group_record *
3716 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
3717                              int slot)
3718 {
3719         struct btrfs_block_group_item *ptr;
3720         struct block_group_record *rec;
3721
3722         rec = malloc(sizeof(*rec));
3723         if (!rec) {
3724                 fprintf(stderr, "memory allocation failed\n");
3725                 exit(-1);
3726         }
3727         memset(rec, 0, sizeof(*rec));
3728
3729         rec->cache.start = key->objectid;
3730         rec->cache.size = key->offset;
3731
3732         rec->generation = btrfs_header_generation(leaf);
3733
3734         rec->objectid = key->objectid;
3735         rec->type = key->type;
3736         rec->offset = key->offset;
3737
3738         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
3739         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
3740
3741         INIT_LIST_HEAD(&rec->list);
3742
3743         return rec;
3744 }
3745
3746 static int process_block_group_item(struct block_group_tree *block_group_cache,
3747                                     struct btrfs_key *key,
3748                                     struct extent_buffer *eb, int slot)
3749 {
3750         struct block_group_record *rec;
3751         int ret = 0;
3752
3753         rec = btrfs_new_block_group_record(eb, key, slot);
3754         ret = insert_block_group_record(block_group_cache, rec);
3755         if (ret) {
3756                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
3757                         rec->objectid, rec->offset);
3758                 free(rec);
3759         }
3760
3761         return ret;
3762 }
3763
3764 struct device_extent_record *
3765 btrfs_new_device_extent_record(struct extent_buffer *leaf,
3766                                struct btrfs_key *key, int slot)
3767 {
3768         struct device_extent_record *rec;
3769         struct btrfs_dev_extent *ptr;
3770
3771         rec = malloc(sizeof(*rec));
3772         if (!rec) {
3773                 fprintf(stderr, "memory allocation failed\n");
3774                 exit(-1);
3775         }
3776         memset(rec, 0, sizeof(*rec));
3777
3778         rec->cache.objectid = key->objectid;
3779         rec->cache.start = key->offset;
3780
3781         rec->generation = btrfs_header_generation(leaf);
3782
3783         rec->objectid = key->objectid;
3784         rec->type = key->type;
3785         rec->offset = key->offset;
3786
3787         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
3788         rec->chunk_objecteid =
3789                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
3790         rec->chunk_offset =
3791                 btrfs_dev_extent_chunk_offset(leaf, ptr);
3792         rec->length = btrfs_dev_extent_length(leaf, ptr);
3793         rec->cache.size = rec->length;
3794
3795         INIT_LIST_HEAD(&rec->chunk_list);
3796         INIT_LIST_HEAD(&rec->device_list);
3797
3798         return rec;
3799 }
3800
3801 static int
3802 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
3803                            struct btrfs_key *key, struct extent_buffer *eb,
3804                            int slot)
3805 {
3806         struct device_extent_record *rec;
3807         int ret;
3808
3809         rec = btrfs_new_device_extent_record(eb, key, slot);
3810         ret = insert_device_extent_record(dev_extent_cache, rec);
3811         if (ret) {
3812                 fprintf(stderr,
3813                         "Device extent[%llu, %llu, %llu] existed.\n",
3814                         rec->objectid, rec->offset, rec->length);
3815                 free(rec);
3816         }
3817
3818         return ret;
3819 }
3820
3821 static int process_extent_item(struct btrfs_root *root,
3822                                struct cache_tree *extent_cache,
3823                                struct extent_buffer *eb, int slot)
3824 {
3825         struct btrfs_extent_item *ei;
3826         struct btrfs_extent_inline_ref *iref;
3827         struct btrfs_extent_data_ref *dref;
3828         struct btrfs_shared_data_ref *sref;
3829         struct btrfs_key key;
3830         unsigned long end;
3831         unsigned long ptr;
3832         int type;
3833         u32 item_size = btrfs_item_size_nr(eb, slot);
3834         u64 refs = 0;
3835         u64 offset;
3836         u64 num_bytes;
3837         int metadata = 0;
3838
3839         btrfs_item_key_to_cpu(eb, &key, slot);
3840
3841         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3842                 metadata = 1;
3843                 num_bytes = root->leafsize;
3844         } else {
3845                 num_bytes = key.offset;
3846         }
3847
3848         if (item_size < sizeof(*ei)) {
3849 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3850                 struct btrfs_extent_item_v0 *ei0;
3851                 BUG_ON(item_size != sizeof(*ei0));
3852                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
3853                 refs = btrfs_extent_refs_v0(eb, ei0);
3854 #else
3855                 BUG();
3856 #endif
3857                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
3858                                       num_bytes, refs, 0, 0, 0, metadata, 1,
3859                                       num_bytes);
3860         }
3861
3862         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
3863         refs = btrfs_extent_refs(eb, ei);
3864
3865         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
3866                        refs, 0, 0, 0, metadata, 1, num_bytes);
3867
3868         ptr = (unsigned long)(ei + 1);
3869         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
3870             key.type == BTRFS_EXTENT_ITEM_KEY)
3871                 ptr += sizeof(struct btrfs_tree_block_info);
3872
3873         end = (unsigned long)ei + item_size;
3874         while (ptr < end) {
3875                 iref = (struct btrfs_extent_inline_ref *)ptr;
3876                 type = btrfs_extent_inline_ref_type(eb, iref);
3877                 offset = btrfs_extent_inline_ref_offset(eb, iref);
3878                 switch (type) {
3879                 case BTRFS_TREE_BLOCK_REF_KEY:
3880                         add_tree_backref(extent_cache, key.objectid,
3881                                          0, offset, 0);
3882                         break;
3883                 case BTRFS_SHARED_BLOCK_REF_KEY:
3884                         add_tree_backref(extent_cache, key.objectid,
3885                                          offset, 0, 0);
3886                         break;
3887                 case BTRFS_EXTENT_DATA_REF_KEY:
3888                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3889                         add_data_backref(extent_cache, key.objectid, 0,
3890                                         btrfs_extent_data_ref_root(eb, dref),
3891                                         btrfs_extent_data_ref_objectid(eb,
3892                                                                        dref),
3893                                         btrfs_extent_data_ref_offset(eb, dref),
3894                                         btrfs_extent_data_ref_count(eb, dref),
3895                                         0, num_bytes);
3896                         break;
3897                 case BTRFS_SHARED_DATA_REF_KEY:
3898                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
3899                         add_data_backref(extent_cache, key.objectid, offset,
3900                                         0, 0, 0,
3901                                         btrfs_shared_data_ref_count(eb, sref),
3902                                         0, num_bytes);
3903                         break;
3904                 default:
3905                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
3906                                 key.objectid, key.type, num_bytes);
3907                         goto out;
3908                 }
3909                 ptr += btrfs_extent_inline_ref_size(type);
3910         }
3911         WARN_ON(ptr > end);
3912 out:
3913         return 0;
3914 }
3915
/*
 * Verify that the free space cache of @cache contains exactly one entry
 * describing the range [@offset, @offset + @bytes), after the parts of that
 * range overlapped by super block mirror stripes have been trimmed away.
 * On success the matching entry is unlinked from the cache and freed.
 *
 * Returns 0 when the entry matches or when the range is entirely covered by
 * a super block stripe, -EINVAL when no entry exists or its offset/length
 * differ, or the error returned by btrfs_rmap_block().
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                /*
                 * Presumably translates the super block copy's position into
                 * logical addresses inside this block group (nr entries of
                 * stripe_len bytes each) -- confirm against btrfs_rmap_block().
                 */
                ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                while (nr--) {
                        /* Stripe lies completely before the range. */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        /* Stripe lies completely after the range. */
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* Stripe starts with the range and covers it. */
                                if (stripe_len >= bytes) {
                                        kfree(logical);
                                        return 0;
                                }
                                /* Cut the stripe off the front of the range. */
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* Stripe starts earlier and covers the range. */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        kfree(logical);
                                        return 0;
                                }
                                /* Keep only what lies behind the stripe. */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        kfree(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                kfree(logical);
        }

        /* The trimmed range must match a cache entry exactly. */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /* Verified: drop the entry so leftovers can be detected by the caller. */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
4009
4010 static int verify_space_cache(struct btrfs_root *root,
4011                               struct btrfs_block_group_cache *cache)
4012 {
4013         struct btrfs_path *path;
4014         struct extent_buffer *leaf;
4015         struct btrfs_key key;
4016         u64 last;
4017         int ret = 0;
4018
4019         path = btrfs_alloc_path();
4020         if (!path)
4021                 return -ENOMEM;
4022
4023         root = root->fs_info->extent_root;
4024
4025         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
4026
4027         key.objectid = last;
4028         key.offset = 0;
4029         key.type = BTRFS_EXTENT_ITEM_KEY;
4030
4031         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4032         if (ret < 0)
4033                 goto out;
4034         ret = 0;
4035         while (1) {
4036                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4037                         ret = btrfs_next_leaf(root, path);
4038                         if (ret < 0)
4039                                 goto out;
4040                         if (ret > 0) {
4041                                 ret = 0;
4042                                 break;
4043                         }
4044                 }
4045                 leaf = path->nodes[0];
4046                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4047                 if (key.objectid >= cache->key.offset + cache->key.objectid)
4048                         break;
4049                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
4050                     key.type != BTRFS_METADATA_ITEM_KEY) {
4051                         path->slots[0]++;
4052                         continue;
4053                 }
4054
4055                 if (last == key.objectid) {
4056                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
4057                                 last = key.objectid + key.offset;
4058                         else
4059                                 last = key.objectid + root->leafsize;
4060                         path->slots[0]++;
4061                         continue;
4062                 }
4063
4064                 ret = check_cache_range(root, cache, last,
4065                                         key.objectid - last);
4066                 if (ret)
4067                         break;
4068                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
4069                         last = key.objectid + key.offset;
4070                 else
4071                         last = key.objectid + root->leafsize;
4072                 path->slots[0]++;
4073         }
4074
4075         if (last < cache->key.objectid + cache->key.offset)
4076                 ret = check_cache_range(root, cache, last,
4077                                         cache->key.objectid +
4078                                         cache->key.offset - last);
4079
4080 out:
4081         btrfs_free_path(path);
4082
4083         if (!ret &&
4084             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
4085                 fprintf(stderr, "There are still entries left in the space "
4086                         "cache\n");
4087                 ret = -EINVAL;
4088         }
4089
4090         return ret;
4091 }
4092
4093 static int check_space_cache(struct btrfs_root *root)
4094 {
4095         struct btrfs_block_group_cache *cache;
4096         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
4097         int ret;
4098         int error = 0;
4099
4100         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
4101             btrfs_super_generation(root->fs_info->super_copy) !=
4102             btrfs_super_cache_generation(root->fs_info->super_copy)) {
4103                 printf("cache and super generation don't match, space cache "
4104                        "will be invalidated\n");
4105                 return 0;
4106         }
4107
4108         while (1) {
4109                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
4110                 if (!cache)
4111                         break;
4112
4113                 start = cache->key.objectid + cache->key.offset;
4114                 if (!cache->free_space_ctl) {
4115                         if (btrfs_init_free_space_ctl(cache,
4116                                                       root->sectorsize)) {
4117                                 ret = -ENOMEM;
4118                                 break;
4119                         }
4120                 } else {
4121                         btrfs_remove_free_space_cache(cache);
4122                 }
4123
4124                 ret = load_free_space_cache(root->fs_info, cache);
4125                 if (!ret)
4126                         continue;
4127
4128                 ret = verify_space_cache(root, cache);
4129                 if (ret) {
4130                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
4131                                 cache->key.objectid);
4132                         error++;
4133                 }
4134         }
4135
4136         return error ? -EINVAL : 0;
4137 }
4138
4139 static int read_extent_data(struct btrfs_root *root, char *data,
4140                         u64 logical, u64 *len, int mirror)
4141 {
4142         u64 offset = 0;
4143         struct btrfs_multi_bio *multi = NULL;
4144         struct btrfs_fs_info *info = root->fs_info;
4145         struct btrfs_device *device;
4146         int ret = 0;
4147         u64 max_len = *len;
4148
4149         ret = btrfs_map_block(&info->mapping_tree, READ, logical, len,
4150                               &multi, mirror, NULL);
4151         if (ret) {
4152                 fprintf(stderr, "Couldn't map the block %llu\n",
4153                                 logical + offset);
4154                 goto err;
4155         }
4156         device = multi->stripes[0].dev;
4157
4158         if (device->fd == 0)
4159                 goto err;
4160         if (*len > max_len)
4161                 *len = max_len;
4162
4163         ret = pread64(device->fd, data, *len, multi->stripes[0].physical);
4164         if (ret != *len)
4165                 ret = -EIO;
4166         else
4167                 ret = 0;
4168 err:
4169         kfree(multi);
4170         return ret;
4171 }
4172
/*
 * Recompute the checksum of every sector in the data range
 * [@bytenr, @bytenr + @num_bytes) and compare it with the expected value
 * stored in @eb.  The expected checksum of sector N of the range is read
 * from @eb at @leaf_offset + N * csum_size.
 *
 * A mismatch is printed to stderr; while further mirrors are available the
 * current chunk is read again from the next mirror.  A mismatch by itself
 * does not change the return value.
 *
 * Returns 0 when all reads succeeded, -EINVAL when @num_bytes is not a
 * multiple of the sector size, -ENOMEM when the data buffer cannot be
 * allocated, or the error returned by read_extent_data().
 */
static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
                        u64 num_bytes, unsigned long leaf_offset,
                        struct extent_buffer *eb) {

        u64 offset = 0;
        u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
        char *data;
        unsigned long csum_offset;
        u32 csum;
        u32 csum_expected;
        u64 read_len;
        u64 data_checked = 0;
        u64 tmp;
        int ret = 0;
        int mirror;
        int num_copies;

        if (num_bytes % root->sectorsize)
                return -EINVAL;

        data = malloc(num_bytes);
        if (!data)
                return -ENOMEM;

        while (offset < num_bytes) {
                /* Every new chunk starts over with mirror 0. */
                mirror = 0;
again:
                read_len = num_bytes - offset;
                /* read as much as possible in one go; read_len may shrink */
                ret = read_extent_data(root, data + offset,
                                bytenr + offset, &read_len, mirror);
                if (ret)
                        goto out;
                data_checked = 0;
                /* verify the checksum of every sector that was read */
                while (data_checked < read_len) {
                        csum = ~(u32)0;
                        /* tmp: byte position of this sector in the range */
                        tmp = offset + data_checked;

                        csum = btrfs_csum_data(NULL, (char *)data + tmp,
                                               csum, root->sectorsize);
                        btrfs_csum_final(csum, (char *)&csum);

                        csum_offset = leaf_offset +
                                 tmp / root->sectorsize * csum_size;
                        read_extent_buffer(eb, (char *)&csum_expected,
                                           csum_offset, csum_size);
                        /* try another mirror */
                        if (csum != csum_expected) {
                                fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
                                                mirror, bytenr + tmp,
                                                csum, csum_expected);
                                num_copies = btrfs_num_copies(
                                                &root->fs_info->mapping_tree,
                                                bytenr, num_bytes);
                                /*
                                 * Re-read the whole chunk at 'offset' from
                                 * the next mirror; once mirror reaches
                                 * num_copies - 1 the mismatch is only
                                 * logged and checking moves on.
                                 */
                                if (mirror < num_copies - 1) {
                                        mirror += 1;
                                        goto again;
                                }
                        }
                        data_checked += root->sectorsize;
                }
                offset += read_len;
        }
out:
        free(data);
        return ret;
}
4241
/*
 * Check that the byte range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEM records of the extent tree (root->fs_info->extent_root).
 *
 * The search is positioned at or before @bytenr, then items are walked
 * forward and every EXTENT_ITEM overlapping the range shrinks the part that
 * is still unaccounted for.  Only BTRFS_EXTENT_ITEM_KEY items are
 * considered.
 *
 * Returns 0 when the whole range is covered, 1 when some part of it has no
 * extent record (a message is printed), -ENOMEM when no path can be
 * allocated, or a negative error from the tree search / leaf walking.
 */
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes)
{
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        int ret;

        path = btrfs_alloc_path();
        if (!path) {
                fprintf(stderr, "Error allocing path\n");
                return -ENOMEM;
        }

        /* Largest possible key for this bytenr. */
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

again:
        /*
         * NOTE(review): when reached through the "goto again" below, 'key'
         * holds the key of the last item read, not the search key set up
         * above -- confirm this is intended.
         */
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
                                0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error looking up extent record %d\n", ret);
                btrfs_free_path(path);
                return ret;
        } else if (ret) {
                /* No exact match: step back to the preceding item. */
                if (path->slots[0] > 0) {
                        path->slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                /* No previous leaf; result decided at out. */
                                ret = 0;
                                goto out;
                        }
                }
        }

        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

        /*
         * Block group items come before extent items if they have the same
         * bytenr, so walk back one more just in case.  Dear future traveler,
         * first congrats on mastering time travel.  Now if it's not too much
         * trouble could you go back to 2006 and tell Chris to make the
         * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
         * EXTENT_ITEM_KEY please?
         */
        while (key.type > BTRFS_EXTENT_ITEM_KEY) {
                if (path->slots[0] > 0) {
                        path->slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                ret = 0;
                                goto out;
                        }
                }
                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
        }

        /* Walk forward until the range is used up or we run past it. */
        while (num_bytes) {
                if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf "
                                        "%d\n", ret);
                                btrfs_free_path(path);
                                return ret;
                        } else if (ret) {
                                break;
                        }
                }
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.type != BTRFS_EXTENT_ITEM_KEY) {
                        path->slots[0]++;
                        continue;
                }
                /* Extent ends before the range starts. */
                if (key.objectid + key.offset < bytenr) {
                        path->slots[0]++;
                        continue;
                }
                /* Extent starts behind the range: nothing more to find. */
                if (key.objectid > bytenr + num_bytes)
                        break;

                if (key.objectid == bytenr) {
                        /* Extent starts with the range. */
                        if (key.offset >= num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes -= key.offset;
                        bytenr += key.offset;
                } else if (key.objectid < bytenr) {
                        /* Extent starts before the range and overlaps it. */
                        if (key.objectid + key.offset >= bytenr + num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes = (bytenr + num_bytes) -
                                (key.objectid + key.offset);
                        bytenr = key.objectid + key.offset;
                } else {
                        if (key.objectid + key.offset < bytenr + num_bytes) {
                                u64 new_start = key.objectid + key.offset;
                                u64 new_bytes = bytenr + num_bytes - new_start;

                                /*
                                 * Weird case, the extent is in the middle of
                                 * our range, we'll have to search one side
                                 * and then the other.  Not sure if this happens
                                 * in real life, but no harm in coding it up
                                 * anyway just in case.
                                 */
                                btrfs_release_path(path);
                                ret = check_extent_exists(root, new_start,
                                                          new_bytes);
                                if (ret) {
                                        /*
                                         * The break reaches "ret = 0" below;
                                         * the final result then depends on
                                         * the remaining num_bytes.
                                         */
                                        fprintf(stderr, "Right section didn't "
                                                "have a record\n");
                                        break;
                                }
                                num_bytes = key.objectid - bytenr;
                                goto again;
                        }
                        /* Extent covers the tail; only the head remains. */
                        num_bytes = key.objectid - bytenr;
                }
                path->slots[0]++;
        }
        ret = 0;

out:
        /* Anything still unaccounted for has no extent record. */
        if (num_bytes && !ret) {
                fprintf(stderr, "There are no extents for csum range "
                        "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
                ret = 1;
        }

        btrfs_free_path(path);
        return ret;
}
4385
4386 static int check_csums(struct btrfs_root *root)
4387 {
4388         struct btrfs_path *path;
4389         struct extent_buffer *leaf;
4390         struct btrfs_key key;
4391         u64 offset = 0, num_bytes = 0;
4392         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
4393         int errors = 0;
4394         int ret;
4395         u64 data_len;
4396         unsigned long leaf_offset;
4397
4398         root = root->fs_info->csum_root;
4399         if (!extent_buffer_uptodate(root->node)) {
4400                 fprintf(stderr, "No valid csum tree found\n");
4401                 return -ENOENT;
4402         }
4403
4404         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
4405         key.type = BTRFS_EXTENT_CSUM_KEY;
4406         key.offset = 0;
4407
4408         path = btrfs_alloc_path();
4409         if (!path)
4410                 return -ENOMEM;
4411
4412         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4413         if (ret < 0) {
4414                 fprintf(stderr, "Error searching csum tree %d\n", ret);
4415                 btrfs_free_path(path);
4416                 return ret;
4417         }
4418
4419         if (ret > 0 && path->slots[0])
4420                 path->slots[0]--;
4421         ret = 0;
4422
4423         while (1) {
4424                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4425                         ret = btrfs_next_leaf(root, path);
4426                         if (ret < 0) {
4427                                 fprintf(stderr, "Error going to next leaf "
4428                                         "%d\n", ret);
4429                                 break;
4430                         }
4431                         if (ret)
4432                                 break;
4433                 }
4434                 leaf = path->nodes[0];
4435
4436                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4437                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
4438                         path->slots[0]++;
4439                         continue;
4440                 }
4441
4442                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
4443                               csum_size) * root->sectorsize;
4444                 if (!check_data_csum)
4445                         goto skip_csum_check;
4446                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
4447                 ret = check_extent_csums(root, key.offset, data_len,
4448                                          leaf_offset, leaf);
4449                 if (ret)
4450                         break;
4451 skip_csum_check:
4452                 if (!num_bytes) {
4453                         offset = key.offset;
4454                 } else if (key.offset != offset + num_bytes) {
4455                         ret = check_extent_exists(root, offset, num_bytes);
4456                         if (ret) {
4457                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
4458                                         "there is no extent record\n",
4459                                         offset, offset+num_bytes);
4460                                 errors++;
4461                         }
4462                         offset = key.offset;
4463                         num_bytes = 0;
4464                 }
4465                 num_bytes += data_len;
4466                 path->slots[0]++;
4467         }
4468
4469         btrfs_free_path(path);
4470         return errors;
4471 }
4472
4473 static int is_dropped_key(struct btrfs_key *key,
4474                           struct btrfs_key *drop_key) {
4475         if (key->objectid < drop_key->objectid)
4476                 return 1;
4477         else if (key->objectid == drop_key->objectid) {
4478                 if (key->type < drop_key->type)
4479                         return 1;
4480                 else if (key->type == drop_key->type) {
4481                         if (key->offset < drop_key->offset)
4482                                 return 1;
4483                 }
4484         }
4485         return 0;
4486 }
4487
4488 static int run_next_block(struct btrfs_trans_handle *trans,
4489                           struct btrfs_root *root,
4490                           struct block_info *bits,
4491                           int bits_nr,
4492                           u64 *last,
4493                           struct cache_tree *pending,
4494                           struct cache_tree *seen,
4495                           struct cache_tree *reada,
4496                           struct cache_tree *nodes,
4497                           struct cache_tree *extent_cache,
4498                           struct cache_tree *chunk_cache,
4499                           struct rb_root *dev_cache,
4500                           struct block_group_tree *block_group_cache,
4501                           struct device_extent_tree *dev_extent_cache,
4502                           struct btrfs_root_item *ri)
4503 {
4504         struct extent_buffer *buf;
4505         u64 bytenr;
4506         u32 size;
4507         u64 parent;
4508         u64 owner;
4509         u64 flags;
4510         u64 ptr;
4511         u64 gen = 0;
4512         int ret = 0;
4513         int i;
4514         int nritems;
4515         struct btrfs_key key;
4516         struct cache_extent *cache;
4517         int reada_bits;
4518
4519         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
4520                                     bits_nr, &reada_bits);
4521         if (nritems == 0)
4522                 return 1;
4523
4524         if (!reada_bits) {
4525                 for(i = 0; i < nritems; i++) {
4526                         ret = add_cache_extent(reada, bits[i].start,
4527                                                bits[i].size);
4528                         if (ret == -EEXIST)
4529                                 continue;
4530
4531                         /* fixme, get the parent transid */
4532                         readahead_tree_block(root, bits[i].start,
4533                                              bits[i].size, 0);
4534                 }
4535         }
4536         *last = bits[0].start;
4537         bytenr = bits[0].start;
4538         size = bits[0].size;
4539
4540         cache = lookup_cache_extent(pending, bytenr, size);
4541         if (cache) {
4542                 remove_cache_extent(pending, cache);
4543                 free(cache);
4544         }
4545         cache = lookup_cache_extent(reada, bytenr, size);
4546         if (cache) {
4547                 remove_cache_extent(reada, cache);
4548                 free(cache);
4549         }
4550         cache = lookup_cache_extent(nodes, bytenr, size);
4551         if (cache) {
4552                 remove_cache_extent(nodes, cache);
4553                 free(cache);
4554         }
4555         cache = lookup_cache_extent(extent_cache, bytenr, size);
4556         if (cache) {
4557                 struct extent_record *rec;
4558
4559                 rec = container_of(cache, struct extent_record, cache);
4560                 gen = rec->parent_generation;
4561         }
4562
4563         /* fixme, get the real parent transid */
4564         buf = read_tree_block(root, bytenr, size, gen);
4565         if (!extent_buffer_uptodate(buf)) {
4566                 record_bad_block_io(root->fs_info,
4567                                     extent_cache, bytenr, size);
4568                 goto out;
4569         }
4570
4571         nritems = btrfs_header_nritems(buf);
4572
4573         /*
4574          * FIXME, this only works only if we don't have any full
4575          * backref mode.
4576          */
4577         if (!init_extent_tree) {
4578                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
4579                                        btrfs_header_level(buf), 1, NULL,
4580                                        &flags);
4581                 if (ret < 0)
4582                         flags = 0;
4583         } else {
4584                 flags = 0;
4585         }
4586
4587         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4588                 parent = bytenr;
4589                 owner = 0;
4590         } else {
4591                 parent = 0;
4592                 owner = btrfs_header_owner(buf);
4593         }
4594
4595         ret = check_block(trans, root, extent_cache, buf, flags);
4596         if (ret)
4597                 goto out;
4598
4599         if (btrfs_is_leaf(buf)) {
4600                 btree_space_waste += btrfs_leaf_free_space(root, buf);
4601                 for (i = 0; i < nritems; i++) {
4602                         struct btrfs_file_extent_item *fi;
4603                         btrfs_item_key_to_cpu(buf, &key, i);
4604                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
4605                                 process_extent_item(root, extent_cache, buf,
4606                                                     i);
4607                                 continue;
4608                         }
4609                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4610                                 process_extent_item(root, extent_cache, buf,
4611                                                     i);
4612                                 continue;
4613                         }
4614                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
4615                                 total_csum_bytes +=
4616                                         btrfs_item_size_nr(buf, i);
4617                                 continue;
4618                         }
4619                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
4620                                 process_chunk_item(chunk_cache, &key, buf, i);
4621                                 continue;
4622                         }
4623                         if (key.type == BTRFS_DEV_ITEM_KEY) {
4624                                 process_device_item(dev_cache, &key, buf, i);
4625                                 continue;
4626                         }
4627                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
4628                                 process_block_group_item(block_group_cache,
4629                                         &key, buf, i);
4630                                 continue;
4631                         }
4632                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
4633                                 process_device_extent_item(dev_extent_cache,
4634                                         &key, buf, i);
4635                                 continue;
4636
4637                         }
4638                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
4639 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4640                                 process_extent_ref_v0(extent_cache, buf, i);
4641 #else
4642                                 BUG();
4643 #endif
4644                                 continue;
4645                         }
4646
4647                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
4648                                 add_tree_backref(extent_cache, key.objectid, 0,
4649                                                  key.offset, 0);
4650                                 continue;
4651                         }
4652                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
4653                                 add_tree_backref(extent_cache, key.objectid,
4654                                                  key.offset, 0, 0);
4655                                 continue;
4656                         }
4657                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
4658                                 struct btrfs_extent_data_ref *ref;
4659                                 ref = btrfs_item_ptr(buf, i,
4660                                                 struct btrfs_extent_data_ref);
4661                                 add_data_backref(extent_cache,
4662                                         key.objectid, 0,
4663                                         btrfs_extent_data_ref_root(buf, ref),
4664                                         btrfs_extent_data_ref_objectid(buf,
4665                                                                        ref),
4666                                         btrfs_extent_data_ref_offset(buf, ref),
4667                                         btrfs_extent_data_ref_count(buf, ref),
4668                                         0, root->sectorsize);
4669                                 continue;
4670                         }
4671                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
4672                                 struct btrfs_shared_data_ref *ref;
4673                                 ref = btrfs_item_ptr(buf, i,
4674                                                 struct btrfs_shared_data_ref);
4675                                 add_data_backref(extent_cache,
4676                                         key.objectid, key.offset, 0, 0, 0,
4677                                         btrfs_shared_data_ref_count(buf, ref),
4678                                         0, root->sectorsize);
4679                                 continue;
4680                         }
4681                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
4682                                 struct bad_item *bad;
4683
4684                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
4685                                         continue;
4686                                 if (!owner)
4687                                         continue;
4688                                 bad = malloc(sizeof(struct bad_item));
4689                                 if (!bad)
4690                                         continue;
4691                                 INIT_LIST_HEAD(&bad->list);
4692                                 memcpy(&bad->key, &key,
4693                                        sizeof(struct btrfs_key));
4694                                 bad->root_id = owner;
4695                                 list_add_tail(&bad->list, &delete_items);
4696                                 continue;
4697                         }
4698                         if (key.type != BTRFS_EXTENT_DATA_KEY)
4699                                 continue;
4700                         fi = btrfs_item_ptr(buf, i,
4701                                             struct btrfs_file_extent_item);
4702                         if (btrfs_file_extent_type(buf, fi) ==
4703                             BTRFS_FILE_EXTENT_INLINE)
4704                                 continue;
4705                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
4706                                 continue;
4707
4708                         data_bytes_allocated +=
4709                                 btrfs_file_extent_disk_num_bytes(buf, fi);
4710                         if (data_bytes_allocated < root->sectorsize) {
4711                                 abort();
4712                         }
4713                         data_bytes_referenced +=
4714                                 btrfs_file_extent_num_bytes(buf, fi);
4715                         add_data_backref(extent_cache,
4716                                 btrfs_file_extent_disk_bytenr(buf, fi),
4717                                 parent, owner, key.objectid, key.offset -
4718                                 btrfs_file_extent_offset(buf, fi), 1, 1,
4719                                 btrfs_file_extent_disk_num_bytes(buf, fi));
4720                 }
4721         } else {
4722                 int level;
4723                 struct btrfs_key first_key;
4724
4725                 first_key.objectid = 0;
4726
4727                 if (nritems > 0)
4728                         btrfs_item_key_to_cpu(buf, &first_key, 0);
4729                 level = btrfs_header_level(buf);
4730                 for (i = 0; i < nritems; i++) {
4731                         ptr = btrfs_node_blockptr(buf, i);
4732                         size = btrfs_level_size(root, level - 1);
4733                         btrfs_node_key_to_cpu(buf, &key, i);
4734                         if (ri != NULL) {
4735                                 struct btrfs_key drop_key;
4736                                 btrfs_disk_key_to_cpu(&drop_key,
4737                                                       &ri->drop_progress);
4738                                 if ((level == ri->drop_level)
4739                                     && is_dropped_key(&key, &drop_key)) {
4740                                         continue;
4741                                 }
4742                         }
4743                         ret = add_extent_rec(extent_cache, &key,
4744                                              btrfs_node_ptr_generation(buf, i),
4745                                              ptr, size, 0, 0, 1, 0, 1, 0,
4746                                              size);
4747                         BUG_ON(ret);
4748
4749                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
4750
4751                         if (level > 1) {
4752                                 add_pending(nodes, seen, ptr, size);
4753                         } else {
4754                                 add_pending(pending, seen, ptr, size);
4755                         }
4756                 }
4757                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
4758                                       nritems) * sizeof(struct btrfs_key_ptr);
4759         }
4760         total_btree_bytes += buf->len;
4761         if (fs_root_objectid(btrfs_header_owner(buf)))
4762                 total_fs_tree_bytes += buf->len;
4763         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
4764                 total_extent_tree_bytes += buf->len;
4765         if (!found_old_backref &&
4766             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
4767             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
4768             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
4769                 found_old_backref = 1;
4770 out:
4771         free_extent_buffer(buf);
4772         return ret;
4773 }
4774
4775 static int add_root_to_pending(struct extent_buffer *buf,
4776                                struct cache_tree *extent_cache,
4777                                struct cache_tree *pending,
4778                                struct cache_tree *seen,
4779                                struct cache_tree *nodes,
4780                                struct btrfs_key *root_key)
4781 {
4782         if (btrfs_header_level(buf) > 0)
4783                 add_pending(nodes, seen, buf->start, buf->len);
4784         else
4785                 add_pending(pending, seen, buf->start, buf->len);
4786         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
4787                        0, 1, 1, 0, 1, 0, buf->len);
4788
4789         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
4790             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
4791                 add_tree_backref(extent_cache, buf->start, buf->start,
4792                                  0, 1);
4793         else
4794                 add_tree_backref(extent_cache, buf->start, 0,
4795                                  root_key->objectid, 1);
4796         return 0;
4797 }
4798
4799 /* as we fix the tree, we might be deleting blocks that
4800  * we're tracking for repair.  This hook makes sure we
4801  * remove any backrefs for blocks as we are fixing them.
4802  */
4803 static int free_extent_hook(struct btrfs_trans_handle *trans,
4804                             struct btrfs_root *root,
4805                             u64 bytenr, u64 num_bytes, u64 parent,
4806                             u64 root_objectid, u64 owner, u64 offset,
4807                             int refs_to_drop)
4808 {
4809         struct extent_record *rec;
4810         struct cache_extent *cache;
4811         int is_data;
4812         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
4813
4814         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
4815         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
4816         if (!cache)
4817                 return 0;
4818
4819         rec = container_of(cache, struct extent_record, cache);
4820         if (is_data) {
4821                 struct data_backref *back;
4822                 back = find_data_backref(rec, parent, root_objectid, owner,
4823                                          offset, 1, bytenr, num_bytes);
4824                 if (!back)
4825                         goto out;
4826                 if (back->node.found_ref) {
4827                         back->found_ref -= refs_to_drop;
4828                         if (rec->refs)
4829                                 rec->refs -= refs_to_drop;
4830                 }
4831                 if (back->node.found_extent_tree) {
4832                         back->num_refs -= refs_to_drop;
4833                         if (rec->extent_item_refs)
4834                                 rec->extent_item_refs -= refs_to_drop;
4835                 }
4836                 if (back->found_ref == 0)
4837                         back->node.found_ref = 0;
4838                 if (back->num_refs == 0)
4839                         back->node.found_extent_tree = 0;
4840
4841                 if (!back->node.found_extent_tree && back->node.found_ref) {
4842                         list_del(&back->node.list);
4843                         free(back);
4844                 }
4845         } else {
4846                 struct tree_backref *back;
4847                 back = find_tree_backref(rec, parent, root_objectid);
4848                 if (!back)
4849                         goto out;
4850                 if (back->node.found_ref) {
4851                         if (rec->refs)
4852                                 rec->refs--;
4853                         back->node.found_ref = 0;
4854                 }
4855                 if (back->node.found_extent_tree) {
4856                         if (rec->extent_item_refs)
4857                                 rec->extent_item_refs--;
4858                         back->node.found_extent_tree = 0;
4859                 }
4860                 if (!back->node.found_extent_tree && back->node.found_ref) {
4861                         list_del(&back->node.list);
4862                         free(back);
4863                 }
4864         }
4865         maybe_free_extent_rec(extent_cache, rec);
4866 out:
4867         return 0;
4868 }
4869
4870 static int delete_extent_records(struct btrfs_trans_handle *trans,
4871                                  struct btrfs_root *root,
4872                                  struct btrfs_path *path,
4873                                  u64 bytenr, u64 new_len)
4874 {
4875         struct btrfs_key key;
4876         struct btrfs_key found_key;
4877         struct extent_buffer *leaf;
4878         int ret;
4879         int slot;
4880
4881
4882         key.objectid = bytenr;
4883         key.type = (u8)-1;
4884         key.offset = (u64)-1;
4885
4886         while(1) {
4887                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
4888                                         &key, path, 0, 1);
4889                 if (ret < 0)
4890                         break;
4891
4892                 if (ret > 0) {
4893                         ret = 0;
4894                         if (path->slots[0] == 0)
4895                                 break;
4896                         path->slots[0]--;
4897                 }
4898                 ret = 0;
4899
4900                 leaf = path->nodes[0];
4901                 slot = path->slots[0];
4902
4903                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4904                 if (found_key.objectid != bytenr)
4905                         break;
4906
4907                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
4908                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
4909                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4910                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
4911                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
4912                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
4913                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
4914                         btrfs_release_path(path);
4915                         if (found_key.type == 0) {
4916                                 if (found_key.offset == 0)
4917                                         break;
4918                                 key.offset = found_key.offset - 1;
4919                                 key.type = found_key.type;
4920                         }
4921                         key.type = found_key.type - 1;
4922                         key.offset = (u64)-1;
4923                         continue;
4924                 }
4925
4926                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
4927                         found_key.objectid, found_key.type, found_key.offset);
4928
4929                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
4930                 if (ret)
4931                         break;
4932                 btrfs_release_path(path);
4933
4934                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4935                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
4936                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
4937                                 found_key.offset : root->leafsize;
4938
4939                         ret = btrfs_update_block_group(trans, root, bytenr,
4940                                                        bytes, 0, 0);
4941                         if (ret)
4942                                 break;
4943                 }
4944         }
4945
4946         btrfs_release_path(path);
4947         return ret;
4948 }
4949
4950 /*
4951  * for a single backref, this will allocate a new extent
4952  * and add the backref to it.
4953  */
4954 static int record_extent(struct btrfs_trans_handle *trans,
4955                          struct btrfs_fs_info *info,
4956                          struct btrfs_path *path,
4957                          struct extent_record *rec,
4958                          struct extent_backref *back,
4959                          int allocated, u64 flags)
4960 {
4961         int ret;
4962         struct btrfs_root *extent_root = info->extent_root;
4963         struct extent_buffer *leaf;
4964         struct btrfs_key ins_key;
4965         struct btrfs_extent_item *ei;
4966         struct tree_backref *tback;
4967         struct data_backref *dback;
4968         struct btrfs_tree_block_info *bi;
4969
4970         if (!back->is_data)
4971                 rec->max_size = max_t(u64, rec->max_size,
4972                                     info->extent_root->leafsize);
4973
4974         if (!allocated) {
4975                 u32 item_size = sizeof(*ei);
4976
4977                 if (!back->is_data)
4978                         item_size += sizeof(*bi);
4979
4980                 ins_key.objectid = rec->start;
4981                 ins_key.offset = rec->max_size;
4982                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
4983
4984                 ret = btrfs_insert_empty_item(trans, extent_root, path,
4985                                         &ins_key, item_size);
4986                 if (ret)
4987                         goto fail;
4988
4989                 leaf = path->nodes[0];
4990                 ei = btrfs_item_ptr(leaf, path->slots[0],
4991                                     struct btrfs_extent_item);
4992
4993                 btrfs_set_extent_refs(leaf, ei, 0);
4994                 btrfs_set_extent_generation(leaf, ei, rec->generation);
4995
4996                 if (back->is_data) {
4997                         btrfs_set_extent_flags(leaf, ei,
4998                                                BTRFS_EXTENT_FLAG_DATA);
4999                 } else {
5000                         struct btrfs_disk_key copy_key;;
5001
5002                         tback = (struct tree_backref *)back;
5003                         bi = (struct btrfs_tree_block_info *)(ei + 1);
5004                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
5005                                              sizeof(*bi));
5006
5007                         btrfs_set_disk_key_objectid(&copy_key,
5008                                                     rec->info_objectid);
5009                         btrfs_set_disk_key_type(&copy_key, 0);
5010                         btrfs_set_disk_key_offset(&copy_key, 0);
5011
5012                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
5013                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
5014
5015                         btrfs_set_extent_flags(leaf, ei,
5016                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
5017                 }
5018
5019                 btrfs_mark_buffer_dirty(leaf);
5020                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
5021                                                rec->max_size, 1, 0);
5022                 if (ret)
5023                         goto fail;
5024                 btrfs_release_path(path);
5025         }
5026
5027         if (back->is_data) {
5028                 u64 parent;
5029                 int i;
5030
5031                 dback = (struct data_backref *)back;
5032                 if (back->full_backref)
5033                         parent = dback->parent;
5034                 else
5035                         parent = 0;
5036
5037                 for (i = 0; i < dback->found_ref; i++) {
5038                         /* if parent != 0, we're doing a full backref
5039                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
5040                          * just makes the backref allocator create a data
5041                          * backref
5042                          */
5043                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
5044                                                    rec->start, rec->max_size,
5045                                                    parent,
5046                                                    dback->root,
5047                                                    parent ?
5048                                                    BTRFS_FIRST_FREE_OBJECTID :
5049                                                    dback->owner,
5050                                                    dback->offset);
5051                         if (ret)
5052                                 break;
5053                 }
5054                 fprintf(stderr, "adding new data backref"
5055                                 " on %llu %s %llu owner %llu"
5056                                 " offset %llu found %d\n",
5057                                 (unsigned long long)rec->start,
5058                                 back->full_backref ?
5059                                 "parent" : "root",
5060                                 back->full_backref ?
5061                                 (unsigned long long)parent :
5062                                 (unsigned long long)dback->root,
5063                                 (unsigned long long)dback->owner,
5064                                 (unsigned long long)dback->offset,
5065                                 dback->found_ref);
5066         } else {
5067                 u64 parent;
5068
5069                 tback = (struct tree_backref *)back;
5070                 if (back->full_backref)
5071                         parent = tback->parent;
5072                 else
5073                         parent = 0;
5074
5075                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
5076                                            rec->start, rec->max_size,
5077                                            parent, tback->root, 0, 0);
5078                 fprintf(stderr, "adding new tree backref on "
5079                         "start %llu len %llu parent %llu root %llu\n",
5080                         rec->start, rec->max_size, tback->parent, tback->root);
5081         }
5082         if (ret)
5083                 goto fail;
5084 fail:
5085         btrfs_release_path(path);
5086         return ret;
5087 }
5088
5089 struct extent_entry {
5090         u64 bytenr;
5091         u64 bytes;
5092         int count;
5093         int broken;
5094         struct list_head list;
5095 };
5096
5097 static struct extent_entry *find_entry(struct list_head *entries,
5098                                        u64 bytenr, u64 bytes)
5099 {
5100         struct extent_entry *entry = NULL;
5101
5102         list_for_each_entry(entry, entries, list) {
5103                 if (entry->bytenr == bytenr && entry->bytes == bytes)
5104                         return entry;
5105         }
5106
5107         return NULL;
5108 }
5109
5110 static struct extent_entry *find_most_right_entry(struct list_head *entries)
5111 {
5112         struct extent_entry *entry, *best = NULL, *prev = NULL;
5113
5114         list_for_each_entry(entry, entries, list) {
5115                 if (!prev) {
5116                         prev = entry;
5117                         continue;
5118                 }
5119
5120                 /*
5121                  * If there are as many broken entries as entries then we know
5122                  * not to trust this particular entry.
5123                  */
5124                 if (entry->broken == entry->count)
5125                         continue;
5126
5127                 /*
5128                  * If our current entry == best then we can't be sure our best
5129                  * is really the best, so we need to keep searching.
5130                  */
5131                 if (best && best->count == entry->count) {
5132                         prev = entry;
5133                         best = NULL;
5134                         continue;
5135                 }
5136
5137                 /* Prev == entry, not good enough, have to keep searching */
5138                 if (!prev->broken && prev->count == entry->count)
5139                         continue;
5140
5141                 if (!best)
5142                         best = (prev->count > entry->count) ? prev : entry;
5143                 else if (best->count < entry->count)
5144                         best = entry;
5145                 prev = entry;
5146         }
5147
5148         return best;
5149 }
5150
5151 static int repair_ref(struct btrfs_trans_handle *trans,
5152                       struct btrfs_fs_info *info, struct btrfs_path *path,
5153                       struct data_backref *dback, struct extent_entry *entry)
5154 {
5155         struct btrfs_root *root;
5156         struct btrfs_file_extent_item *fi;
5157         struct extent_buffer *leaf;
5158         struct btrfs_key key;
5159         u64 bytenr, bytes;
5160         int ret;
5161
5162         key.objectid = dback->root;
5163         key.type = BTRFS_ROOT_ITEM_KEY;
5164         key.offset = (u64)-1;
5165         root = btrfs_read_fs_root(info, &key);
5166         if (IS_ERR(root)) {
5167                 fprintf(stderr, "Couldn't find root for our ref\n");
5168                 return -EINVAL;
5169         }
5170
5171         /*
5172          * The backref points to the original offset of the extent if it was
5173          * split, so we need to search down to the offset we have and then walk
5174          * forward until we find the backref we're looking for.
5175          */
5176         key.objectid = dback->owner;
5177         key.type = BTRFS_EXTENT_DATA_KEY;
5178         key.offset = dback->offset;
5179         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5180         if (ret < 0) {
5181                 fprintf(stderr, "Error looking up ref %d\n", ret);
5182                 return ret;
5183         }
5184
5185         while (1) {
5186                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5187                         ret = btrfs_next_leaf(root, path);
5188                         if (ret) {
5189                                 fprintf(stderr, "Couldn't find our ref, next\n");
5190                                 return -EINVAL;
5191                         }
5192                 }
5193                 leaf = path->nodes[0];
5194                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5195                 if (key.objectid != dback->owner ||
5196                     key.type != BTRFS_EXTENT_DATA_KEY) {
5197                         fprintf(stderr, "Couldn't find our ref, search\n");
5198                         return -EINVAL;
5199                 }
5200                 fi = btrfs_item_ptr(leaf, path->slots[0],
5201                                     struct btrfs_file_extent_item);
5202                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
5203                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
5204
5205                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
5206                         break;
5207                 path->slots[0]++;
5208         }
5209
5210         btrfs_release_path(path);
5211
5212         /*
5213          * Have to make sure that this root gets updated when we commit the
5214          * transaction
5215          */
5216         record_root_in_trans(trans, root);
5217
5218         /*
5219          * Ok we have the key of the file extent we want to fix, now we can cow
5220          * down to the thing and fix it.
5221          */
5222         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5223         if (ret < 0) {
5224                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
5225                         key.objectid, key.type, key.offset, ret);
5226                 return ret;
5227         }
5228         if (ret > 0) {
5229                 fprintf(stderr, "Well that's odd, we just found this key "
5230                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
5231                         key.offset);
5232                 return -EINVAL;
5233         }
5234         leaf = path->nodes[0];
5235         fi = btrfs_item_ptr(leaf, path->slots[0],
5236                             struct btrfs_file_extent_item);
5237
5238         if (btrfs_file_extent_compression(leaf, fi) &&
5239             dback->disk_bytenr != entry->bytenr) {
5240                 fprintf(stderr, "Ref doesn't match the record start and is "
5241                         "compressed, please take a btrfs-image of this file "
5242                         "system and send it to a btrfs developer so they can "
5243                         "complete this functionality for bytenr %Lu\n",
5244                         dback->disk_bytenr);
5245                 return -EINVAL;
5246         }
5247
5248         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
5249                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
5250         } else if (dback->disk_bytenr > entry->bytenr) {
5251                 u64 off_diff, offset;
5252
5253                 off_diff = dback->disk_bytenr - entry->bytenr;
5254                 offset = btrfs_file_extent_offset(leaf, fi);
5255                 if (dback->disk_bytenr + offset +
5256                     btrfs_file_extent_num_bytes(leaf, fi) >
5257                     entry->bytenr + entry->bytes) {
5258                         fprintf(stderr, "Ref is past the entry end, please "
5259                                 "take a btrfs-image of this file system and "
5260                                 "send it to a btrfs developer, ref %Lu\n",
5261                                 dback->disk_bytenr);
5262                         return -EINVAL;
5263                 }
5264                 offset += off_diff;
5265                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
5266                 btrfs_set_file_extent_offset(leaf, fi, offset);
5267         } else if (dback->disk_bytenr < entry->bytenr) {
5268                 u64 offset;
5269
5270                 offset = btrfs_file_extent_offset(leaf, fi);
5271                 if (dback->disk_bytenr + offset < entry->bytenr) {
5272                         fprintf(stderr, "Ref is before the entry start, please"
5273                                 " take a btrfs-image of this file system and "
5274                                 "send it to a btrfs developer, ref %Lu\n",
5275                                 dback->disk_bytenr);
5276                         return -EINVAL;
5277                 }
5278
5279                 offset += dback->disk_bytenr;
5280                 offset -= entry->bytenr;
5281                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
5282                 btrfs_set_file_extent_offset(leaf, fi, offset);
5283         }
5284
5285         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
5286
5287         /*
5288          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
5289          * only do this if we aren't using compression, otherwise it's a
5290          * trickier case.
5291          */
5292         if (!btrfs_file_extent_compression(leaf, fi))
5293                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
5294         else
5295                 printf("ram bytes may be wrong?\n");
5296         btrfs_mark_buffer_dirty(leaf);
5297         btrfs_release_path(path);
5298         return 0;
5299 }
5300
5301 static int verify_backrefs(struct btrfs_trans_handle *trans,
5302                            struct btrfs_fs_info *info, struct btrfs_path *path,
5303                            struct extent_record *rec)
5304 {
5305         struct extent_backref *back;
5306         struct data_backref *dback;
5307         struct extent_entry *entry, *best = NULL;
5308         LIST_HEAD(entries);
5309         int nr_entries = 0;
5310         int broken_entries = 0;
5311         int ret = 0;
5312         short mismatch = 0;
5313
5314         /*
5315          * Metadata is easy and the backrefs should always agree on bytenr and
5316          * size, if not we've got bigger issues.
5317          */
5318         if (rec->metadata)
5319                 return 0;
5320
5321         list_for_each_entry(back, &rec->backrefs, list) {
5322                 if (back->full_backref || !back->is_data)
5323                         continue;
5324
5325                 dback = (struct data_backref *)back;
5326
5327                 /*
5328                  * We only pay attention to backrefs that we found a real
5329                  * backref for.
5330                  */
5331                 if (dback->found_ref == 0)
5332                         continue;
5333
5334                 /*
5335                  * For now we only catch when the bytes don't match, not the
5336                  * bytenr.  We can easily do this at the same time, but I want
5337                  * to have a fs image to test on before we just add repair
5338                  * functionality willy-nilly so we know we won't screw up the
5339                  * repair.
5340                  */
5341
5342                 entry = find_entry(&entries, dback->disk_bytenr,
5343                                    dback->bytes);
5344                 if (!entry) {
5345                         entry = malloc(sizeof(struct extent_entry));
5346                         if (!entry) {
5347                                 ret = -ENOMEM;
5348                                 goto out;
5349                         }
5350                         memset(entry, 0, sizeof(*entry));
5351                         entry->bytenr = dback->disk_bytenr;
5352                         entry->bytes = dback->bytes;
5353                         list_add_tail(&entry->list, &entries);
5354                         nr_entries++;
5355                 }
5356
5357                 /*
5358                  * If we only have on entry we may think the entries agree when
5359                  * in reality they don't so we have to do some extra checking.
5360                  */
5361                 if (dback->disk_bytenr != rec->start ||
5362                     dback->bytes != rec->nr || back->broken)
5363                         mismatch = 1;
5364
5365                 if (back->broken) {
5366                         entry->broken++;
5367                         broken_entries++;
5368                 }
5369
5370                 entry->count++;
5371         }
5372
5373         /* Yay all the backrefs agree, carry on good sir */
5374         if (nr_entries <= 1 && !mismatch)
5375                 goto out;
5376
5377         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
5378                 "%Lu\n", rec->start);
5379
5380         /*
5381          * First we want to see if the backrefs can agree amongst themselves who
5382          * is right, so figure out which one of the entries has the highest
5383          * count.
5384          */
5385         best = find_most_right_entry(&entries);
5386
5387         /*
5388          * Ok so we may have an even split between what the backrefs think, so
5389          * this is where we use the extent ref to see what it thinks.
5390          */
5391         if (!best) {
5392                 entry = find_entry(&entries, rec->start, rec->nr);
5393                 if (!entry && (!broken_entries || !rec->found_rec)) {
5394                         fprintf(stderr, "Backrefs don't agree with each other "
5395                                 "and extent record doesn't agree with anybody,"
5396                                 " so we can't fix bytenr %Lu bytes %Lu\n",
5397                                 rec->start, rec->nr);
5398                         ret = -EINVAL;
5399                         goto out;
5400                 } else if (!entry) {
5401                         /*
5402                          * Ok our backrefs were broken, we'll assume this is the
5403                          * correct value and add an entry for this range.
5404                          */
5405                         entry = malloc(sizeof(struct extent_entry));
5406                         if (!entry) {
5407                                 ret = -ENOMEM;
5408                                 goto out;
5409                         }
5410                         memset(entry, 0, sizeof(*entry));
5411                         entry->bytenr = rec->start;
5412                         entry->bytes = rec->nr;
5413                         list_add_tail(&entry->list, &entries);
5414                         nr_entries++;
5415                 }
5416                 entry->count++;
5417                 best = find_most_right_entry(&entries);
5418                 if (!best) {
5419                         fprintf(stderr, "Backrefs and extent record evenly "
5420                                 "split on who is right, this is going to "
5421                                 "require user input to fix bytenr %Lu bytes "
5422                                 "%Lu\n", rec->start, rec->nr);
5423                         ret = -EINVAL;
5424                         goto out;
5425                 }
5426         }
5427
5428         /*
5429          * I don't think this can happen currently as we'll abort() if we catch
5430          * this case higher up, but in case somebody removes that we still can't
5431          * deal with it properly here yet, so just bail out of that's the case.
5432          */
5433         if (best->bytenr != rec->start) {
5434                 fprintf(stderr, "Extent start and backref starts don't match, "
5435                         "please use btrfs-image on this file system and send "
5436                         "it to a btrfs developer so they can make fsck fix "
5437                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
5438                         rec->start, rec->nr);
5439                 ret = -EINVAL;
5440                 goto out;
5441         }
5442
5443         /*
5444          * Ok great we all agreed on an extent record, let's go find the real
5445          * references and fix up the ones that don't match.
5446          */
5447         list_for_each_entry(back, &rec->backrefs, list) {
5448                 if (back->full_backref || !back->is_data)
5449                         continue;
5450
5451                 dback = (struct data_backref *)back;
5452
5453                 /*
5454                  * Still ignoring backrefs that don't have a real ref attached
5455                  * to them.
5456                  */
5457                 if (dback->found_ref == 0)
5458                         continue;
5459
5460                 if (dback->bytes == best->bytes &&
5461                     dback->disk_bytenr == best->bytenr)
5462                         continue;
5463
5464                 ret = repair_ref(trans, info, path, dback, best);
5465                 if (ret)
5466                         goto out;
5467         }
5468
5469         /*
5470          * Ok we messed with the actual refs, which means we need to drop our
5471          * entire cache and go back and rescan.  I know this is a huge pain and
5472          * adds a lot of extra work, but it's the only way to be safe.  Once all
5473          * the backrefs agree we may not need to do anything to the extent
5474          * record itself.
5475          */
5476         ret = -EAGAIN;
5477 out:
5478         while (!list_empty(&entries)) {
5479                 entry = list_entry(entries.next, struct extent_entry, list);
5480                 list_del_init(&entry->list);
5481                 free(entry);
5482         }
5483         return ret;
5484 }
5485
5486 static int process_duplicates(struct btrfs_root *root,
5487                               struct cache_tree *extent_cache,
5488                               struct extent_record *rec)
5489 {
5490         struct extent_record *good, *tmp;
5491         struct cache_extent *cache;
5492         int ret;
5493
5494         /*
5495          * If we found a extent record for this extent then return, or if we
5496          * have more than one duplicate we are likely going to need to delete
5497          * something.
5498          */
5499         if (rec->found_rec || rec->num_duplicates > 1)
5500                 return 0;
5501
5502         /* Shouldn't happen but just in case */
5503         BUG_ON(!rec->num_duplicates);
5504
5505         /*
5506          * So this happens if we end up with a backref that doesn't match the
5507          * actual extent entry.  So either the backref is bad or the extent
5508          * entry is bad.  Either way we want to have the extent_record actually
5509          * reflect what we found in the extent_tree, so we need to take the
5510          * duplicate out and use that as the extent_record since the only way we
5511          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
5512          */
5513         remove_cache_extent(extent_cache, &rec->cache);
5514
5515         good = list_entry(rec->dups.next, struct extent_record, list);
5516         list_del_init(&good->list);
5517         INIT_LIST_HEAD(&good->backrefs);
5518         INIT_LIST_HEAD(&good->dups);
5519         good->cache.start = good->start;
5520         good->cache.size = good->nr;
5521         good->content_checked = 0;
5522         good->owner_ref_checked = 0;
5523         good->num_duplicates = 0;
5524         good->refs = rec->refs;
5525         list_splice_init(&rec->backrefs, &good->backrefs);
5526         while (1) {
5527                 cache = lookup_cache_extent(extent_cache, good->start,
5528                                             good->nr);
5529                 if (!cache)
5530                         break;
5531                 tmp = container_of(cache, struct extent_record, cache);
5532
5533                 /*
5534                  * If we find another overlapping extent and it's found_rec is
5535                  * set then it's a duplicate and we need to try and delete
5536                  * something.
5537                  */
5538                 if (tmp->found_rec || tmp->num_duplicates > 0) {
5539                         if (list_empty(&good->list))
5540                                 list_add_tail(&good->list,
5541                                               &duplicate_extents);
5542                         good->num_duplicates += tmp->num_duplicates + 1;
5543                         list_splice_init(&tmp->dups, &good->dups);
5544                         list_del_init(&tmp->list);
5545                         list_add_tail(&tmp->list, &good->dups);
5546                         remove_cache_extent(extent_cache, &tmp->cache);
5547                         continue;
5548                 }
5549
5550                 /*
5551                  * Ok we have another non extent item backed extent rec, so lets
5552                  * just add it to this extent and carry on like we did above.
5553                  */
5554                 good->refs += tmp->refs;
5555                 list_splice_init(&tmp->backrefs, &good->backrefs);
5556                 remove_cache_extent(extent_cache, &tmp->cache);
5557                 free(tmp);
5558         }
5559         ret = insert_cache_extent(extent_cache, &good->cache);
5560         BUG_ON(ret);
5561         free(rec);
5562         return good->num_duplicates ? 0 : 1;
5563 }
5564
5565 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
5566                                     struct btrfs_root *root,
5567                                     struct extent_record *rec)
5568 {
5569         LIST_HEAD(delete_list);
5570         struct btrfs_path *path;
5571         struct extent_record *tmp, *good, *n;
5572         int nr_del = 0;
5573         int ret = 0;
5574         struct btrfs_key key;
5575
5576         path = btrfs_alloc_path();
5577         if (!path) {
5578                 ret = -ENOMEM;
5579                 goto out;
5580         }
5581
5582         good = rec;
5583         /* Find the record that covers all of the duplicates. */
5584         list_for_each_entry(tmp, &rec->dups, list) {
5585                 if (good->start < tmp->start)
5586                         continue;
5587                 if (good->nr > tmp->nr)
5588                         continue;
5589
5590                 if (tmp->start + tmp->nr < good->start + good->nr) {
5591                         fprintf(stderr, "Ok we have overlapping extents that "
5592                                 "aren't completely covered by eachother, this "
5593                                 "is going to require more careful thought.  "
5594                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
5595                                 tmp->start, tmp->nr, good->start, good->nr);
5596                         abort();
5597                 }
5598                 good = tmp;
5599         }
5600
5601         if (good != rec)
5602                 list_add_tail(&rec->list, &delete_list);
5603
5604         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
5605                 if (tmp == good)
5606                         continue;
5607                 list_move_tail(&tmp->list, &delete_list);
5608         }
5609
5610         root = root->fs_info->extent_root;
5611         list_for_each_entry(tmp, &delete_list, list) {
5612                 if (tmp->found_rec == 0)
5613                         continue;
5614                 key.objectid = tmp->start;
5615                 key.type = BTRFS_EXTENT_ITEM_KEY;
5616                 key.offset = tmp->nr;
5617
5618                 /* Shouldn't happen but just in case */
5619                 if (tmp->metadata) {
5620                         fprintf(stderr, "Well this shouldn't happen, extent "
5621                                 "record overlaps but is metadata? "
5622                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
5623                         abort();
5624                 }
5625
5626                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5627                 if (ret) {
5628                         if (ret > 0)
5629                                 ret = -EINVAL;
5630                         goto out;
5631                 }
5632                 ret = btrfs_del_item(trans, root, path);
5633                 if (ret)
5634                         goto out;
5635                 btrfs_release_path(path);
5636                 nr_del++;
5637         }
5638
5639 out:
5640         while (!list_empty(&delete_list)) {
5641                 tmp = list_entry(delete_list.next, struct extent_record, list);
5642                 list_del_init(&tmp->list);
5643                 if (tmp == rec)
5644                         continue;
5645                 free(tmp);
5646         }
5647
5648         while (!list_empty(&rec->dups)) {
5649                 tmp = list_entry(rec->dups.next, struct extent_record, list);
5650                 list_del_init(&tmp->list);
5651                 free(tmp);
5652         }
5653
5654         btrfs_free_path(path);
5655
5656         if (!ret && !nr_del)
5657                 rec->num_duplicates = 0;
5658
5659         return ret ? ret : nr_del;
5660 }
5661
5662 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
5663                                   struct btrfs_fs_info *info,
5664                                   struct btrfs_path *path,
5665                                   struct cache_tree *extent_cache,
5666                                   struct extent_record *rec)
5667 {
5668         struct btrfs_root *root;
5669         struct extent_backref *back;
5670         struct data_backref *dback;
5671         struct cache_extent *cache;
5672         struct btrfs_file_extent_item *fi;
5673         struct btrfs_key key;
5674         u64 bytenr, bytes;
5675         int ret;
5676
5677         list_for_each_entry(back, &rec->backrefs, list) {
5678                 /* Don't care about full backrefs (poor unloved backrefs) */
5679                 if (back->full_backref || !back->is_data)
5680                         continue;
5681
5682                 dback = (struct data_backref *)back;
5683
5684                 /* We found this one, we don't need to do a lookup */
5685                 if (dback->found_ref)
5686                         continue;
5687
5688                 key.objectid = dback->root;
5689                 key.type = BTRFS_ROOT_ITEM_KEY;
5690                 key.offset = (u64)-1;
5691
5692                 root = btrfs_read_fs_root(info, &key);
5693
5694                 /* No root, definitely a bad ref, skip */
5695                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
5696                         continue;
5697                 /* Other err, exit */
5698                 if (IS_ERR(root))
5699                         return PTR_ERR(root);
5700
5701                 key.objectid = dback->owner;
5702                 key.type = BTRFS_EXTENT_DATA_KEY;
5703                 key.offset = dback->offset;
5704                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5705                 if (ret) {
5706                         btrfs_release_path(path);
5707                         if (ret < 0)
5708                                 return ret;
5709                         /* Didn't find it, we can carry on */
5710                         ret = 0;
5711                         continue;
5712                 }
5713
5714                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5715                                     struct btrfs_file_extent_item);
5716                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
5717                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
5718                 btrfs_release_path(path);
5719                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5720                 if (cache) {
5721                         struct extent_record *tmp;
5722                         tmp = container_of(cache, struct extent_record, cache);
5723
5724                         /*
5725                          * If we found an extent record for the bytenr for this
5726                          * particular backref then we can't add it to our
5727                          * current extent record.  We only want to add backrefs
5728                          * that don't have a corresponding extent item in the
5729                          * extent tree since they likely belong to this record
5730                          * and we need to fix it if it doesn't match bytenrs.
5731                          */
5732                         if  (tmp->found_rec)
5733                                 continue;
5734                 }
5735
5736                 dback->found_ref += 1;
5737                 dback->disk_bytenr = bytenr;
5738                 dback->bytes = bytes;
5739
5740                 /*
5741                  * Set this so the verify backref code knows not to trust the
5742                  * values in this backref.
5743                  */
5744                 back->broken = 1;
5745         }
5746
5747         return 0;
5748 }
5749
5750 /*
5751  * when an incorrect extent item is found, this will delete
5752  * all of the existing entries for it and recreate them
5753  * based on what the tree scan found.
5754  */
5755 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
5756                              struct btrfs_fs_info *info,
5757                              struct cache_tree *extent_cache,
5758                              struct extent_record *rec)
5759 {
5760         int ret;
5761         struct btrfs_path *path;
5762         struct list_head *cur = rec->backrefs.next;
5763         struct cache_extent *cache;
5764         struct extent_backref *back;
5765         int allocated = 0;
5766         u64 flags = 0;
5767
5768         /*
5769          * remember our flags for recreating the extent.
5770          * FIXME, if we have cleared extent tree, we can not
5771          * lookup extent info in extent tree.
5772          */
5773         if (!init_extent_tree) {
5774                 ret = btrfs_lookup_extent_info(NULL, info->extent_root,
5775                                         rec->start, rec->max_size,
5776                                         rec->metadata, NULL, &flags);
5777                 if (ret < 0)
5778                         flags = 0;
5779         } else {
5780                 flags = 0;
5781         }
5782
5783         path = btrfs_alloc_path();
5784         if (!path)
5785                 return -ENOMEM;
5786
5787         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
5788                 /*
5789                  * Sometimes the backrefs themselves are so broken they don't
5790                  * get attached to any meaningful rec, so first go back and
5791                  * check any of our backrefs that we couldn't find and throw
5792                  * them into the list if we find the backref so that
5793                  * verify_backrefs can figure out what to do.
5794                  */
5795                 ret = find_possible_backrefs(trans, info, path, extent_cache,
5796                                              rec);
5797                 if (ret < 0)
5798                         goto out;
5799         }
5800
5801         /* step one, make sure all of the backrefs agree */
5802         ret = verify_backrefs(trans, info, path, rec);
5803         if (ret < 0)
5804                 goto out;
5805
5806         /* step two, delete all the existing records */
5807         ret = delete_extent_records(trans, info->extent_root, path,
5808                                     rec->start, rec->max_size);
5809
5810         if (ret < 0)
5811                 goto out;
5812
5813         /* was this block corrupt?  If so, don't add references to it */
5814         cache = lookup_cache_extent(info->corrupt_blocks,
5815                                     rec->start, rec->max_size);
5816         if (cache) {
5817                 ret = 0;
5818                 goto out;
5819         }
5820
5821         /* step three, recreate all the refs we did find */
5822         while(cur != &rec->backrefs) {
5823                 back = list_entry(cur, struct extent_backref, list);
5824                 cur = cur->next;
5825
5826                 /*
5827                  * if we didn't find any references, don't create a
5828                  * new extent record
5829                  */
5830                 if (!back->found_ref)
5831                         continue;
5832
5833                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
5834                 allocated = 1;
5835
5836                 if (ret)
5837                         goto out;
5838         }
5839 out:
5840         btrfs_free_path(path);
5841         return ret;
5842 }
5843
5844 /* right now we only prune from the extent allocation tree */
5845 static int prune_one_block(struct btrfs_trans_handle *trans,
5846                            struct btrfs_fs_info *info,
5847                            struct btrfs_corrupt_block *corrupt)
5848 {
5849         int ret;
5850         struct btrfs_path path;
5851         struct extent_buffer *eb;
5852         u64 found;
5853         int slot;
5854         int nritems;
5855         int level = corrupt->level + 1;
5856
5857         btrfs_init_path(&path);
5858 again:
5859         /* we want to stop at the parent to our busted block */
5860         path.lowest_level = level;
5861
5862         ret = btrfs_search_slot(trans, info->extent_root,
5863                                 &corrupt->key, &path, -1, 1);
5864
5865         if (ret < 0)
5866                 goto out;
5867
5868         eb = path.nodes[level];
5869         if (!eb) {
5870                 ret = -ENOENT;
5871                 goto out;
5872         }
5873
5874         /*
5875          * hopefully the search gave us the block we want to prune,
5876          * lets try that first
5877          */
5878         slot = path.slots[level];
5879         found =  btrfs_node_blockptr(eb, slot);
5880         if (found == corrupt->cache.start)
5881                 goto del_ptr;
5882
5883         nritems = btrfs_header_nritems(eb);
5884
5885         /* the search failed, lets scan this node and hope we find it */
5886         for (slot = 0; slot < nritems; slot++) {
5887                 found =  btrfs_node_blockptr(eb, slot);
5888                 if (found == corrupt->cache.start)
5889                         goto del_ptr;
5890         }
5891         /*
5892          * we couldn't find the bad block.  TODO, search all the nodes for pointers
5893          * to this block
5894          */
5895         if (eb == info->extent_root->node) {
5896                 ret = -ENOENT;
5897                 goto out;
5898         } else {
5899                 level++;
5900                 btrfs_release_path(&path);
5901                 goto again;
5902         }
5903
5904 del_ptr:
5905         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
5906         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
5907
5908 out:
5909         btrfs_release_path(&path);
5910         return ret;
5911 }
5912
5913 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
5914                                 struct btrfs_fs_info *info)
5915 {
5916         struct cache_extent *cache;
5917         struct btrfs_corrupt_block *corrupt;
5918
5919         cache = search_cache_extent(info->corrupt_blocks, 0);
5920         while (1) {
5921                 if (!cache)
5922                         break;
5923                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5924                 prune_one_block(trans, info, corrupt);
5925                 cache = next_cache_extent(cache);
5926         }
5927         return 0;
5928 }
5929
5930 static void free_corrupt_block(struct cache_extent *cache)
5931 {
5932         struct btrfs_corrupt_block *corrupt;
5933
5934         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5935         free(corrupt);
5936 }
5937
5938 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
5939
5940 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
5941 {
5942         struct btrfs_block_group_cache *cache;
5943         u64 start, end;
5944         int ret;
5945
5946         while (1) {
5947                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
5948                                             &start, &end, EXTENT_DIRTY);
5949                 if (ret)
5950                         break;
5951                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
5952                                    GFP_NOFS);
5953         }
5954
5955         start = 0;
5956         while (1) {
5957                 cache = btrfs_lookup_first_block_group(fs_info, start);
5958                 if (!cache)
5959                         break;
5960                 if (cache->cached)
5961                         cache->cached = 0;
5962                 start = cache->key.objectid + cache->key.offset;
5963         }
5964 }
5965
5966 static int check_extent_refs(struct btrfs_trans_handle *trans,
5967                              struct btrfs_root *root,
5968                              struct cache_tree *extent_cache)
5969 {
5970         struct extent_record *rec;
5971         struct cache_extent *cache;
5972         int err = 0;
5973         int ret = 0;
5974         int fixed = 0;
5975         int had_dups = 0;
5976
5977         if (repair) {
5978                 /*
5979                  * if we're doing a repair, we have to make sure
5980                  * we don't allocate from the problem extents.
5981                  * In the worst case, this will be all the
5982                  * extents in the FS
5983                  */
5984                 cache = search_cache_extent(extent_cache, 0);
5985                 while(cache) {
5986                         rec = container_of(cache, struct extent_record, cache);
5987                         btrfs_pin_extent(root->fs_info,
5988                                          rec->start, rec->max_size);
5989                         cache = next_cache_extent(cache);
5990                 }
5991
5992                 /* pin down all the corrupted blocks too */
5993                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
5994                 while(cache) {
5995                         btrfs_pin_extent(root->fs_info,
5996                                          cache->start, cache->size);
5997                         cache = next_cache_extent(cache);
5998                 }
5999                 prune_corrupt_blocks(trans, root->fs_info);
6000                 reset_cached_block_groups(root->fs_info);
6001         }
6002
6003         /*
6004          * We need to delete any duplicate entries we find first otherwise we
6005          * could mess up the extent tree when we have backrefs that actually
6006          * belong to a different extent item and not the weird duplicate one.
6007          */
6008         while (repair && !list_empty(&duplicate_extents)) {
6009                 rec = list_entry(duplicate_extents.next, struct extent_record,
6010                                  list);
6011                 list_del_init(&rec->list);
6012
6013                 /* Sometimes we can find a backref before we find an actual
6014                  * extent, so we need to process it a little bit to see if there
6015                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
6016                  * if this is a backref screwup.  If we need to delete stuff
6017                  * process_duplicates() will return 0, otherwise it will return
6018                  * 1 and we
6019                  */
6020                 if (process_duplicates(root, extent_cache, rec))
6021                         continue;
6022                 ret = delete_duplicate_records(trans, root, rec);
6023                 if (ret < 0)
6024                         return ret;
6025                 /*
6026                  * delete_duplicate_records will return the number of entries
6027                  * deleted, so if it's greater than 0 then we know we actually
6028                  * did something and we need to remove.
6029                  */
6030                 if (ret)
6031                         had_dups = 1;
6032         }
6033
6034         if (had_dups)
6035                 return -EAGAIN;
6036
6037         while(1) {
6038                 fixed = 0;
6039                 cache = search_cache_extent(extent_cache, 0);
6040                 if (!cache)
6041                         break;
6042                 rec = container_of(cache, struct extent_record, cache);
6043                 if (rec->num_duplicates) {
6044                         fprintf(stderr, "extent item %llu has multiple extent "
6045                                 "items\n", (unsigned long long)rec->start);
6046                         err = 1;
6047                 }
6048
6049                 if (rec->refs != rec->extent_item_refs) {
6050                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
6051                                 (unsigned long long)rec->start,
6052                                 (unsigned long long)rec->nr);
6053                         fprintf(stderr, "extent item %llu, found %llu\n",
6054                                 (unsigned long long)rec->extent_item_refs,
6055                                 (unsigned long long)rec->refs);
6056                         if (!fixed && repair) {
6057                                 ret = fixup_extent_refs(trans, root->fs_info,
6058                                                         extent_cache, rec);
6059                                 if (ret)
6060                                         goto repair_abort;
6061                                 fixed = 1;
6062                         }
6063                         err = 1;
6064
6065                 }
6066                 if (all_backpointers_checked(rec, 1)) {
6067                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
6068                                 (unsigned long long)rec->start,
6069                                 (unsigned long long)rec->nr);
6070
6071                         if (!fixed && repair) {
6072                                 ret = fixup_extent_refs(trans, root->fs_info,
6073                                                         extent_cache, rec);
6074                                 if (ret)
6075                                         goto repair_abort;
6076                                 fixed = 1;
6077                         }
6078
6079                         err = 1;
6080                 }
6081                 if (!rec->owner_ref_checked) {
6082                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
6083                                 (unsigned long long)rec->start,
6084                                 (unsigned long long)rec->nr);
6085                         if (!fixed && repair) {
6086                                 ret = fixup_extent_refs(trans, root->fs_info,
6087                                                         extent_cache, rec);
6088                                 if (ret)
6089                                         goto repair_abort;
6090                                 fixed = 1;
6091                         }
6092                         err = 1;
6093                 }
6094
6095                 remove_cache_extent(extent_cache, cache);
6096                 free_all_extent_backrefs(rec);
6097                 free(rec);
6098         }
6099 repair_abort:
6100         if (repair) {
6101                 if (ret && ret != -EAGAIN) {
6102                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
6103                         exit(1);
6104                 } else if (!ret) {
6105                         btrfs_fix_block_accounting(trans, root);
6106                 }
6107                 if (err)
6108                         fprintf(stderr, "repaired damaged extent references\n");
6109                 return ret;
6110         }
6111         return err;
6112 }
6113
6114 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
6115 {
6116         u64 stripe_size;
6117
6118         if (type & BTRFS_BLOCK_GROUP_RAID0) {
6119                 stripe_size = length;
6120                 stripe_size /= num_stripes;
6121         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
6122                 stripe_size = length * 2;
6123                 stripe_size /= num_stripes;
6124         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
6125                 stripe_size = length;
6126                 stripe_size /= (num_stripes - 1);
6127         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
6128                 stripe_size = length;
6129                 stripe_size /= (num_stripes - 2);
6130         } else {
6131                 stripe_size = length;
6132         }
6133         return stripe_size;
6134 }
6135
6136 static int check_chunk_refs(struct chunk_record *chunk_rec,
6137                             struct block_group_tree *block_group_cache,
6138                             struct device_extent_tree *dev_extent_cache,
6139                             int silent)
6140 {
6141         struct cache_extent *block_group_item;
6142         struct block_group_record *block_group_rec;
6143         struct cache_extent *dev_extent_item;
6144         struct device_extent_record *dev_extent_rec;
6145         u64 devid;
6146         u64 offset;
6147         u64 length;
6148         int i;
6149         int ret = 0;
6150
6151         block_group_item = lookup_cache_extent(&block_group_cache->tree,
6152                                                chunk_rec->offset,
6153                                                chunk_rec->length);
6154         if (block_group_item) {
6155                 block_group_rec = container_of(block_group_item,
6156                                                struct block_group_record,
6157                                                cache);
6158                 if (chunk_rec->length != block_group_rec->offset ||
6159                     chunk_rec->offset != block_group_rec->objectid ||
6160                     chunk_rec->type_flags != block_group_rec->flags) {
6161                         if (!silent)
6162                                 fprintf(stderr,
6163                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
6164                                         chunk_rec->objectid,
6165                                         chunk_rec->type,
6166                                         chunk_rec->offset,
6167                                         chunk_rec->length,
6168                                         chunk_rec->offset,
6169                                         chunk_rec->type_flags,
6170                                         block_group_rec->objectid,
6171                                         block_group_rec->type,
6172                                         block_group_rec->offset,
6173                                         block_group_rec->offset,
6174                                         block_group_rec->objectid,
6175                                         block_group_rec->flags);
6176                         ret = -1;
6177                 } else {
6178                         list_del_init(&block_group_rec->list);
6179                         chunk_rec->bg_rec = block_group_rec;
6180                 }
6181         } else {
6182                 if (!silent)
6183                         fprintf(stderr,
6184                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
6185                                 chunk_rec->objectid,
6186                                 chunk_rec->type,
6187                                 chunk_rec->offset,
6188                                 chunk_rec->length,
6189                                 chunk_rec->offset,
6190                                 chunk_rec->type_flags);
6191                 ret = -1;
6192         }
6193
6194         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
6195                                     chunk_rec->num_stripes);
6196         for (i = 0; i < chunk_rec->num_stripes; ++i) {
6197                 devid = chunk_rec->stripes[i].devid;
6198                 offset = chunk_rec->stripes[i].offset;
6199                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
6200                                                        devid, offset, length);
6201                 if (dev_extent_item) {
6202                         dev_extent_rec = container_of(dev_extent_item,
6203                                                 struct device_extent_record,
6204                                                 cache);
6205                         if (dev_extent_rec->objectid != devid ||
6206                             dev_extent_rec->offset != offset ||
6207                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
6208                             dev_extent_rec->length != length) {
6209                                 if (!silent)
6210                                         fprintf(stderr,
6211                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
6212                                                 chunk_rec->objectid,
6213                                                 chunk_rec->type,
6214                                                 chunk_rec->offset,
6215                                                 chunk_rec->stripes[i].devid,
6216                                                 chunk_rec->stripes[i].offset,
6217                                                 dev_extent_rec->objectid,
6218                                                 dev_extent_rec->offset,
6219                                                 dev_extent_rec->length);
6220                                 ret = -1;
6221                         } else {
6222                                 list_move(&dev_extent_rec->chunk_list,
6223                                           &chunk_rec->dextents);
6224                         }
6225                 } else {
6226                         if (!silent)
6227                                 fprintf(stderr,
6228                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
6229                                         chunk_rec->objectid,
6230                                         chunk_rec->type,
6231                                         chunk_rec->offset,
6232                                         chunk_rec->stripes[i].devid,
6233                                         chunk_rec->stripes[i].offset);
6234                         ret = -1;
6235                 }
6236         }
6237         return ret;
6238 }
6239
6240 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
6241 int check_chunks(struct cache_tree *chunk_cache,
6242                  struct block_group_tree *block_group_cache,
6243                  struct device_extent_tree *dev_extent_cache,
6244                  struct list_head *good, struct list_head *bad, int silent)
6245 {
6246         struct cache_extent *chunk_item;
6247         struct chunk_record *chunk_rec;
6248         struct block_group_record *bg_rec;
6249         struct device_extent_record *dext_rec;
6250         int err;
6251         int ret = 0;
6252
6253         chunk_item = first_cache_extent(chunk_cache);
6254         while (chunk_item) {
6255                 chunk_rec = container_of(chunk_item, struct chunk_record,
6256                                          cache);
6257                 err = check_chunk_refs(chunk_rec, block_group_cache,
6258                                        dev_extent_cache, silent);
6259                 if (err) {
6260                         ret = err;
6261                         if (bad)
6262                                 list_add_tail(&chunk_rec->list, bad);
6263                 } else {
6264                         if (good)
6265                                 list_add_tail(&chunk_rec->list, good);
6266                 }
6267
6268                 chunk_item = next_cache_extent(chunk_item);
6269         }
6270
6271         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
6272                 if (!silent)
6273                         fprintf(stderr,
6274                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
6275                                 bg_rec->objectid,
6276                                 bg_rec->offset,
6277                                 bg_rec->flags);
6278                 if (!ret)
6279                         ret = 1;
6280         }
6281
6282         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
6283                             chunk_list) {
6284                 if (!silent)
6285                         fprintf(stderr,
6286                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
6287                                 dext_rec->objectid,
6288                                 dext_rec->offset,
6289                                 dext_rec->length);
6290                 if (!ret)
6291                         ret = 1;
6292         }
6293         return ret;
6294 }
6295
6296
6297 static int check_device_used(struct device_record *dev_rec,
6298                              struct device_extent_tree *dext_cache)
6299 {
6300         struct cache_extent *cache;
6301         struct device_extent_record *dev_extent_rec;
6302         u64 total_byte = 0;
6303
6304         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
6305         while (cache) {
6306                 dev_extent_rec = container_of(cache,
6307                                               struct device_extent_record,
6308                                               cache);
6309                 if (dev_extent_rec->objectid != dev_rec->devid)
6310                         break;
6311
6312                 list_del_init(&dev_extent_rec->device_list);
6313                 total_byte += dev_extent_rec->length;
6314                 cache = next_cache_extent(cache);
6315         }
6316
6317         if (total_byte != dev_rec->byte_used) {
6318                 fprintf(stderr,
6319                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
6320                         total_byte, dev_rec->byte_used, dev_rec->objectid,
6321                         dev_rec->type, dev_rec->offset);
6322                 return -1;
6323         } else {
6324                 return 0;
6325         }
6326 }
6327
6328 /* check btrfs_dev_item -> btrfs_dev_extent */
6329 static int check_devices(struct rb_root *dev_cache,
6330                          struct device_extent_tree *dev_extent_cache)
6331 {
6332         struct rb_node *dev_node;
6333         struct device_record *dev_rec;
6334         struct device_extent_record *dext_rec;
6335         int err;
6336         int ret = 0;
6337
6338         dev_node = rb_first(dev_cache);
6339         while (dev_node) {
6340                 dev_rec = container_of(dev_node, struct device_record, node);
6341                 err = check_device_used(dev_rec, dev_extent_cache);
6342                 if (err)
6343                         ret = err;
6344
6345                 dev_node = rb_next(dev_node);
6346         }
6347         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
6348                             device_list) {
6349                 fprintf(stderr,
6350                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
6351                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
6352                 if (!ret)
6353                         ret = 1;
6354         }
6355         return ret;
6356 }
6357
6358 static int check_chunks_and_extents(struct btrfs_root *root)
6359 {
6360         struct rb_root dev_cache;
6361         struct cache_tree chunk_cache;
6362         struct block_group_tree block_group_cache;
6363         struct device_extent_tree dev_extent_cache;
6364         struct cache_tree extent_cache;
6365         struct cache_tree seen;
6366         struct cache_tree pending;
6367         struct cache_tree reada;
6368         struct cache_tree nodes;
6369         struct cache_tree corrupt_blocks;
6370         struct btrfs_path path;
6371         struct btrfs_key key;
6372         struct btrfs_key found_key;
6373         int ret, err = 0;
6374         u64 last = 0;
6375         struct block_info *bits;
6376         int bits_nr;
6377         struct extent_buffer *leaf;
6378         struct btrfs_trans_handle *trans = NULL;
6379         int slot;
6380         struct btrfs_root_item ri;
6381         struct list_head dropping_trees;
6382
6383         dev_cache = RB_ROOT;
6384         cache_tree_init(&chunk_cache);
6385         block_group_tree_init(&block_group_cache);
6386         device_extent_tree_init(&dev_extent_cache);
6387
6388         cache_tree_init(&extent_cache);
6389         cache_tree_init(&seen);
6390         cache_tree_init(&pending);
6391         cache_tree_init(&nodes);
6392         cache_tree_init(&reada);
6393         cache_tree_init(&corrupt_blocks);
6394         INIT_LIST_HEAD(&dropping_trees);
6395
6396         if (repair) {
6397                 trans = btrfs_start_transaction(root, 1);
6398                 if (IS_ERR(trans)) {
6399                         fprintf(stderr, "Error starting transaction\n");
6400                         return PTR_ERR(trans);
6401                 }
6402                 root->fs_info->fsck_extent_cache = &extent_cache;
6403                 root->fs_info->free_extent_hook = free_extent_hook;
6404                 root->fs_info->corrupt_blocks = &corrupt_blocks;
6405         }
6406
6407         bits_nr = 1024;
6408         bits = malloc(bits_nr * sizeof(struct block_info));
6409         if (!bits) {
6410                 perror("malloc");
6411                 exit(1);
6412         }
6413
6414 again:
6415         add_root_to_pending(root->fs_info->tree_root->node,
6416                             &extent_cache, &pending, &seen, &nodes,
6417                             &root->fs_info->tree_root->root_key);
6418
6419         add_root_to_pending(root->fs_info->chunk_root->node,
6420                             &extent_cache, &pending, &seen, &nodes,
6421                             &root->fs_info->chunk_root->root_key);
6422
6423         btrfs_init_path(&path);
6424         key.offset = 0;
6425         key.objectid = 0;
6426         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
6427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
6428                                         &key, &path, 0, 0);
6429         if (ret < 0)
6430                 goto out;
6431         while(1) {
6432                 leaf = path.nodes[0];
6433                 slot = path.slots[0];
6434                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
6435                         ret = btrfs_next_leaf(root, &path);
6436                         if (ret != 0)
6437                                 break;
6438                         leaf = path.nodes[0];
6439                         slot = path.slots[0];
6440                 }
6441                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
6442                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
6443                         unsigned long offset;
6444                         struct extent_buffer *buf;
6445
6446                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6447                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
6448                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
6449                                 buf = read_tree_block(root->fs_info->tree_root,
6450                                                       btrfs_root_bytenr(&ri),
6451                                                       btrfs_level_size(root,
6452                                                       btrfs_root_level(&ri)),
6453                                                       0);
6454                                 if (!buf) {
6455                                         ret = -EIO;
6456                                         goto out;
6457                                 }
6458                                 add_root_to_pending(buf, &extent_cache,
6459                                                     &pending, &seen, &nodes,
6460                                                     &found_key);
6461                                 free_extent_buffer(buf);
6462                         } else {
6463                                 struct dropping_root_item_record *dri_rec;
6464                                 dri_rec = malloc(sizeof(*dri_rec));
6465                                 if (!dri_rec) {
6466                                         perror("malloc");
6467                                         exit(1);
6468                                 }
6469                                 memcpy(&dri_rec->ri, &ri, sizeof(ri));
6470                                 memcpy(&dri_rec->found_key, &found_key,
6471                                        sizeof(found_key));
6472                                 list_add_tail(&dri_rec->list, &dropping_trees);
6473                         }
6474                 }
6475                 path.slots[0]++;
6476         }
6477         btrfs_release_path(&path);
6478         while (1) {
6479                 ret = run_next_block(trans, root, bits, bits_nr, &last,
6480                                      &pending, &seen, &reada, &nodes,
6481                                      &extent_cache, &chunk_cache, &dev_cache,
6482                                      &block_group_cache, &dev_extent_cache,
6483                                      NULL);
6484                 if (ret != 0)
6485                         break;
6486         }
6487
6488         while (!list_empty(&dropping_trees)) {
6489                 struct dropping_root_item_record *rec;
6490                 struct extent_buffer *buf;
6491                 rec = list_entry(dropping_trees.next,
6492                                  struct dropping_root_item_record, list);
6493                 last = 0;
6494                 if (!bits) {
6495                         perror("realloc");
6496                         exit(1);
6497                 }
6498                 buf = read_tree_block(root->fs_info->tree_root,
6499                                       btrfs_root_bytenr(&rec->ri),
6500                                       btrfs_level_size(root,
6501                                       btrfs_root_level(&rec->ri)), 0);
6502                 if (!buf) {
6503                         ret = -EIO;
6504                         goto out;
6505                 }
6506                 add_root_to_pending(buf, &extent_cache, &pending,
6507                                     &seen, &nodes, &rec->found_key);
6508                 while (1) {
6509                         ret = run_next_block(trans, root, bits, bits_nr, &last,
6510                                              &pending, &seen, &reada,
6511                                              &nodes, &extent_cache,
6512                                              &chunk_cache, &dev_cache,
6513                                              &block_group_cache,
6514                                              &dev_extent_cache,
6515                                              &rec->ri);
6516                         if (ret != 0)
6517                                 break;
6518                 }
6519                 free_extent_buffer(buf);
6520                 list_del(&rec->list);
6521                 free(rec);
6522         }
6523
6524         if (ret >= 0)
6525                 ret = check_extent_refs(trans, root, &extent_cache);
6526         if (ret == -EAGAIN) {
6527                 ret = btrfs_commit_transaction(trans, root);
6528                 if (ret)
6529                         goto out;
6530
6531                 trans = btrfs_start_transaction(root, 1);
6532                 if (IS_ERR(trans)) {
6533                         ret = PTR_ERR(trans);
6534                         goto out;
6535                 }
6536
6537                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6538                 free_extent_cache_tree(&seen);
6539                 free_extent_cache_tree(&pending);
6540                 free_extent_cache_tree(&reada);
6541                 free_extent_cache_tree(&nodes);
6542                 free_chunk_cache_tree(&chunk_cache);
6543                 free_block_group_tree(&block_group_cache);
6544                 free_device_cache_tree(&dev_cache);
6545                 free_device_extent_tree(&dev_extent_cache);
6546                 free_extent_record_cache(root->fs_info, &extent_cache);
6547                 goto again;
6548         }
6549
6550         err = check_chunks(&chunk_cache, &block_group_cache,
6551                            &dev_extent_cache, NULL, NULL, 0);
6552         if (err && !ret)
6553                 ret = err;
6554
6555         err = check_devices(&dev_cache, &dev_extent_cache);
6556         if (err && !ret)
6557                 ret = err;
6558
6559 out:
6560         if (trans) {
6561                 err = btrfs_commit_transaction(trans, root);
6562                 if (!ret)
6563                         ret = err;
6564         }
6565         if (repair) {
6566                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6567                 root->fs_info->fsck_extent_cache = NULL;
6568                 root->fs_info->free_extent_hook = NULL;
6569                 root->fs_info->corrupt_blocks = NULL;
6570         }
6571         free(bits);
6572         free_chunk_cache_tree(&chunk_cache);
6573         free_device_cache_tree(&dev_cache);
6574         free_block_group_tree(&block_group_cache);
6575         free_device_extent_tree(&dev_extent_cache);
6576         free_extent_cache_tree(&seen);
6577         free_extent_cache_tree(&pending);
6578         free_extent_cache_tree(&reada);
6579         free_extent_cache_tree(&nodes);
6580         return ret;
6581 }
6582
6583 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
6584                            struct btrfs_root *root, int overwrite)
6585 {
6586         struct extent_buffer *c;
6587         struct extent_buffer *old = root->node;
6588         int level;
6589         int ret;
6590         struct btrfs_disk_key disk_key = {0,0,0};
6591
6592         level = 0;
6593
6594         if (overwrite) {
6595                 c = old;
6596                 extent_buffer_get(c);
6597                 goto init;
6598         }
6599         c = btrfs_alloc_free_block(trans, root,
6600                                    btrfs_level_size(root, 0),
6601                                    root->root_key.objectid,
6602                                    &disk_key, level, 0, 0);
6603         if (IS_ERR(c)) {
6604                 c = old;
6605                 extent_buffer_get(c);
6606                 overwrite = 1;
6607         }
6608 init:
6609         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
6610         btrfs_set_header_level(c, level);
6611         btrfs_set_header_bytenr(c, c->start);
6612         btrfs_set_header_generation(c, trans->transid);
6613         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
6614         btrfs_set_header_owner(c, root->root_key.objectid);
6615
6616         write_extent_buffer(c, root->fs_info->fsid,
6617                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
6618
6619         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
6620                             btrfs_header_chunk_tree_uuid(c),
6621                             BTRFS_UUID_SIZE);
6622
6623         btrfs_mark_buffer_dirty(c);
6624         /*
6625          * this case can happen in the following case:
6626          *
6627          * 1.overwrite previous root.
6628          *
6629          * 2.reinit reloc data root, this is because we skip pin
6630          * down reloc data tree before which means we can allocate
6631          * same block bytenr here.
6632          */
6633         if (old->start == c->start) {
6634                 btrfs_set_root_generation(&root->root_item,
6635                                           trans->transid);
6636                 root->root_item.level = btrfs_header_level(root->node);
6637                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
6638                                         &root->root_key, &root->root_item);
6639                 if (ret) {
6640                         free_extent_buffer(c);
6641                         return ret;
6642                 }
6643         }
6644         free_extent_buffer(old);
6645         root->node = c;
6646         add_root_to_dirty_list(root);
6647         return 0;
6648 }
6649
6650 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
6651                                 struct extent_buffer *eb, int tree_root)
6652 {
6653         struct extent_buffer *tmp;
6654         struct btrfs_root_item *ri;
6655         struct btrfs_key key;
6656         u64 bytenr;
6657         u32 leafsize;
6658         int level = btrfs_header_level(eb);
6659         int nritems;
6660         int ret;
6661         int i;
6662
6663         /*
6664          * If we have pinned this block before, don't pin it again.
6665          * This can not only avoid forever loop with broken filesystem
6666          * but also give us some speedups.
6667          */
6668         if (test_range_bit(&fs_info->pinned_extents, eb->start,
6669                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
6670                 return 0;
6671
6672         btrfs_pin_extent(fs_info, eb->start, eb->len);
6673
6674         leafsize = btrfs_super_leafsize(fs_info->super_copy);
6675         nritems = btrfs_header_nritems(eb);
6676         for (i = 0; i < nritems; i++) {
6677                 if (level == 0) {
6678                         btrfs_item_key_to_cpu(eb, &key, i);
6679                         if (key.type != BTRFS_ROOT_ITEM_KEY)
6680                                 continue;
6681                         /* Skip the extent root and reloc roots */
6682                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6683                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
6684                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
6685                                 continue;
6686                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
6687                         bytenr = btrfs_disk_root_bytenr(eb, ri);
6688
6689                         /*
6690                          * If at any point we start needing the real root we
6691                          * will have to build a stump root for the root we are
6692                          * in, but for now this doesn't actually use the root so
6693                          * just pass in extent_root.
6694                          */
6695                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6696                                               leafsize, 0);
6697                         if (!tmp) {
6698                                 fprintf(stderr, "Error reading root block\n");
6699                                 return -EIO;
6700                         }
6701                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
6702                         free_extent_buffer(tmp);
6703                         if (ret)
6704                                 return ret;
6705                 } else {
6706                         bytenr = btrfs_node_blockptr(eb, i);
6707
6708                         /* If we aren't the tree root don't read the block */
6709                         if (level == 1 && !tree_root) {
6710                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
6711                                 continue;
6712                         }
6713
6714                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6715                                               leafsize, 0);
6716                         if (!tmp) {
6717                                 fprintf(stderr, "Error reading tree block\n");
6718                                 return -EIO;
6719                         }
6720                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
6721                         free_extent_buffer(tmp);
6722                         if (ret)
6723                                 return ret;
6724                 }
6725         }
6726
6727         return 0;
6728 }
6729
6730 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
6731 {
6732         int ret;
6733
6734         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
6735         if (ret)
6736                 return ret;
6737
6738         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
6739 }
6740
6741 static int reset_block_groups(struct btrfs_fs_info *fs_info)
6742 {
6743         struct btrfs_block_group_cache *cache;
6744         struct btrfs_path *path;
6745         struct extent_buffer *leaf;
6746         struct btrfs_chunk *chunk;
6747         struct btrfs_key key;
6748         int ret;
6749         u64 start;
6750
6751         path = btrfs_alloc_path();
6752         if (!path)
6753                 return -ENOMEM;
6754
6755         key.objectid = 0;
6756         key.type = BTRFS_CHUNK_ITEM_KEY;
6757         key.offset = 0;
6758
6759         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
6760         if (ret < 0) {
6761                 btrfs_free_path(path);
6762                 return ret;
6763         }
6764
6765         /*
6766          * We do this in case the block groups were screwed up and had alloc
6767          * bits that aren't actually set on the chunks.  This happens with
6768          * restored images every time and could happen in real life I guess.
6769          */
6770         fs_info->avail_data_alloc_bits = 0;
6771         fs_info->avail_metadata_alloc_bits = 0;
6772         fs_info->avail_system_alloc_bits = 0;
6773
6774         /* First we need to create the in-memory block groups */
6775         while (1) {
6776                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6777                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
6778                         if (ret < 0) {
6779                                 btrfs_free_path(path);
6780                                 return ret;
6781                         }
6782                         if (ret) {
6783                                 ret = 0;
6784                                 break;
6785                         }
6786                 }
6787                 leaf = path->nodes[0];
6788                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6789                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
6790                         path->slots[0]++;
6791                         continue;
6792                 }
6793
6794                 chunk = btrfs_item_ptr(leaf, path->slots[0],
6795                                        struct btrfs_chunk);
6796                 btrfs_add_block_group(fs_info, 0,
6797                                       btrfs_chunk_type(leaf, chunk),
6798                                       key.objectid, key.offset,
6799                                       btrfs_chunk_length(leaf, chunk));
6800                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
6801                                  key.offset + btrfs_chunk_length(leaf, chunk),
6802                                  GFP_NOFS);
6803                 path->slots[0]++;
6804         }
6805         start = 0;
6806         while (1) {
6807                 cache = btrfs_lookup_first_block_group(fs_info, start);
6808                 if (!cache)
6809                         break;
6810                 cache->cached = 1;
6811                 start = cache->key.objectid + cache->key.offset;
6812         }
6813
6814         btrfs_free_path(path);
6815         return 0;
6816 }
6817
6818 static int reset_balance(struct btrfs_trans_handle *trans,
6819                          struct btrfs_fs_info *fs_info)
6820 {
6821         struct btrfs_root *root = fs_info->tree_root;
6822         struct btrfs_path *path;
6823         struct extent_buffer *leaf;
6824         struct btrfs_key key;
6825         int del_slot, del_nr = 0;
6826         int ret;
6827         int found = 0;
6828
6829         path = btrfs_alloc_path();
6830         if (!path)
6831                 return -ENOMEM;
6832
6833         key.objectid = BTRFS_BALANCE_OBJECTID;
6834         key.type = BTRFS_BALANCE_ITEM_KEY;
6835         key.offset = 0;
6836
6837         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6838         if (ret) {
6839                 if (ret > 0)
6840                         ret = 0;
6841                 if (!ret)
6842                         goto reinit_data_reloc;
6843                 else
6844                         goto out;
6845         }
6846
6847         ret = btrfs_del_item(trans, root, path);
6848         if (ret)
6849                 goto out;
6850         btrfs_release_path(path);
6851
6852         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
6853         key.type = BTRFS_ROOT_ITEM_KEY;
6854         key.offset = 0;
6855
6856         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6857         if (ret < 0)
6858                 goto out;
6859         while (1) {
6860                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6861                         if (!found)
6862                                 break;
6863
6864                         if (del_nr) {
6865                                 ret = btrfs_del_items(trans, root, path,
6866                                                       del_slot, del_nr);
6867                                 del_nr = 0;
6868                                 if (ret)
6869                                         goto out;
6870                         }
6871                         key.offset++;
6872                         btrfs_release_path(path);
6873
6874                         found = 0;
6875                         ret = btrfs_search_slot(trans, root, &key, path,
6876                                                 -1, 1);
6877                         if (ret < 0)
6878                                 goto out;
6879                         continue;
6880                 }
6881                 found = 1;
6882                 leaf = path->nodes[0];
6883                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6884                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
6885                         break;
6886                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6887                         path->slots[0]++;
6888                         continue;
6889                 }
6890                 if (!del_nr) {
6891                         del_slot = path->slots[0];
6892                         del_nr = 1;
6893                 } else {
6894                         del_nr++;
6895                 }
6896                 path->slots[0]++;
6897         }
6898
6899         if (del_nr) {
6900                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
6901                 if (ret)
6902                         goto out;
6903         }
6904         btrfs_release_path(path);
6905
6906 reinit_data_reloc:
6907         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
6908         key.type = BTRFS_ROOT_ITEM_KEY;
6909         key.offset = (u64)-1;
6910         root = btrfs_read_fs_root(fs_info, &key);
6911         if (IS_ERR(root)) {
6912                 fprintf(stderr, "Error reading data reloc tree\n");
6913                 return PTR_ERR(root);
6914         }
6915         record_root_in_trans(trans, root);
6916         ret = btrfs_fsck_reinit_root(trans, root, 0);
6917         if (ret)
6918                 goto out;
6919         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
6920 out:
6921         btrfs_free_path(path);
6922         return ret;
6923 }
6924
6925 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
6926                               struct btrfs_fs_info *fs_info)
6927 {
6928         u64 start = 0;
6929         int ret;
6930
6931         /*
6932          * The only reason we don't do this is because right now we're just
6933          * walking the trees we find and pinning down their bytes, we don't look
6934          * at any of the leaves.  In order to do mixed groups we'd have to check
6935          * the leaves of any fs roots and pin down the bytes for any file
6936          * extents we find.  Not hard but why do it if we don't have to?
6937          */
6938         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
6939                 fprintf(stderr, "We don't support re-initing the extent tree "
6940                         "for mixed block groups yet, please notify a btrfs "
6941                         "developer you want to do this so they can add this "
6942                         "functionality.\n");
6943                 return -EINVAL;
6944         }
6945
6946         /*
6947          * first we need to walk all of the trees except the extent tree and pin
6948          * down the bytes that are in use so we don't overwrite any existing
6949          * metadata.
6950          */
6951         ret = pin_metadata_blocks(fs_info);
6952         if (ret) {
6953                 fprintf(stderr, "error pinning down used bytes\n");
6954                 return ret;
6955         }
6956
6957         /*
6958          * Need to drop all the block groups since we're going to recreate all
6959          * of them again.
6960          */
6961         btrfs_free_block_groups(fs_info);
6962         ret = reset_block_groups(fs_info);
6963         if (ret) {
6964                 fprintf(stderr, "error resetting the block groups\n");
6965                 return ret;
6966         }
6967
6968         /* Ok we can allocate now, reinit the extent root */
6969         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
6970         if (ret) {
6971                 fprintf(stderr, "extent root initialization failed\n");
6972                 /*
6973                  * When the transaction code is updated we should end the
6974                  * transaction, but for now progs only knows about commit so
6975                  * just return an error.
6976                  */
6977                 return ret;
6978         }
6979
6980         /*
6981          * Now we have all the in-memory block groups setup so we can make
6982          * allocations properly, and the metadata we care about is safe since we
6983          * pinned all of it above.
6984          */
6985         while (1) {
6986                 struct btrfs_block_group_cache *cache;
6987
6988                 cache = btrfs_lookup_first_block_group(fs_info, start);
6989                 if (!cache)
6990                         break;
6991                 start = cache->key.objectid + cache->key.offset;
6992                 ret = btrfs_insert_item(trans, fs_info->extent_root,
6993                                         &cache->key, &cache->item,
6994                                         sizeof(cache->item));
6995                 if (ret) {
6996                         fprintf(stderr, "Error adding block group\n");
6997                         return ret;
6998                 }
6999                 btrfs_extent_post_op(trans, fs_info->extent_root);
7000         }
7001
7002         ret = reset_balance(trans, fs_info);
7003         if (ret)
7004                 fprintf(stderr, "error reseting the pending balance\n");
7005
7006         return ret;
7007 }
7008
7009 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
7010 {
7011         struct btrfs_path *path;
7012         struct btrfs_trans_handle *trans;
7013         struct btrfs_key key;
7014         int ret;
7015
7016         printf("Recowing metadata block %llu\n", eb->start);
7017         key.objectid = btrfs_header_owner(eb);
7018         key.type = BTRFS_ROOT_ITEM_KEY;
7019         key.offset = (u64)-1;
7020
7021         root = btrfs_read_fs_root(root->fs_info, &key);
7022         if (IS_ERR(root)) {
7023                 fprintf(stderr, "Couldn't find owner root %llu\n",
7024                         key.objectid);
7025                 return PTR_ERR(root);
7026         }
7027
7028         path = btrfs_alloc_path();
7029         if (!path)
7030                 return -ENOMEM;
7031
7032         trans = btrfs_start_transaction(root, 1);
7033         if (IS_ERR(trans)) {
7034                 btrfs_free_path(path);
7035                 return PTR_ERR(trans);
7036         }
7037
7038         path->lowest_level = btrfs_header_level(eb);
7039         if (path->lowest_level)
7040                 btrfs_node_key_to_cpu(eb, &key, 0);
7041         else
7042                 btrfs_item_key_to_cpu(eb, &key, 0);
7043
7044         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7045         btrfs_commit_transaction(trans, root);
7046         btrfs_free_path(path);
7047         return ret;
7048 }
7049
7050 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
7051 {
7052         struct btrfs_path *path;
7053         struct btrfs_trans_handle *trans;
7054         struct btrfs_key key;
7055         int ret;
7056
7057         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
7058                bad->key.type, bad->key.offset);
7059         key.objectid = bad->root_id;
7060         key.type = BTRFS_ROOT_ITEM_KEY;
7061         key.offset = (u64)-1;
7062
7063         root = btrfs_read_fs_root(root->fs_info, &key);
7064         if (IS_ERR(root)) {
7065                 fprintf(stderr, "Couldn't find owner root %llu\n",
7066                         key.objectid);
7067                 return PTR_ERR(root);
7068         }
7069
7070         path = btrfs_alloc_path();
7071         if (!path)
7072                 return -ENOMEM;
7073
7074         trans = btrfs_start_transaction(root, 1);
7075         if (IS_ERR(trans)) {
7076                 btrfs_free_path(path);
7077                 return PTR_ERR(trans);
7078         }
7079
7080         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
7081         if (ret) {
7082                 if (ret > 0)
7083                         ret = 0;
7084                 goto out;
7085         }
7086         ret = btrfs_del_item(trans, root, path);
7087 out:
7088         btrfs_commit_transaction(trans, root);
7089         btrfs_free_path(path);
7090         return ret;
7091 }
7092
7093 static int zero_log_tree(struct btrfs_root *root)
7094 {
7095         struct btrfs_trans_handle *trans;
7096         int ret;
7097
7098         trans = btrfs_start_transaction(root, 1);
7099         if (IS_ERR(trans)) {
7100                 ret = PTR_ERR(trans);
7101                 return ret;
7102         }
7103         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
7104         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
7105         ret = btrfs_commit_transaction(trans, root);
7106         return ret;
7107 }
7108
7109 static int populate_csum(struct btrfs_trans_handle *trans,
7110                          struct btrfs_root *csum_root, char *buf, u64 start,
7111                          u64 len)
7112 {
7113         u64 offset = 0;
7114         u64 sectorsize;
7115         int ret = 0;
7116
7117         while (offset < len) {
7118                 sectorsize = csum_root->sectorsize;
7119                 ret = read_extent_data(csum_root, buf, start + offset,
7120                                        &sectorsize, 0);
7121                 if (ret)
7122                         break;
7123                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
7124                                             start + offset, buf, sectorsize);
7125                 if (ret)
7126                         break;
7127                 offset += sectorsize;
7128         }
7129         return ret;
7130 }
7131
7132 static int fill_csum_tree(struct btrfs_trans_handle *trans,
7133                           struct btrfs_root *csum_root)
7134 {
7135         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
7136         struct btrfs_path *path;
7137         struct btrfs_extent_item *ei;
7138         struct extent_buffer *leaf;
7139         char *buf;
7140         struct btrfs_key key;
7141         int ret;
7142
7143         path = btrfs_alloc_path();
7144         if (!path)
7145                 return -ENOMEM;
7146
7147         key.objectid = 0;
7148         key.type = BTRFS_EXTENT_ITEM_KEY;
7149         key.offset = 0;
7150
7151         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
7152         if (ret < 0) {
7153                 btrfs_free_path(path);
7154                 return ret;
7155         }
7156
7157         buf = malloc(csum_root->sectorsize);
7158         if (!buf) {
7159                 btrfs_free_path(path);
7160                 return -ENOMEM;
7161         }
7162
7163         while (1) {
7164                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7165                         ret = btrfs_next_leaf(extent_root, path);
7166                         if (ret < 0)
7167                                 break;
7168                         if (ret) {
7169                                 ret = 0;
7170                                 break;
7171                         }
7172                 }
7173                 leaf = path->nodes[0];
7174
7175                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7176                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7177                         path->slots[0]++;
7178                         continue;
7179                 }
7180
7181                 ei = btrfs_item_ptr(leaf, path->slots[0],
7182                                     struct btrfs_extent_item);
7183                 if (!(btrfs_extent_flags(leaf, ei) &
7184                       BTRFS_EXTENT_FLAG_DATA)) {
7185                         path->slots[0]++;
7186                         continue;
7187                 }
7188
7189                 ret = populate_csum(trans, csum_root, buf, key.objectid,
7190                                     key.offset);
7191                 if (ret)
7192                         break;
7193                 path->slots[0]++;
7194         }
7195
7196         btrfs_free_path(path);
7197         free(buf);
7198         return ret;
7199 }
7200
/*
 * Per-root summary collected from the extent tree by build_roots_info_cache()
 * and compared against the on-disk root item by maybe_repair_root_item().
 */
struct root_item_info {
        /*
         * level of the root: the highest tree block level seen so far for
         * this root, (u8)-1 until the first block has been recorded
         */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        /* key objectid of the extent item recorded for that level */
        u64 bytenr;
        /* generation read from that extent item */
        u64 gen;
        /* node in roots_info_cache: start is the root id, size is always 1 */
        struct cache_extent cache_extent;
};
7210
/*
 * Cache of struct root_item_info entries looked up by root id.  Allocated on
 * first use by build_roots_info_cache() and released, then reset to NULL, by
 * free_roots_info_cache().
 */
static struct cache_tree *roots_info_cache = NULL;
7212
7213 static void free_roots_info_cache(void)
7214 {
7215         if (!roots_info_cache)
7216                 return;
7217
7218         while (!cache_tree_empty(roots_info_cache)) {
7219                 struct cache_extent *entry;
7220                 struct root_item_info *rii;
7221
7222                 entry = first_cache_extent(roots_info_cache);
7223                 remove_cache_extent(roots_info_cache, entry);
7224                 rii = container_of(entry, struct root_item_info, cache_extent);
7225                 free(rii);
7226         }
7227
7228         free(roots_info_cache);
7229         roots_info_cache = NULL;
7230 }
7231
7232 static int build_roots_info_cache(struct btrfs_fs_info *info)
7233 {
7234         int ret = 0;
7235         struct btrfs_key key;
7236         struct extent_buffer *leaf;
7237         struct btrfs_path *path;
7238
7239         if (!roots_info_cache) {
7240                 roots_info_cache = malloc(sizeof(*roots_info_cache));
7241                 if (!roots_info_cache)
7242                         return -ENOMEM;
7243                 cache_tree_init(roots_info_cache);
7244         }
7245
7246         path = btrfs_alloc_path();
7247         if (!path)
7248                 return -ENOMEM;
7249
7250         key.objectid = 0;
7251         key.type = BTRFS_EXTENT_ITEM_KEY;
7252         key.offset = 0;
7253
7254         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
7255         if (ret < 0)
7256                 goto out;
7257         leaf = path->nodes[0];
7258
7259         while (1) {
7260                 struct btrfs_key found_key;
7261                 struct btrfs_extent_item *ei;
7262                 struct btrfs_extent_inline_ref *iref;
7263                 int slot = path->slots[0];
7264                 int type;
7265                 u64 flags;
7266                 u64 root_id;
7267                 u8 level;
7268                 struct cache_extent *entry;
7269                 struct root_item_info *rii;
7270
7271                 if (slot >= btrfs_header_nritems(leaf)) {
7272                         ret = btrfs_next_leaf(info->extent_root, path);
7273                         if (ret < 0) {
7274                                 break;
7275                         } else if (ret) {
7276                                 ret = 0;
7277                                 break;
7278                         }
7279                         leaf = path->nodes[0];
7280                         slot = path->slots[0];
7281                 }
7282
7283                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
7284
7285                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7286                     found_key.type != BTRFS_METADATA_ITEM_KEY)
7287                         goto next;
7288
7289                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
7290                 flags = btrfs_extent_flags(leaf, ei);
7291
7292                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
7293                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
7294                         goto next;
7295
7296                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
7297                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
7298                         level = found_key.offset;
7299                 } else {
7300                         struct btrfs_tree_block_info *info;
7301
7302                         info = (struct btrfs_tree_block_info *)(ei + 1);
7303                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
7304                         level = btrfs_tree_block_level(leaf, info);
7305                 }
7306
7307                 /*
7308                  * For a root extent, it must be of the following type and the
7309                  * first (and only one) iref in the item.
7310                  */
7311                 type = btrfs_extent_inline_ref_type(leaf, iref);
7312                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
7313                         goto next;
7314
7315                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
7316                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
7317                 if (!entry) {
7318                         rii = malloc(sizeof(struct root_item_info));
7319                         if (!rii) {
7320                                 ret = -ENOMEM;
7321                                 goto out;
7322                         }
7323                         rii->cache_extent.start = root_id;
7324                         rii->cache_extent.size = 1;
7325                         rii->level = (u8)-1;
7326                         entry = &rii->cache_extent;
7327                         ret = insert_cache_extent(roots_info_cache, entry);
7328                         ASSERT(ret == 0);
7329                 } else {
7330                         rii = container_of(entry, struct root_item_info,
7331                                            cache_extent);
7332                 }
7333
7334                 ASSERT(rii->cache_extent.start == root_id);
7335                 ASSERT(rii->cache_extent.size == 1);
7336
7337                 if (level > rii->level || rii->level == (u8)-1) {
7338                         rii->level = level;
7339                         rii->bytenr = found_key.objectid;
7340                         rii->gen = btrfs_extent_generation(leaf, ei);
7341                         rii->node_count = 1;
7342                 } else if (level == rii->level) {
7343                         rii->node_count++;
7344                 }
7345 next:
7346                 path->slots[0]++;
7347         }
7348
7349 out:
7350         btrfs_free_path(path);
7351
7352         return ret;
7353 }
7354
7355 static int maybe_repair_root_item(struct btrfs_fs_info *info,
7356                                   struct btrfs_path *path,
7357                                   const struct btrfs_key *root_key,
7358                                   const int read_only_mode)
7359 {
7360         const u64 root_id = root_key->objectid;
7361         struct cache_extent *entry;
7362         struct root_item_info *rii;
7363         struct btrfs_root_item ri;
7364         unsigned long offset;
7365
7366         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
7367         if (!entry) {
7368                 fprintf(stderr,
7369                         "Error: could not find extent items for root %llu\n",
7370                         root_key->objectid);
7371                 return -ENOENT;
7372         }
7373
7374         rii = container_of(entry, struct root_item_info, cache_extent);
7375         ASSERT(rii->cache_extent.start == root_id);
7376         ASSERT(rii->cache_extent.size == 1);
7377
7378         if (rii->node_count != 1) {
7379                 fprintf(stderr,
7380                         "Error: could not find btree root extent for root %llu\n",
7381                         root_id);
7382                 return -ENOENT;
7383         }
7384
7385         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
7386         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
7387
7388         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
7389             btrfs_root_level(&ri) != rii->level ||
7390             btrfs_root_generation(&ri) != rii->gen) {
7391
7392                 /*
7393                  * If we're in repair mode but our caller told us to not update
7394                  * the root item, i.e. just check if it needs to be updated, don't
7395                  * print this message, since the caller will call us again shortly
7396                  * for the same root item without read only mode (the caller will
7397                  * open a transaction first).
7398                  */
7399                 if (!(read_only_mode && repair))
7400                         fprintf(stderr,
7401                                 "%sroot item for root %llu,"
7402                                 " current bytenr %llu, current gen %llu, current level %u,"
7403                                 " new bytenr %llu, new gen %llu, new level %u\n",
7404                                 (read_only_mode ? "" : "fixing "),
7405                                 root_id,
7406                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
7407                                 btrfs_root_level(&ri),
7408                                 rii->bytenr, rii->gen, rii->level);
7409
7410                 if (btrfs_root_generation(&ri) > rii->gen) {
7411                         fprintf(stderr,
7412                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
7413                                 root_id, btrfs_root_generation(&ri), rii->gen);
7414                         return -EINVAL;
7415                 }
7416
7417                 if (!read_only_mode) {
7418                         btrfs_set_root_bytenr(&ri, rii->bytenr);
7419                         btrfs_set_root_level(&ri, rii->level);
7420                         btrfs_set_root_generation(&ri, rii->gen);
7421                         write_extent_buffer(path->nodes[0], &ri,
7422                                             offset, sizeof(ri));
7423                 }
7424
7425                 return 1;
7426         }
7427
7428         return 0;
7429 }
7430
7431 /*
7432  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
7433  * caused read-only snapshots to be corrupted if they were created at a moment
7434  * when the source subvolume/snapshot had orphan items. The issue was that the
7435  * on-disk root items became incorrect, referring to the pre orphan cleanup root
7436  * node instead of the post orphan cleanup root node.
7437  * So this function, and its callees, just detects and fixes those cases. Even
7438  * though the regression was for read-only snapshots, this function applies to
7439  * any snapshot/subvolume root.
7440  * This must be run before any other repair code - not doing it so, makes other
7441  * repair code delete or modify backrefs in the extent tree for example, which
7442  * will result in an inconsistent fs after repairing the root items.
7443  */
7444 static int repair_root_items(struct btrfs_fs_info *info)
7445 {
7446         struct btrfs_path *path = NULL;
7447         struct btrfs_key key;
7448         struct extent_buffer *leaf;
7449         struct btrfs_trans_handle *trans = NULL;
7450         int ret = 0;
7451         int bad_roots = 0;
7452         int need_trans = 0;
7453
7454         ret = build_roots_info_cache(info);
7455         if (ret)
7456                 goto out;
7457
7458         path = btrfs_alloc_path();
7459         if (!path) {
7460                 ret = -ENOMEM;
7461                 goto out;
7462         }
7463
7464         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
7465         key.type = BTRFS_ROOT_ITEM_KEY;
7466         key.offset = 0;
7467
7468 again:
7469         /*
7470          * Avoid opening and committing transactions if a leaf doesn't have
7471          * any root items that need to be fixed, so that we avoid rotating
7472          * backup roots unnecessarily.
7473          */
7474         if (need_trans) {
7475                 trans = btrfs_start_transaction(info->tree_root, 1);
7476                 if (IS_ERR(trans)) {
7477                         ret = PTR_ERR(trans);
7478                         goto out;
7479                 }
7480         }
7481
7482         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
7483                                 0, trans ? 1 : 0);
7484         if (ret < 0)
7485                 goto out;
7486         leaf = path->nodes[0];
7487
7488         while (1) {
7489                 struct btrfs_key found_key;
7490
7491                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
7492                         int no_more_keys = find_next_key(path, &key);
7493
7494                         btrfs_release_path(path);
7495                         if (trans) {
7496                                 ret = btrfs_commit_transaction(trans,
7497                                                                info->tree_root);
7498                                 trans = NULL;
7499                                 if (ret < 0)
7500                                         goto out;
7501                         }
7502                         need_trans = 0;
7503                         if (no_more_keys)
7504                                 break;
7505                         goto again;
7506                 }
7507
7508                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
7509
7510                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
7511                         goto next;
7512
7513                 ret = maybe_repair_root_item(info, path, &found_key,
7514                                              trans ? 0 : 1);
7515                 if (ret < 0)
7516                         goto out;
7517                 if (ret) {
7518                         if (!trans && repair) {
7519                                 need_trans = 1;
7520                                 key = found_key;
7521                                 btrfs_release_path(path);
7522                                 goto again;
7523                         }
7524                         bad_roots++;
7525                 }
7526 next:
7527                 path->slots[0]++;
7528         }
7529         ret = 0;
7530 out:
7531         free_roots_info_cache();
7532         if (path)
7533                 btrfs_free_path(path);
7534         if (ret < 0)
7535                 return ret;
7536
7537         return bad_roots;
7538 }
7539
/*
 * Long options understood by "btrfs check".
 *
 * Entries with a val of 0 make getopt_long() return 0; cmd_check() tells
 * those apart by their position in this table, so the order of the entries
 * must not change.
 *
 * "backup" maps to 'b' so that --backup behaves exactly like -b, as the
 * usage text promises.
 */
static struct option long_options[] = {
	{ "super", required_argument, NULL, 's' },
	{ "repair", no_argument, NULL, 0 },
	{ "init-csum-tree", no_argument, NULL, 0 },
	{ "init-extent-tree", no_argument, NULL, 0 },
	{ "check-data-csum", no_argument, NULL, 0 },
	{ "backup", no_argument, NULL, 'b' },
	{ "subvol-extents", required_argument, NULL, 'E' },
	{ "qgroup-report", no_argument, NULL, 'Q' },
	{ NULL, 0, NULL, 0}
};
7551
/*
 * Help text for "btrfs check": synopsis first, then a one-line description,
 * an empty separator and one line per option.  The list is NULL-terminated.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check an unmounted btrfs filesystem.",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the backup root copy",
	"--repair                    try to repair the filesystem",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"--qgroup-report             print a report on qgroup consistency",
	"--subvol-extents <subvolid> print subvolume extents and sharing state",
	NULL
};
7566
7567 int cmd_check(int argc, char **argv)
7568 {
7569         struct cache_tree root_cache;
7570         struct btrfs_root *root;
7571         struct btrfs_fs_info *info;
7572         u64 bytenr = 0;
7573         u64 subvolid = 0;
7574         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
7575         int ret;
7576         u64 num;
7577         int option_index = 0;
7578         int init_csum_tree = 0;
7579         int qgroup_report = 0;
7580         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
7581
7582         while(1) {
7583                 int c;
7584                 c = getopt_long(argc, argv, "as:b", long_options,
7585                                 &option_index);
7586                 if (c < 0)
7587                         break;
7588                 switch(c) {
7589                         case 'a': /* ignored */ break;
7590                         case 'b':
7591                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
7592                                 break;
7593                         case 's':
7594                                 num = arg_strtou64(optarg);
7595                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
7596                                         fprintf(stderr,
7597                                                 "ERROR: super mirror should be less than: %d\n",
7598                                                 BTRFS_SUPER_MIRROR_MAX);
7599                                         exit(1);
7600                                 }
7601                                 bytenr = btrfs_sb_offset(((int)num));
7602                                 printf("using SB copy %llu, bytenr %llu\n", num,
7603                                        (unsigned long long)bytenr);
7604                                 break;
7605                         case 'Q':
7606                                 qgroup_report = 1;
7607                                 break;
7608                         case 'E':
7609                                 subvolid = arg_strtou64(optarg);
7610                                 break;
7611                         case '?':
7612                         case 'h':
7613                                 usage(cmd_check_usage);
7614                 }
7615                 if (option_index == 1) {
7616                         printf("enabling repair mode\n");
7617                         repair = 1;
7618                         ctree_flags |= OPEN_CTREE_WRITES;
7619                 } else if (option_index == 2) {
7620                         printf("Creating a new CRC tree\n");
7621                         init_csum_tree = 1;
7622                         repair = 1;
7623                         ctree_flags |= OPEN_CTREE_WRITES;
7624                 } else if (option_index == 3) {
7625                         init_extent_tree = 1;
7626                         ctree_flags |= (OPEN_CTREE_WRITES |
7627                                         OPEN_CTREE_NO_BLOCK_GROUPS);
7628                         repair = 1;
7629                 } else if (option_index == 4) {
7630                         check_data_csum = 1;
7631                 }
7632         }
7633         argc = argc - optind;
7634
7635         if (check_argc_exact(argc, 1))
7636                 usage(cmd_check_usage);
7637
7638         radix_tree_init();
7639         cache_tree_init(&root_cache);
7640
7641         if((ret = check_mounted(argv[optind])) < 0) {
7642                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
7643                 goto err_out;
7644         } else if(ret) {
7645                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
7646                 ret = -EBUSY;
7647                 goto err_out;
7648         }
7649
7650         /* only allow partial opening under repair mode */
7651         if (repair)
7652                 ctree_flags |= OPEN_CTREE_PARTIAL;
7653
7654         info = open_ctree_fs_info(argv[optind], bytenr, 0, ctree_flags);
7655         if (!info) {
7656                 fprintf(stderr, "Couldn't open file system\n");
7657                 ret = -EIO;
7658                 goto err_out;
7659         }
7660
7661         root = info->fs_root;
7662
7663         ret = repair_root_items(info);
7664         if (ret < 0)
7665                 goto close_out;
7666         if (repair) {
7667                 fprintf(stderr, "Fixed %d roots.\n", ret);
7668                 ret = 0;
7669         } else if (ret > 0) {
7670                 fprintf(stderr,
7671                        "Found %d roots with an outdated root item.\n",
7672                        ret);
7673                 fprintf(stderr,
7674                         "Please run a filesystem check with the option --repair to fix them.\n");
7675                 ret = 1;
7676                 goto close_out;
7677         }
7678
7679         /*
7680          * repair mode will force us to commit transaction which
7681          * will make us fail to load log tree when mounting.
7682          */
7683         if (repair && btrfs_super_log_root(info->super_copy)) {
7684                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
7685                 if (!ret) {
7686                         ret = 1;
7687                         goto close_out;
7688                 }
7689                 ret = zero_log_tree(root);
7690                 if (ret) {
7691                         fprintf(stderr, "fail to zero log tree\n");
7692                         goto close_out;
7693                 }
7694         }
7695
7696         uuid_unparse(info->super_copy->fsid, uuidbuf);
7697         if (qgroup_report) {
7698                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
7699                        uuidbuf);
7700                 ret = qgroup_verify_all(info);
7701                 if (ret == 0)
7702                         print_qgroup_report(1);
7703                 goto close_out;
7704         }
7705         if (subvolid) {
7706                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
7707                        subvolid, argv[optind], uuidbuf);
7708                 ret = print_extent_state(info, subvolid);
7709                 goto close_out;
7710         }
7711         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
7712
7713         if (!extent_buffer_uptodate(info->tree_root->node) ||
7714             !extent_buffer_uptodate(info->dev_root->node) ||
7715             !extent_buffer_uptodate(info->chunk_root->node)) {
7716                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
7717                 ret = -EIO;
7718                 goto close_out;
7719         }
7720
7721         if (init_extent_tree || init_csum_tree) {
7722                 struct btrfs_trans_handle *trans;
7723
7724                 trans = btrfs_start_transaction(info->extent_root, 0);
7725                 if (IS_ERR(trans)) {
7726                         fprintf(stderr, "Error starting transaction\n");
7727                         ret = PTR_ERR(trans);
7728                         goto close_out;
7729                 }
7730
7731                 if (init_extent_tree) {
7732                         printf("Creating a new extent tree\n");
7733                         ret = reinit_extent_tree(trans, info);
7734                         if (ret)
7735                                 goto close_out;
7736                 }
7737
7738                 if (init_csum_tree) {
7739                         fprintf(stderr, "Reinit crc root\n");
7740                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
7741                         if (ret) {
7742                                 fprintf(stderr, "crc root initialization failed\n");
7743                                 ret = -EIO;
7744                                 goto close_out;
7745                         }
7746
7747                         ret = fill_csum_tree(trans, info->csum_root);
7748                         if (ret) {
7749                                 fprintf(stderr, "crc refilling failed\n");
7750                                 return -EIO;
7751                         }
7752                 }
7753                 /*
7754                  * Ok now we commit and run the normal fsck, which will add
7755                  * extent entries for all of the items it finds.
7756                  */
7757                 ret = btrfs_commit_transaction(trans, info->extent_root);
7758                 if (ret)
7759                         goto close_out;
7760         }
7761         if (!extent_buffer_uptodate(info->extent_root->node)) {
7762                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
7763                 ret = -EIO;
7764                 goto close_out;
7765         }
7766         if (!extent_buffer_uptodate(info->csum_root->node)) {
7767                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
7768                 ret = -EIO;
7769                 goto close_out;
7770         }
7771
7772         fprintf(stderr, "checking extents\n");
7773         ret = check_chunks_and_extents(root);
7774         if (ret)
7775                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
7776
7777         fprintf(stderr, "checking free space cache\n");
7778         ret = check_space_cache(root);
7779         if (ret)
7780                 goto out;
7781
7782         /*
7783          * We used to have to have these hole extents in between our real
7784          * extents so if we don't have this flag set we need to make sure there
7785          * are no gaps in the file extents for inodes, otherwise we can just
7786          * ignore it when this happens.
7787          */
7788         no_holes = btrfs_fs_incompat(root->fs_info,
7789                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
7790         fprintf(stderr, "checking fs roots\n");
7791         ret = check_fs_roots(root, &root_cache);
7792         if (ret)
7793                 goto out;
7794
7795         fprintf(stderr, "checking csums\n");
7796         ret = check_csums(root);
7797         if (ret)
7798                 goto out;
7799
7800         fprintf(stderr, "checking root refs\n");
7801         ret = check_root_refs(root, &root_cache);
7802         if (ret)
7803                 goto out;
7804
7805         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
7806                 struct extent_buffer *eb;
7807
7808                 eb = list_first_entry(&root->fs_info->recow_ebs,
7809                                       struct extent_buffer, recow);
7810                 list_del_init(&eb->recow);
7811                 ret = recow_extent_buffer(root, eb);
7812                 if (ret)
7813                         break;
7814         }
7815
7816         while (!list_empty(&delete_items)) {
7817                 struct bad_item *bad;
7818
7819                 bad = list_first_entry(&delete_items, struct bad_item, list);
7820                 list_del_init(&bad->list);
7821                 if (repair)
7822                         ret = delete_bad_item(root, bad);
7823                 free(bad);
7824         }
7825
7826         if (info->quota_enabled) {
7827                 int err;
7828                 fprintf(stderr, "checking quota groups\n");
7829                 err = qgroup_verify_all(info);
7830                 if (err)
7831                         goto out;
7832         }
7833
7834         if (!list_empty(&root->fs_info->recow_ebs)) {
7835                 fprintf(stderr, "Transid errors in file system\n");
7836                 ret = 1;
7837         }
7838 out:
7839         print_qgroup_report(0);
7840         if (found_old_backref) { /*
7841                  * there was a disk format change when mixed
7842                  * backref was in testing tree. The old format
7843                  * existed about one week.
7844                  */
7845                 printf("\n * Found old mixed backref format. "
7846                        "The old format is not supported! *"
7847                        "\n * Please mount the FS in readonly mode, "
7848                        "backup data and re-format the FS. *\n\n");
7849                 ret = 1;
7850         }
7851         printf("found %llu bytes used err is %d\n",
7852                (unsigned long long)bytes_used, ret);
7853         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
7854         printf("total tree bytes: %llu\n",
7855                (unsigned long long)total_btree_bytes);
7856         printf("total fs tree bytes: %llu\n",
7857                (unsigned long long)total_fs_tree_bytes);
7858         printf("total extent tree bytes: %llu\n",
7859                (unsigned long long)total_extent_tree_bytes);
7860         printf("btree space waste bytes: %llu\n",
7861                (unsigned long long)btree_space_waste);
7862         printf("file data blocks allocated: %llu\n referenced %llu\n",
7863                 (unsigned long long)data_bytes_allocated,
7864                 (unsigned long long)data_bytes_referenced);
7865         printf("%s\n", BTRFS_BUILD_VERSION);
7866
7867         free_root_recs_tree(&root_cache);
7868 close_out:
7869         close_ctree(root);
7870 err_out:
7871         return ret;
7872 }