Btrfs-progs: reset balance before reiniting extent root
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "ctree.h"
31 #include "volumes.h"
32 #include "repair.h"
33 #include "disk-io.h"
34 #include "print-tree.h"
35 #include "transaction.h"
36 #include "version.h"
37 #include "utils.h"
38 #include "commands.h"
39 #include "free-space-cache.h"
40 #include "btrfsck.h"
41
42 static u64 bytes_used = 0;
43 static u64 total_csum_bytes = 0;
44 static u64 total_btree_bytes = 0;
45 static u64 total_fs_tree_bytes = 0;
46 static u64 total_extent_tree_bytes = 0;
47 static u64 btree_space_waste = 0;
48 static u64 data_bytes_allocated = 0;
49 static u64 data_bytes_referenced = 0;
50 static int found_old_backref = 0;
51 static LIST_HEAD(duplicate_extents);
52 static int repair = 0;
53
54 struct extent_backref {
55         struct list_head list;
56         unsigned int is_data:1;
57         unsigned int found_extent_tree:1;
58         unsigned int full_backref:1;
59         unsigned int found_ref:1;
60         unsigned int broken:1;
61 };
62
63 struct data_backref {
64         struct extent_backref node;
65         union {
66                 u64 parent;
67                 u64 root;
68         };
69         u64 owner;
70         u64 offset;
71         u64 disk_bytenr;
72         u64 bytes;
73         u64 ram_bytes;
74         u32 num_refs;
75         u32 found_ref;
76 };
77
78 struct tree_backref {
79         struct extent_backref node;
80         union {
81                 u64 parent;
82                 u64 root;
83         };
84 };
85
86 struct extent_record {
87         struct list_head backrefs;
88         struct list_head dups;
89         struct list_head list;
90         struct cache_extent cache;
91         struct btrfs_disk_key parent_key;
92         unsigned int found_rec;
93         u64 start;
94         u64 max_size;
95         u64 nr;
96         u64 refs;
97         u64 extent_item_refs;
98         u64 generation;
99         u64 info_objectid;
100         u64 num_duplicates;
101         u8 info_level;
102         unsigned int content_checked:1;
103         unsigned int owner_ref_checked:1;
104         unsigned int is_root:1;
105         unsigned int metadata:1;
106 };
107
108 struct inode_backref {
109         struct list_head list;
110         unsigned int found_dir_item:1;
111         unsigned int found_dir_index:1;
112         unsigned int found_inode_ref:1;
113         unsigned int filetype:8;
114         int errors;
115         unsigned int ref_type;
116         u64 dir;
117         u64 index;
118         u16 namelen;
119         char name[0];
120 };
121
122 struct dropping_root_item_record {
123         struct list_head list;
124         struct btrfs_root_item ri;
125         struct btrfs_key found_key;
126 };
127
128 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
129 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
130 #define REF_ERR_NO_INODE_REF            (1 << 2)
131 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
132 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
133 #define REF_ERR_DUP_INODE_REF           (1 << 5)
134 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
135 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
136 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
137 #define REF_ERR_NO_ROOT_REF             (1 << 9)
138 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
139 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
140 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
141
142 struct inode_record {
143         struct list_head backrefs;
144         unsigned int checked:1;
145         unsigned int merging:1;
146         unsigned int found_inode_item:1;
147         unsigned int found_dir_item:1;
148         unsigned int found_file_extent:1;
149         unsigned int found_csum_item:1;
150         unsigned int some_csum_missing:1;
151         unsigned int nodatasum:1;
152         int errors;
153
154         u64 ino;
155         u32 nlink;
156         u32 imode;
157         u64 isize;
158         u64 nbytes;
159
160         u32 found_link;
161         u64 found_size;
162         u64 extent_start;
163         u64 extent_end;
164         u64 first_extent_gap;
165
166         u32 refs;
167 };
168
169 #define I_ERR_NO_INODE_ITEM             (1 << 0)
170 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
171 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
172 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
173 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
174 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
175 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
176 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
177 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
178 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
179 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
180 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
181 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
182 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
183
184 struct root_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_back_ref:1;
189         unsigned int found_forward_ref:1;
190         unsigned int reachable:1;
191         int errors;
192         u64 ref_root;
193         u64 dir;
194         u64 index;
195         u16 namelen;
196         char name[0];
197 };
198
199 struct root_record {
200         struct list_head backrefs;
201         struct cache_extent cache;
202         unsigned int found_root_item:1;
203         u64 objectid;
204         u32 found_ref;
205 };
206
207 struct ptr_node {
208         struct cache_extent cache;
209         void *data;
210 };
211
212 struct shared_node {
213         struct cache_extent cache;
214         struct cache_tree root_cache;
215         struct cache_tree inode_cache;
216         struct inode_record *current;
217         u32 refs;
218 };
219
220 struct block_info {
221         u64 start;
222         u32 size;
223 };
224
225 struct walk_control {
226         struct cache_tree shared;
227         struct shared_node *nodes[BTRFS_MAX_LEVEL];
228         int active_node;
229         int root_level;
230 };
231
232 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
233
234 static u8 imode_to_type(u32 imode)
235 {
236 #define S_SHIFT 12
237         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
238                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
239                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
240                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
241                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
242                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
243                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
244                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
245         };
246
247         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
248 #undef S_SHIFT
249 }
250
251 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
252 {
253         struct device_record *rec1;
254         struct device_record *rec2;
255
256         rec1 = rb_entry(node1, struct device_record, node);
257         rec2 = rb_entry(node2, struct device_record, node);
258         if (rec1->devid > rec2->devid)
259                 return -1;
260         else if (rec1->devid < rec2->devid)
261                 return 1;
262         else
263                 return 0;
264 }
265
266 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
267 {
268         struct inode_record *rec;
269         struct inode_backref *backref;
270         struct inode_backref *orig;
271         size_t size;
272
273         rec = malloc(sizeof(*rec));
274         memcpy(rec, orig_rec, sizeof(*rec));
275         rec->refs = 1;
276         INIT_LIST_HEAD(&rec->backrefs);
277
278         list_for_each_entry(orig, &orig_rec->backrefs, list) {
279                 size = sizeof(*orig) + orig->namelen + 1;
280                 backref = malloc(size);
281                 memcpy(backref, orig, size);
282                 list_add_tail(&backref->list, &rec->backrefs);
283         }
284         return rec;
285 }
286
287 static void print_inode_error(int errors)
288 {
289         if (errors & I_ERR_NO_INODE_ITEM)
290                 fprintf(stderr, ", no inode item");
291         if (errors & I_ERR_NO_ORPHAN_ITEM)
292                 fprintf(stderr, ", no orphan item");
293         if (errors & I_ERR_DUP_INODE_ITEM)
294                 fprintf(stderr, ", dup inode item");
295         if (errors & I_ERR_DUP_DIR_INDEX)
296                 fprintf(stderr, ", dup dir index");
297         if (errors & I_ERR_ODD_DIR_ITEM)
298                 fprintf(stderr, ", odd dir item");
299         if (errors & I_ERR_ODD_FILE_EXTENT)
300                 fprintf(stderr, ", odd file extent");
301         if (errors & I_ERR_BAD_FILE_EXTENT)
302                 fprintf(stderr, ", bad file extent");
303         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
304                 fprintf(stderr, ", file extent overlap");
305         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
306                 fprintf(stderr, ", file extent discount");
307         if (errors & I_ERR_DIR_ISIZE_WRONG)
308                 fprintf(stderr, ", dir isize wrong");
309         if (errors & I_ERR_FILE_NBYTES_WRONG)
310                 fprintf(stderr, ", nbytes wrong");
311         if (errors & I_ERR_ODD_CSUM_ITEM)
312                 fprintf(stderr, ", odd csum item");
313         if (errors & I_ERR_SOME_CSUM_MISSING)
314                 fprintf(stderr, ", some csum missing");
315         if (errors & I_ERR_LINK_COUNT_WRONG)
316                 fprintf(stderr, ", link count wrong");
317         fprintf(stderr, "\n");
318 }
319
320 static void print_ref_error(int errors)
321 {
322         if (errors & REF_ERR_NO_DIR_ITEM)
323                 fprintf(stderr, ", no dir item");
324         if (errors & REF_ERR_NO_DIR_INDEX)
325                 fprintf(stderr, ", no dir index");
326         if (errors & REF_ERR_NO_INODE_REF)
327                 fprintf(stderr, ", no inode ref");
328         if (errors & REF_ERR_DUP_DIR_ITEM)
329                 fprintf(stderr, ", dup dir item");
330         if (errors & REF_ERR_DUP_DIR_INDEX)
331                 fprintf(stderr, ", dup dir index");
332         if (errors & REF_ERR_DUP_INODE_REF)
333                 fprintf(stderr, ", dup inode ref");
334         if (errors & REF_ERR_INDEX_UNMATCH)
335                 fprintf(stderr, ", index unmatch");
336         if (errors & REF_ERR_FILETYPE_UNMATCH)
337                 fprintf(stderr, ", filetype unmatch");
338         if (errors & REF_ERR_NAME_TOO_LONG)
339                 fprintf(stderr, ", name too long");
340         if (errors & REF_ERR_NO_ROOT_REF)
341                 fprintf(stderr, ", no root ref");
342         if (errors & REF_ERR_NO_ROOT_BACKREF)
343                 fprintf(stderr, ", no root backref");
344         if (errors & REF_ERR_DUP_ROOT_REF)
345                 fprintf(stderr, ", dup root ref");
346         if (errors & REF_ERR_DUP_ROOT_BACKREF)
347                 fprintf(stderr, ", dup root backref");
348         fprintf(stderr, "\n");
349 }
350
351 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
352                                           u64 ino, int mod)
353 {
354         struct ptr_node *node;
355         struct cache_extent *cache;
356         struct inode_record *rec = NULL;
357         int ret;
358
359         cache = lookup_cache_extent(inode_cache, ino, 1);
360         if (cache) {
361                 node = container_of(cache, struct ptr_node, cache);
362                 rec = node->data;
363                 if (mod && rec->refs > 1) {
364                         node->data = clone_inode_rec(rec);
365                         rec->refs--;
366                         rec = node->data;
367                 }
368         } else if (mod) {
369                 rec = calloc(1, sizeof(*rec));
370                 rec->ino = ino;
371                 rec->extent_start = (u64)-1;
372                 rec->first_extent_gap = (u64)-1;
373                 rec->refs = 1;
374                 INIT_LIST_HEAD(&rec->backrefs);
375
376                 node = malloc(sizeof(*node));
377                 node->cache.start = ino;
378                 node->cache.size = 1;
379                 node->data = rec;
380
381                 if (ino == BTRFS_FREE_INO_OBJECTID)
382                         rec->found_link = 1;
383
384                 ret = insert_cache_extent(inode_cache, &node->cache);
385                 BUG_ON(ret);
386         }
387         return rec;
388 }
389
390 static void free_inode_rec(struct inode_record *rec)
391 {
392         struct inode_backref *backref;
393
394         if (--rec->refs > 0)
395                 return;
396
397         while (!list_empty(&rec->backrefs)) {
398                 backref = list_entry(rec->backrefs.next,
399                                      struct inode_backref, list);
400                 list_del(&backref->list);
401                 free(backref);
402         }
403         free(rec);
404 }
405
406 static int can_free_inode_rec(struct inode_record *rec)
407 {
408         if (!rec->errors && rec->checked && rec->found_inode_item &&
409             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
410                 return 1;
411         return 0;
412 }
413
414 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
415                                  struct inode_record *rec)
416 {
417         struct cache_extent *cache;
418         struct inode_backref *tmp, *backref;
419         struct ptr_node *node;
420         unsigned char filetype;
421
422         if (!rec->found_inode_item)
423                 return;
424
425         filetype = imode_to_type(rec->imode);
426         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
427                 if (backref->found_dir_item && backref->found_dir_index) {
428                         if (backref->filetype != filetype)
429                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
430                         if (!backref->errors && backref->found_inode_ref) {
431                                 list_del(&backref->list);
432                                 free(backref);
433                         }
434                 }
435         }
436
437         if (!rec->checked || rec->merging)
438                 return;
439
440         if (S_ISDIR(rec->imode)) {
441                 if (rec->found_size != rec->isize)
442                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
443                 if (rec->found_file_extent)
444                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
445         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
446                 if (rec->found_dir_item)
447                         rec->errors |= I_ERR_ODD_DIR_ITEM;
448                 if (rec->found_size != rec->nbytes)
449                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
450                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
451                         rec->first_extent_gap = 0;
452                 if (rec->nlink > 0 && (rec->extent_end < rec->isize ||
453                     rec->first_extent_gap < rec->isize))
454                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
455         }
456
457         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
458                 if (rec->found_csum_item && rec->nodatasum)
459                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
460                 if (rec->some_csum_missing && !rec->nodatasum)
461                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
462         }
463
464         BUG_ON(rec->refs != 1);
465         if (can_free_inode_rec(rec)) {
466                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
467                 node = container_of(cache, struct ptr_node, cache);
468                 BUG_ON(node->data != rec);
469                 remove_cache_extent(inode_cache, &node->cache);
470                 free(node);
471                 free_inode_rec(rec);
472         }
473 }
474
475 static int check_orphan_item(struct btrfs_root *root, u64 ino)
476 {
477         struct btrfs_path path;
478         struct btrfs_key key;
479         int ret;
480
481         key.objectid = BTRFS_ORPHAN_OBJECTID;
482         key.type = BTRFS_ORPHAN_ITEM_KEY;
483         key.offset = ino;
484
485         btrfs_init_path(&path);
486         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
487         btrfs_release_path(&path);
488         if (ret > 0)
489                 ret = -ENOENT;
490         return ret;
491 }
492
493 static int process_inode_item(struct extent_buffer *eb,
494                               int slot, struct btrfs_key *key,
495                               struct shared_node *active_node)
496 {
497         struct inode_record *rec;
498         struct btrfs_inode_item *item;
499
500         rec = active_node->current;
501         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
502         if (rec->found_inode_item) {
503                 rec->errors |= I_ERR_DUP_INODE_ITEM;
504                 return 1;
505         }
506         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
507         rec->nlink = btrfs_inode_nlink(eb, item);
508         rec->isize = btrfs_inode_size(eb, item);
509         rec->nbytes = btrfs_inode_nbytes(eb, item);
510         rec->imode = btrfs_inode_mode(eb, item);
511         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
512                 rec->nodatasum = 1;
513         rec->found_inode_item = 1;
514         if (rec->nlink == 0)
515                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
516         maybe_free_inode_rec(&active_node->inode_cache, rec);
517         return 0;
518 }
519
520 static struct inode_backref *get_inode_backref(struct inode_record *rec,
521                                                 const char *name,
522                                                 int namelen, u64 dir)
523 {
524         struct inode_backref *backref;
525
526         list_for_each_entry(backref, &rec->backrefs, list) {
527                 if (backref->dir != dir || backref->namelen != namelen)
528                         continue;
529                 if (memcmp(name, backref->name, namelen))
530                         continue;
531                 return backref;
532         }
533
534         backref = malloc(sizeof(*backref) + namelen + 1);
535         memset(backref, 0, sizeof(*backref));
536         backref->dir = dir;
537         backref->namelen = namelen;
538         memcpy(backref->name, name, namelen);
539         backref->name[namelen] = '\0';
540         list_add_tail(&backref->list, &rec->backrefs);
541         return backref;
542 }
543
544 static int add_inode_backref(struct cache_tree *inode_cache,
545                              u64 ino, u64 dir, u64 index,
546                              const char *name, int namelen,
547                              int filetype, int itemtype, int errors)
548 {
549         struct inode_record *rec;
550         struct inode_backref *backref;
551
552         rec = get_inode_rec(inode_cache, ino, 1);
553         backref = get_inode_backref(rec, name, namelen, dir);
554         if (errors)
555                 backref->errors |= errors;
556         if (itemtype == BTRFS_DIR_INDEX_KEY) {
557                 if (backref->found_dir_index)
558                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
559                 if (backref->found_inode_ref && backref->index != index)
560                         backref->errors |= REF_ERR_INDEX_UNMATCH;
561                 if (backref->found_dir_item && backref->filetype != filetype)
562                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
563
564                 backref->index = index;
565                 backref->filetype = filetype;
566                 backref->found_dir_index = 1;
567         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
568                 rec->found_link++;
569                 if (backref->found_dir_item)
570                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
571                 if (backref->found_dir_index && backref->filetype != filetype)
572                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
573
574                 backref->filetype = filetype;
575                 backref->found_dir_item = 1;
576         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
577                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
578                 if (backref->found_inode_ref)
579                         backref->errors |= REF_ERR_DUP_INODE_REF;
580                 if (backref->found_dir_index && backref->index != index)
581                         backref->errors |= REF_ERR_INDEX_UNMATCH;
582
583                 backref->ref_type = itemtype;
584                 backref->index = index;
585                 backref->found_inode_ref = 1;
586         } else {
587                 BUG_ON(1);
588         }
589
590         maybe_free_inode_rec(inode_cache, rec);
591         return 0;
592 }
593
594 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
595                             struct cache_tree *dst_cache)
596 {
597         struct inode_backref *backref;
598         u32 dir_count = 0;
599
600         dst->merging = 1;
601         list_for_each_entry(backref, &src->backrefs, list) {
602                 if (backref->found_dir_index) {
603                         add_inode_backref(dst_cache, dst->ino, backref->dir,
604                                         backref->index, backref->name,
605                                         backref->namelen, backref->filetype,
606                                         BTRFS_DIR_INDEX_KEY, backref->errors);
607                 }
608                 if (backref->found_dir_item) {
609                         dir_count++;
610                         add_inode_backref(dst_cache, dst->ino,
611                                         backref->dir, 0, backref->name,
612                                         backref->namelen, backref->filetype,
613                                         BTRFS_DIR_ITEM_KEY, backref->errors);
614                 }
615                 if (backref->found_inode_ref) {
616                         add_inode_backref(dst_cache, dst->ino,
617                                         backref->dir, backref->index,
618                                         backref->name, backref->namelen, 0,
619                                         backref->ref_type, backref->errors);
620                 }
621         }
622
623         if (src->found_dir_item)
624                 dst->found_dir_item = 1;
625         if (src->found_file_extent)
626                 dst->found_file_extent = 1;
627         if (src->found_csum_item)
628                 dst->found_csum_item = 1;
629         if (src->some_csum_missing)
630                 dst->some_csum_missing = 1;
631         if (dst->first_extent_gap > src->first_extent_gap)
632                 dst->first_extent_gap = src->first_extent_gap;
633
634         BUG_ON(src->found_link < dir_count);
635         dst->found_link += src->found_link - dir_count;
636         dst->found_size += src->found_size;
637         if (src->extent_start != (u64)-1) {
638                 if (dst->extent_start == (u64)-1) {
639                         dst->extent_start = src->extent_start;
640                         dst->extent_end = src->extent_end;
641                 } else {
642                         if (dst->extent_end > src->extent_start)
643                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
644                         else if (dst->extent_end < src->extent_start &&
645                                  dst->extent_end < dst->first_extent_gap)
646                                 dst->first_extent_gap = dst->extent_end;
647                         if (dst->extent_end < src->extent_end)
648                                 dst->extent_end = src->extent_end;
649                 }
650         }
651
652         dst->errors |= src->errors;
653         if (src->found_inode_item) {
654                 if (!dst->found_inode_item) {
655                         dst->nlink = src->nlink;
656                         dst->isize = src->isize;
657                         dst->nbytes = src->nbytes;
658                         dst->imode = src->imode;
659                         dst->nodatasum = src->nodatasum;
660                         dst->found_inode_item = 1;
661                 } else {
662                         dst->errors |= I_ERR_DUP_INODE_ITEM;
663                 }
664         }
665         dst->merging = 0;
666
667         return 0;
668 }
669
670 static int splice_shared_node(struct shared_node *src_node,
671                               struct shared_node *dst_node)
672 {
673         struct cache_extent *cache;
674         struct ptr_node *node, *ins;
675         struct cache_tree *src, *dst;
676         struct inode_record *rec, *conflict;
677         u64 current_ino = 0;
678         int splice = 0;
679         int ret;
680
681         if (--src_node->refs == 0)
682                 splice = 1;
683         if (src_node->current)
684                 current_ino = src_node->current->ino;
685
686         src = &src_node->root_cache;
687         dst = &dst_node->root_cache;
688 again:
689         cache = search_cache_extent(src, 0);
690         while (cache) {
691                 node = container_of(cache, struct ptr_node, cache);
692                 rec = node->data;
693                 cache = next_cache_extent(cache);
694
695                 if (splice) {
696                         remove_cache_extent(src, &node->cache);
697                         ins = node;
698                 } else {
699                         ins = malloc(sizeof(*ins));
700                         ins->cache.start = node->cache.start;
701                         ins->cache.size = node->cache.size;
702                         ins->data = rec;
703                         rec->refs++;
704                 }
705                 ret = insert_cache_extent(dst, &ins->cache);
706                 if (ret == -EEXIST) {
707                         conflict = get_inode_rec(dst, rec->ino, 1);
708                         merge_inode_recs(rec, conflict, dst);
709                         if (rec->checked) {
710                                 conflict->checked = 1;
711                                 if (dst_node->current == conflict)
712                                         dst_node->current = NULL;
713                         }
714                         maybe_free_inode_rec(dst, conflict);
715                         free_inode_rec(rec);
716                         free(ins);
717                 } else {
718                         BUG_ON(ret);
719                 }
720         }
721
722         if (src == &src_node->root_cache) {
723                 src = &src_node->inode_cache;
724                 dst = &dst_node->inode_cache;
725                 goto again;
726         }
727
728         if (current_ino > 0 && (!dst_node->current ||
729             current_ino > dst_node->current->ino)) {
730                 if (dst_node->current) {
731                         dst_node->current->checked = 1;
732                         maybe_free_inode_rec(dst, dst_node->current);
733                 }
734                 dst_node->current = get_inode_rec(dst, current_ino, 1);
735         }
736         return 0;
737 }
738
739 static void free_inode_ptr(struct cache_extent *cache)
740 {
741         struct ptr_node *node;
742         struct inode_record *rec;
743
744         node = container_of(cache, struct ptr_node, cache);
745         rec = node->data;
746         free_inode_rec(rec);
747         free(node);
748 }
749
750 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
751
752 static struct shared_node *find_shared_node(struct cache_tree *shared,
753                                             u64 bytenr)
754 {
755         struct cache_extent *cache;
756         struct shared_node *node;
757
758         cache = lookup_cache_extent(shared, bytenr, 1);
759         if (cache) {
760                 node = container_of(cache, struct shared_node, cache);
761                 return node;
762         }
763         return NULL;
764 }
765
766 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
767 {
768         int ret;
769         struct shared_node *node;
770
771         node = calloc(1, sizeof(*node));
772         node->cache.start = bytenr;
773         node->cache.size = 1;
774         cache_tree_init(&node->root_cache);
775         cache_tree_init(&node->inode_cache);
776         node->refs = refs;
777
778         ret = insert_cache_extent(shared, &node->cache);
779         BUG_ON(ret);
780         return 0;
781 }
782
783 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
784                              struct walk_control *wc, int level)
785 {
786         struct shared_node *node;
787         struct shared_node *dest;
788
789         if (level == wc->active_node)
790                 return 0;
791
792         BUG_ON(wc->active_node <= level);
793         node = find_shared_node(&wc->shared, bytenr);
794         if (!node) {
795                 add_shared_node(&wc->shared, bytenr, refs);
796                 node = find_shared_node(&wc->shared, bytenr);
797                 wc->nodes[level] = node;
798                 wc->active_node = level;
799                 return 0;
800         }
801
802         if (wc->root_level == wc->active_node &&
803             btrfs_root_refs(&root->root_item) == 0) {
804                 if (--node->refs == 0) {
805                         free_inode_recs_tree(&node->root_cache);
806                         free_inode_recs_tree(&node->inode_cache);
807                         remove_cache_extent(&wc->shared, &node->cache);
808                         free(node);
809                 }
810                 return 1;
811         }
812
813         dest = wc->nodes[wc->active_node];
814         splice_shared_node(node, dest);
815         if (node->refs == 0) {
816                 remove_cache_extent(&wc->shared, &node->cache);
817                 free(node);
818         }
819         return 1;
820 }
821
822 static int leave_shared_node(struct btrfs_root *root,
823                              struct walk_control *wc, int level)
824 {
825         struct shared_node *node;
826         struct shared_node *dest;
827         int i;
828
829         if (level == wc->root_level)
830                 return 0;
831
832         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
833                 if (wc->nodes[i])
834                         break;
835         }
836         BUG_ON(i >= BTRFS_MAX_LEVEL);
837
838         node = wc->nodes[wc->active_node];
839         wc->nodes[wc->active_node] = NULL;
840         wc->active_node = i;
841
842         dest = wc->nodes[wc->active_node];
843         if (wc->active_node < wc->root_level ||
844             btrfs_root_refs(&root->root_item) > 0) {
845                 BUG_ON(node->refs <= 1);
846                 splice_shared_node(node, dest);
847         } else {
848                 BUG_ON(node->refs < 2);
849                 node->refs--;
850         }
851         return 0;
852 }
853
854 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
855                          u64 child_root_id)
856 {
857         struct btrfs_path path;
858         struct btrfs_key key;
859         struct extent_buffer *leaf;
860         int has_parent = 0;
861         int ret;
862
863         btrfs_init_path(&path);
864
865         key.objectid = parent_root_id;
866         key.type = BTRFS_ROOT_REF_KEY;
867         key.offset = child_root_id;
868         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
869                                 0, 0);
870         BUG_ON(ret < 0);
871         btrfs_release_path(&path);
872         if (!ret)
873                 return 1;
874
875         key.objectid = child_root_id;
876         key.type = BTRFS_ROOT_BACKREF_KEY;
877         key.offset = 0;
878         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
879                                 0, 0);
880         BUG_ON(ret <= 0);
881
882         while (1) {
883                 leaf = path.nodes[0];
884                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
885                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
886                         BUG_ON(ret < 0);
887
888                         if (ret > 0)
889                                 break;
890                         leaf = path.nodes[0];
891                 }
892
893                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
894                 if (key.objectid != child_root_id ||
895                     key.type != BTRFS_ROOT_BACKREF_KEY)
896                         break;
897
898                 has_parent = 1;
899
900                 if (key.offset == parent_root_id) {
901                         btrfs_release_path(&path);
902                         return 1;
903                 }
904
905                 path.slots[0]++;
906         }
907
908         btrfs_release_path(&path);
909         return has_parent? 0 : -1;
910 }
911
912 static int process_dir_item(struct btrfs_root *root,
913                             struct extent_buffer *eb,
914                             int slot, struct btrfs_key *key,
915                             struct shared_node *active_node)
916 {
917         u32 total;
918         u32 cur = 0;
919         u32 len;
920         u32 name_len;
921         u32 data_len;
922         int error;
923         int nritems = 0;
924         int filetype;
925         struct btrfs_dir_item *di;
926         struct inode_record *rec;
927         struct cache_tree *root_cache;
928         struct cache_tree *inode_cache;
929         struct btrfs_key location;
930         char namebuf[BTRFS_NAME_LEN];
931
932         root_cache = &active_node->root_cache;
933         inode_cache = &active_node->inode_cache;
934         rec = active_node->current;
935         rec->found_dir_item = 1;
936
937         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
938         total = btrfs_item_size_nr(eb, slot);
939         while (cur < total) {
940                 nritems++;
941                 btrfs_dir_item_key_to_cpu(eb, di, &location);
942                 name_len = btrfs_dir_name_len(eb, di);
943                 data_len = btrfs_dir_data_len(eb, di);
944                 filetype = btrfs_dir_type(eb, di);
945
946                 rec->found_size += name_len;
947                 if (name_len <= BTRFS_NAME_LEN) {
948                         len = name_len;
949                         error = 0;
950                 } else {
951                         len = BTRFS_NAME_LEN;
952                         error = REF_ERR_NAME_TOO_LONG;
953                 }
954                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
955
956                 if (location.type == BTRFS_INODE_ITEM_KEY) {
957                         add_inode_backref(inode_cache, location.objectid,
958                                           key->objectid, key->offset, namebuf,
959                                           len, filetype, key->type, error);
960                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
961                         add_inode_backref(root_cache, location.objectid,
962                                           key->objectid, key->offset,
963                                           namebuf, len, filetype,
964                                           key->type, error);
965                 } else {
966                         fprintf(stderr, "warning line %d\n", __LINE__);
967                 }
968
969                 len = sizeof(*di) + name_len + data_len;
970                 di = (struct btrfs_dir_item *)((char *)di + len);
971                 cur += len;
972         }
973         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
974                 rec->errors |= I_ERR_DUP_DIR_INDEX;
975
976         return 0;
977 }
978
979 static int process_inode_ref(struct extent_buffer *eb,
980                              int slot, struct btrfs_key *key,
981                              struct shared_node *active_node)
982 {
983         u32 total;
984         u32 cur = 0;
985         u32 len;
986         u32 name_len;
987         u64 index;
988         int error;
989         struct cache_tree *inode_cache;
990         struct btrfs_inode_ref *ref;
991         char namebuf[BTRFS_NAME_LEN];
992
993         inode_cache = &active_node->inode_cache;
994
995         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
996         total = btrfs_item_size_nr(eb, slot);
997         while (cur < total) {
998                 name_len = btrfs_inode_ref_name_len(eb, ref);
999                 index = btrfs_inode_ref_index(eb, ref);
1000                 if (name_len <= BTRFS_NAME_LEN) {
1001                         len = name_len;
1002                         error = 0;
1003                 } else {
1004                         len = BTRFS_NAME_LEN;
1005                         error = REF_ERR_NAME_TOO_LONG;
1006                 }
1007                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1008                 add_inode_backref(inode_cache, key->objectid, key->offset,
1009                                   index, namebuf, len, 0, key->type, error);
1010
1011                 len = sizeof(*ref) + name_len;
1012                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1013                 cur += len;
1014         }
1015         return 0;
1016 }
1017
1018 static int process_inode_extref(struct extent_buffer *eb,
1019                                 int slot, struct btrfs_key *key,
1020                                 struct shared_node *active_node)
1021 {
1022         u32 total;
1023         u32 cur = 0;
1024         u32 len;
1025         u32 name_len;
1026         u64 index;
1027         u64 parent;
1028         int error;
1029         struct cache_tree *inode_cache;
1030         struct btrfs_inode_extref *extref;
1031         char namebuf[BTRFS_NAME_LEN];
1032
1033         inode_cache = &active_node->inode_cache;
1034
1035         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1036         total = btrfs_item_size_nr(eb, slot);
1037         while (cur < total) {
1038                 name_len = btrfs_inode_extref_name_len(eb, extref);
1039                 index = btrfs_inode_extref_index(eb, extref);
1040                 parent = btrfs_inode_extref_parent(eb, extref);
1041                 if (name_len <= BTRFS_NAME_LEN) {
1042                         len = name_len;
1043                         error = 0;
1044                 } else {
1045                         len = BTRFS_NAME_LEN;
1046                         error = REF_ERR_NAME_TOO_LONG;
1047                 }
1048                 read_extent_buffer(eb, namebuf,
1049                                    (unsigned long)(extref + 1), len);
1050                 add_inode_backref(inode_cache, key->objectid, parent,
1051                                   index, namebuf, len, 0, key->type, error);
1052
1053                 len = sizeof(*extref) + name_len;
1054                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1055                 cur += len;
1056         }
1057         return 0;
1058
1059 }
1060
1061 static u64 count_csum_range(struct btrfs_root *root, u64 start, u64 len)
1062 {
1063         struct btrfs_key key;
1064         struct btrfs_path path;
1065         struct extent_buffer *leaf;
1066         int ret ;
1067         size_t size;
1068         u64 found = 0;
1069         u64 csum_end;
1070         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1071
1072         btrfs_init_path(&path);
1073
1074         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1075         key.offset = start;
1076         key.type = BTRFS_EXTENT_CSUM_KEY;
1077
1078         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1079                                 &key, &path, 0, 0);
1080         BUG_ON(ret < 0);
1081         if (ret > 0 && path.slots[0] > 0) {
1082                 leaf = path.nodes[0];
1083                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1084                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1085                     key.type == BTRFS_EXTENT_CSUM_KEY)
1086                         path.slots[0]--;
1087         }
1088
1089         while (len > 0) {
1090                 leaf = path.nodes[0];
1091                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1092                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1093                         BUG_ON(ret < 0);
1094                         if (ret > 0)
1095                                 break;
1096                         leaf = path.nodes[0];
1097                 }
1098
1099                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1100                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1101                     key.type != BTRFS_EXTENT_CSUM_KEY)
1102                         break;
1103
1104                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1105                 if (key.offset >= start + len)
1106                         break;
1107
1108                 if (key.offset > start)
1109                         start = key.offset;
1110
1111                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1112                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1113                 if (csum_end > start) {
1114                         size = min(csum_end - start, len);
1115                         len -= size;
1116                         start += size;
1117                         found += size;
1118                 }
1119
1120                 path.slots[0]++;
1121         }
1122         btrfs_release_path(&path);
1123         return found;
1124 }
1125
1126 static int process_file_extent(struct btrfs_root *root,
1127                                 struct extent_buffer *eb,
1128                                 int slot, struct btrfs_key *key,
1129                                 struct shared_node *active_node)
1130 {
1131         struct inode_record *rec;
1132         struct btrfs_file_extent_item *fi;
1133         u64 num_bytes = 0;
1134         u64 disk_bytenr = 0;
1135         u64 extent_offset = 0;
1136         u64 mask = root->sectorsize - 1;
1137         int extent_type;
1138
1139         rec = active_node->current;
1140         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1141         rec->found_file_extent = 1;
1142
1143         if (rec->extent_start == (u64)-1) {
1144                 rec->extent_start = key->offset;
1145                 rec->extent_end = key->offset;
1146         }
1147
1148         if (rec->extent_end > key->offset)
1149                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1150         else if (rec->extent_end < key->offset &&
1151                  rec->extent_end < rec->first_extent_gap)
1152                 rec->first_extent_gap = rec->extent_end;
1153
1154         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1155         extent_type = btrfs_file_extent_type(eb, fi);
1156
1157         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1158                 num_bytes = btrfs_file_extent_inline_len(eb, fi);
1159                 if (num_bytes == 0)
1160                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1161                 rec->found_size += num_bytes;
1162                 num_bytes = (num_bytes + mask) & ~mask;
1163         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1164                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1165                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1166                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1167                 extent_offset = btrfs_file_extent_offset(eb, fi);
1168                 if (num_bytes == 0 || (num_bytes & mask))
1169                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1170                 if (num_bytes + extent_offset >
1171                     btrfs_file_extent_ram_bytes(eb, fi))
1172                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1173                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1174                     (btrfs_file_extent_compression(eb, fi) ||
1175                      btrfs_file_extent_encryption(eb, fi) ||
1176                      btrfs_file_extent_other_encoding(eb, fi)))
1177                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1178                 if (disk_bytenr > 0)
1179                         rec->found_size += num_bytes;
1180         } else {
1181                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1182         }
1183         rec->extent_end = key->offset + num_bytes;
1184
1185         if (disk_bytenr > 0) {
1186                 u64 found;
1187                 if (btrfs_file_extent_compression(eb, fi))
1188                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1189                 else
1190                         disk_bytenr += extent_offset;
1191
1192                 found = count_csum_range(root, disk_bytenr, num_bytes);
1193                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1194                         if (found > 0)
1195                                 rec->found_csum_item = 1;
1196                         if (found < num_bytes)
1197                                 rec->some_csum_missing = 1;
1198                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1199                         if (found > 0)
1200                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1201                 }
1202         }
1203         return 0;
1204 }
1205
1206 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1207                             struct walk_control *wc)
1208 {
1209         struct btrfs_key key;
1210         u32 nritems;
1211         int i;
1212         int ret = 0;
1213         struct cache_tree *inode_cache;
1214         struct shared_node *active_node;
1215
1216         if (wc->root_level == wc->active_node &&
1217             btrfs_root_refs(&root->root_item) == 0)
1218                 return 0;
1219
1220         active_node = wc->nodes[wc->active_node];
1221         inode_cache = &active_node->inode_cache;
1222         nritems = btrfs_header_nritems(eb);
1223         for (i = 0; i < nritems; i++) {
1224                 btrfs_item_key_to_cpu(eb, &key, i);
1225
1226                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1227                         continue;
1228
1229                 if (active_node->current == NULL ||
1230                     active_node->current->ino < key.objectid) {
1231                         if (active_node->current) {
1232                                 active_node->current->checked = 1;
1233                                 maybe_free_inode_rec(inode_cache,
1234                                                      active_node->current);
1235                         }
1236                         active_node->current = get_inode_rec(inode_cache,
1237                                                              key.objectid, 1);
1238                 }
1239                 switch (key.type) {
1240                 case BTRFS_DIR_ITEM_KEY:
1241                 case BTRFS_DIR_INDEX_KEY:
1242                         ret = process_dir_item(root, eb, i, &key, active_node);
1243                         break;
1244                 case BTRFS_INODE_REF_KEY:
1245                         ret = process_inode_ref(eb, i, &key, active_node);
1246                         break;
1247                 case BTRFS_INODE_EXTREF_KEY:
1248                         ret = process_inode_extref(eb, i, &key, active_node);
1249                         break;
1250                 case BTRFS_INODE_ITEM_KEY:
1251                         ret = process_inode_item(eb, i, &key, active_node);
1252                         break;
1253                 case BTRFS_EXTENT_DATA_KEY:
1254                         ret = process_file_extent(root, eb, i, &key,
1255                                                   active_node);
1256                         break;
1257                 default:
1258                         break;
1259                 };
1260         }
1261         return ret;
1262 }
1263
1264 static void reada_walk_down(struct btrfs_root *root,
1265                             struct extent_buffer *node, int slot)
1266 {
1267         u64 bytenr;
1268         u64 ptr_gen;
1269         u32 nritems;
1270         u32 blocksize;
1271         int i;
1272         int ret;
1273         int level;
1274
1275         level = btrfs_header_level(node);
1276         if (level != 1)
1277                 return;
1278
1279         nritems = btrfs_header_nritems(node);
1280         blocksize = btrfs_level_size(root, level - 1);
1281         for (i = slot; i < nritems; i++) {
1282                 bytenr = btrfs_node_blockptr(node, i);
1283                 ptr_gen = btrfs_node_ptr_generation(node, i);
1284                 ret = readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1285                 if (ret)
1286                         break;
1287         }
1288 }
1289
1290 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1291                           struct walk_control *wc, int *level)
1292 {
1293         u64 bytenr;
1294         u64 ptr_gen;
1295         struct extent_buffer *next;
1296         struct extent_buffer *cur;
1297         u32 blocksize;
1298         int ret, err = 0;
1299         u64 refs;
1300
1301         WARN_ON(*level < 0);
1302         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1303         ret = btrfs_lookup_extent_info(NULL, root,
1304                                        path->nodes[*level]->start,
1305                                        *level, 1, &refs, NULL);
1306         if (ret < 0) {
1307                 err = ret;
1308                 goto out;
1309         }
1310
1311         if (refs > 1) {
1312                 ret = enter_shared_node(root, path->nodes[*level]->start,
1313                                         refs, wc, *level);
1314                 if (ret > 0) {
1315                         err = ret;
1316                         goto out;
1317                 }
1318         }
1319
1320         while (*level >= 0) {
1321                 WARN_ON(*level < 0);
1322                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1323                 cur = path->nodes[*level];
1324
1325                 if (btrfs_header_level(cur) != *level)
1326                         WARN_ON(1);
1327
1328                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1329                         break;
1330                 if (*level == 0) {
1331                         ret = process_one_leaf(root, cur, wc);
1332                         break;
1333                 }
1334                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1335                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1336                 blocksize = btrfs_level_size(root, *level - 1);
1337                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1338                                                1, &refs, NULL);
1339                 if (ret < 0)
1340                         refs = 0;
1341
1342                 if (refs > 1) {
1343                         ret = enter_shared_node(root, bytenr, refs,
1344                                                 wc, *level - 1);
1345                         if (ret > 0) {
1346                                 path->slots[*level]++;
1347                                 continue;
1348                         }
1349                 }
1350
1351                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1352                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1353                         free_extent_buffer(next);
1354                         reada_walk_down(root, cur, path->slots[*level]);
1355                         next = read_tree_block(root, bytenr, blocksize,
1356                                                ptr_gen);
1357                         if (!next) {
1358                                 err = -EIO;
1359                                 goto out;
1360                         }
1361                 }
1362
1363                 *level = *level - 1;
1364                 free_extent_buffer(path->nodes[*level]);
1365                 path->nodes[*level] = next;
1366                 path->slots[*level] = 0;
1367         }
1368 out:
1369         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1370         return err;
1371 }
1372
1373 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1374                         struct walk_control *wc, int *level)
1375 {
1376         int i;
1377         struct extent_buffer *leaf;
1378
1379         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1380                 leaf = path->nodes[i];
1381                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1382                         path->slots[i]++;
1383                         *level = i;
1384                         return 0;
1385                 } else {
1386                         free_extent_buffer(path->nodes[*level]);
1387                         path->nodes[*level] = NULL;
1388                         BUG_ON(*level > wc->active_node);
1389                         if (*level == wc->active_node)
1390                                 leave_shared_node(root, wc, *level);
1391                         *level = i + 1;
1392                 }
1393         }
1394         return 1;
1395 }
1396
1397 static int check_root_dir(struct inode_record *rec)
1398 {
1399         struct inode_backref *backref;
1400         int ret = -1;
1401
1402         if (!rec->found_inode_item || rec->errors)
1403                 goto out;
1404         if (rec->nlink != 1 || rec->found_link != 0)
1405                 goto out;
1406         if (list_empty(&rec->backrefs))
1407                 goto out;
1408         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1409         if (!backref->found_inode_ref)
1410                 goto out;
1411         if (backref->index != 0 || backref->namelen != 2 ||
1412             memcmp(backref->name, "..", 2))
1413                 goto out;
1414         if (backref->found_dir_index || backref->found_dir_item)
1415                 goto out;
1416         ret = 0;
1417 out:
1418         return ret;
1419 }
1420
1421 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
1422 {
1423         struct btrfs_trans_handle *trans;
1424         struct btrfs_path *path;
1425         struct btrfs_inode_item *ei;
1426         struct btrfs_key key;
1427         int ret;
1428
1429         /* So far we just fix dir isize wrong */
1430         if (!(rec->errors & I_ERR_DIR_ISIZE_WRONG))
1431                 return 1;
1432
1433         path = btrfs_alloc_path();
1434         if (!path)
1435                 return -ENOMEM;
1436
1437         trans = btrfs_start_transaction(root, 1);
1438         if (IS_ERR(trans)) {
1439                 btrfs_free_path(path);
1440                 return PTR_ERR(trans);
1441         }
1442
1443         key.objectid = rec->ino;
1444         key.type = BTRFS_INODE_ITEM_KEY;
1445         key.offset = (u64)-1;
1446
1447         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1448         if (ret < 0)
1449                 goto out;
1450         if (ret) {
1451                 if (!path->slots[0]) {
1452                         ret = -ENOENT;
1453                         goto out;
1454                 }
1455                 path->slots[0]--;
1456                 ret = 0;
1457         }
1458         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1459         if (key.objectid != rec->ino) {
1460                 ret = -ENOENT;
1461                 goto out;
1462         }
1463
1464         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1465                             struct btrfs_inode_item);
1466         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1467         btrfs_mark_buffer_dirty(path->nodes[0]);
1468         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1469         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1470                root->root_key.objectid);
1471 out:
1472         btrfs_commit_transaction(trans, root);
1473         btrfs_free_path(path);
1474         return ret;
1475 }
1476
1477 static int check_inode_recs(struct btrfs_root *root,
1478                             struct cache_tree *inode_cache)
1479 {
1480         struct cache_extent *cache;
1481         struct ptr_node *node;
1482         struct inode_record *rec;
1483         struct inode_backref *backref;
1484         int ret;
1485         u64 error = 0;
1486         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1487
1488         if (btrfs_root_refs(&root->root_item) == 0) {
1489                 if (!cache_tree_empty(inode_cache))
1490                         fprintf(stderr, "warning line %d\n", __LINE__);
1491                 return 0;
1492         }
1493
1494         rec = get_inode_rec(inode_cache, root_dirid, 0);
1495         if (rec) {
1496                 ret = check_root_dir(rec);
1497                 if (ret) {
1498                         fprintf(stderr, "root %llu root dir %llu error\n",
1499                                 (unsigned long long)root->root_key.objectid,
1500                                 (unsigned long long)root_dirid);
1501                         error++;
1502                 }
1503         } else {
1504                 fprintf(stderr, "root %llu root dir %llu not found\n",
1505                         (unsigned long long)root->root_key.objectid,
1506                         (unsigned long long)root_dirid);
1507         }
1508
1509         while (1) {
1510                 cache = search_cache_extent(inode_cache, 0);
1511                 if (!cache)
1512                         break;
1513                 node = container_of(cache, struct ptr_node, cache);
1514                 rec = node->data;
1515                 remove_cache_extent(inode_cache, &node->cache);
1516                 free(node);
1517                 if (rec->ino == root_dirid ||
1518                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1519                         free_inode_rec(rec);
1520                         continue;
1521                 }
1522
1523                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1524                         ret = check_orphan_item(root, rec->ino);
1525                         if (ret == 0)
1526                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1527                         if (can_free_inode_rec(rec)) {
1528                                 free_inode_rec(rec);
1529                                 continue;
1530                         }
1531                 }
1532
1533                 if (repair) {
1534                         ret = try_repair_inode(root, rec);
1535                         if (ret == 0 && can_free_inode_rec(rec)) {
1536                                 free_inode_rec(rec);
1537                                 continue;
1538                         }
1539                         ret = 0;
1540                 }
1541
1542                 error++;
1543                 if (!rec->found_inode_item)
1544                         rec->errors |= I_ERR_NO_INODE_ITEM;
1545                 if (rec->found_link != rec->nlink)
1546                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1547                 fprintf(stderr, "root %llu inode %llu errors %x",
1548                         (unsigned long long) root->root_key.objectid,
1549                         (unsigned long long) rec->ino, rec->errors);
1550                 print_inode_error(rec->errors);
1551                 list_for_each_entry(backref, &rec->backrefs, list) {
1552                         if (!backref->found_dir_item)
1553                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1554                         if (!backref->found_dir_index)
1555                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1556                         if (!backref->found_inode_ref)
1557                                 backref->errors |= REF_ERR_NO_INODE_REF;
1558                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1559                                 " namelen %u name %s filetype %d error %x",
1560                                 (unsigned long long)backref->dir,
1561                                 (unsigned long long)backref->index,
1562                                 backref->namelen, backref->name,
1563                                 backref->filetype, backref->errors);
1564                         print_ref_error(backref->errors);
1565                 }
1566                 free_inode_rec(rec);
1567         }
1568         return (error > 0) ? -1 : 0;
1569 }
1570
1571 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1572                                         u64 objectid)
1573 {
1574         struct cache_extent *cache;
1575         struct root_record *rec = NULL;
1576         int ret;
1577
1578         cache = lookup_cache_extent(root_cache, objectid, 1);
1579         if (cache) {
1580                 rec = container_of(cache, struct root_record, cache);
1581         } else {
1582                 rec = calloc(1, sizeof(*rec));
1583                 rec->objectid = objectid;
1584                 INIT_LIST_HEAD(&rec->backrefs);
1585                 rec->cache.start = objectid;
1586                 rec->cache.size = 1;
1587
1588                 ret = insert_cache_extent(root_cache, &rec->cache);
1589                 BUG_ON(ret);
1590         }
1591         return rec;
1592 }
1593
1594 static struct root_backref *get_root_backref(struct root_record *rec,
1595                                              u64 ref_root, u64 dir, u64 index,
1596                                              const char *name, int namelen)
1597 {
1598         struct root_backref *backref;
1599
1600         list_for_each_entry(backref, &rec->backrefs, list) {
1601                 if (backref->ref_root != ref_root || backref->dir != dir ||
1602                     backref->namelen != namelen)
1603                         continue;
1604                 if (memcmp(name, backref->name, namelen))
1605                         continue;
1606                 return backref;
1607         }
1608
1609         backref = malloc(sizeof(*backref) + namelen + 1);
1610         memset(backref, 0, sizeof(*backref));
1611         backref->ref_root = ref_root;
1612         backref->dir = dir;
1613         backref->index = index;
1614         backref->namelen = namelen;
1615         memcpy(backref->name, name, namelen);
1616         backref->name[namelen] = '\0';
1617         list_add_tail(&backref->list, &rec->backrefs);
1618         return backref;
1619 }
1620
1621 static void free_root_record(struct cache_extent *cache)
1622 {
1623         struct root_record *rec;
1624         struct root_backref *backref;
1625
1626         rec = container_of(cache, struct root_record, cache);
1627         while (!list_empty(&rec->backrefs)) {
1628                 backref = list_entry(rec->backrefs.next,
1629                                      struct root_backref, list);
1630                 list_del(&backref->list);
1631                 free(backref);
1632         }
1633
1634         kfree(rec);
1635 }
1636
1637 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
1638
1639 static int add_root_backref(struct cache_tree *root_cache,
1640                             u64 root_id, u64 ref_root, u64 dir, u64 index,
1641                             const char *name, int namelen,
1642                             int item_type, int errors)
1643 {
1644         struct root_record *rec;
1645         struct root_backref *backref;
1646
1647         rec = get_root_rec(root_cache, root_id);
1648         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
1649
1650         backref->errors |= errors;
1651
1652         if (item_type != BTRFS_DIR_ITEM_KEY) {
1653                 if (backref->found_dir_index || backref->found_back_ref ||
1654                     backref->found_forward_ref) {
1655                         if (backref->index != index)
1656                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
1657                 } else {
1658                         backref->index = index;
1659                 }
1660         }
1661
1662         if (item_type == BTRFS_DIR_ITEM_KEY) {
1663                 if (backref->found_forward_ref)
1664                         rec->found_ref++;
1665                 backref->found_dir_item = 1;
1666         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
1667                 backref->found_dir_index = 1;
1668         } else if (item_type == BTRFS_ROOT_REF_KEY) {
1669                 if (backref->found_forward_ref)
1670                         backref->errors |= REF_ERR_DUP_ROOT_REF;
1671                 else if (backref->found_dir_item)
1672                         rec->found_ref++;
1673                 backref->found_forward_ref = 1;
1674         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
1675                 if (backref->found_back_ref)
1676                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
1677                 backref->found_back_ref = 1;
1678         } else {
1679                 BUG_ON(1);
1680         }
1681
1682         if (backref->found_forward_ref && backref->found_dir_item)
1683                 backref->reachable = 1;
1684         return 0;
1685 }
1686
1687 static int merge_root_recs(struct btrfs_root *root,
1688                            struct cache_tree *src_cache,
1689                            struct cache_tree *dst_cache)
1690 {
1691         struct cache_extent *cache;
1692         struct ptr_node *node;
1693         struct inode_record *rec;
1694         struct inode_backref *backref;
1695
1696         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1697                 free_inode_recs_tree(src_cache);
1698                 return 0;
1699         }
1700
1701         while (1) {
1702                 cache = search_cache_extent(src_cache, 0);
1703                 if (!cache)
1704                         break;
1705                 node = container_of(cache, struct ptr_node, cache);
1706                 rec = node->data;
1707                 remove_cache_extent(src_cache, &node->cache);
1708                 free(node);
1709
1710                 if (!is_child_root(root, root->objectid, rec->ino))
1711                         goto skip;
1712
1713                 list_for_each_entry(backref, &rec->backrefs, list) {
1714                         BUG_ON(backref->found_inode_ref);
1715                         if (backref->found_dir_item)
1716                                 add_root_backref(dst_cache, rec->ino,
1717                                         root->root_key.objectid, backref->dir,
1718                                         backref->index, backref->name,
1719                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
1720                                         backref->errors);
1721                         if (backref->found_dir_index)
1722                                 add_root_backref(dst_cache, rec->ino,
1723                                         root->root_key.objectid, backref->dir,
1724                                         backref->index, backref->name,
1725                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
1726                                         backref->errors);
1727                 }
1728 skip:
1729                 free_inode_rec(rec);
1730         }
1731         return 0;
1732 }
1733
1734 static int check_root_refs(struct btrfs_root *root,
1735                            struct cache_tree *root_cache)
1736 {
1737         struct root_record *rec;
1738         struct root_record *ref_root;
1739         struct root_backref *backref;
1740         struct cache_extent *cache;
1741         int loop = 1;
1742         int ret;
1743         int error;
1744         int errors = 0;
1745
1746         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
1747         rec->found_ref = 1;
1748
1749         /* fixme: this can not detect circular references */
1750         while (loop) {
1751                 loop = 0;
1752                 cache = search_cache_extent(root_cache, 0);
1753                 while (1) {
1754                         if (!cache)
1755                                 break;
1756                         rec = container_of(cache, struct root_record, cache);
1757                         cache = next_cache_extent(cache);
1758
1759                         if (rec->found_ref == 0)
1760                                 continue;
1761
1762                         list_for_each_entry(backref, &rec->backrefs, list) {
1763                                 if (!backref->reachable)
1764                                         continue;
1765
1766                                 ref_root = get_root_rec(root_cache,
1767                                                         backref->ref_root);
1768                                 if (ref_root->found_ref > 0)
1769                                         continue;
1770
1771                                 backref->reachable = 0;
1772                                 rec->found_ref--;
1773                                 if (rec->found_ref == 0)
1774                                         loop = 1;
1775                         }
1776                 }
1777         }
1778
1779         cache = search_cache_extent(root_cache, 0);
1780         while (1) {
1781                 if (!cache)
1782                         break;
1783                 rec = container_of(cache, struct root_record, cache);
1784                 cache = next_cache_extent(cache);
1785
1786                 if (rec->found_ref == 0 &&
1787                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1788                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
1789                         ret = check_orphan_item(root->fs_info->tree_root,
1790                                                 rec->objectid);
1791                         if (ret == 0)
1792                                 continue;
1793
1794                         /*
1795                          * If we don't have a root item then we likely just have
1796                          * a dir item in a snapshot for this root but no actual
1797                          * ref key or anything so it's meaningless.
1798                          */
1799                         if (!rec->found_root_item)
1800                                 continue;
1801                         errors++;
1802                         fprintf(stderr, "fs tree %llu not referenced\n",
1803                                 (unsigned long long)rec->objectid);
1804                 }
1805
1806                 error = 0;
1807                 if (rec->found_ref > 0 && !rec->found_root_item)
1808                         error = 1;
1809                 list_for_each_entry(backref, &rec->backrefs, list) {
1810                         if (!backref->found_dir_item)
1811                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1812                         if (!backref->found_dir_index)
1813                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1814                         if (!backref->found_back_ref)
1815                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
1816                         if (!backref->found_forward_ref)
1817                                 backref->errors |= REF_ERR_NO_ROOT_REF;
1818                         if (backref->reachable && backref->errors)
1819                                 error = 1;
1820                 }
1821                 if (!error)
1822                         continue;
1823
1824                 errors++;
1825                 fprintf(stderr, "fs tree %llu refs %u %s\n",
1826                         (unsigned long long)rec->objectid, rec->found_ref,
1827                          rec->found_root_item ? "" : "not found");
1828
1829                 list_for_each_entry(backref, &rec->backrefs, list) {
1830                         if (!backref->reachable)
1831                                 continue;
1832                         if (!backref->errors && rec->found_root_item)
1833                                 continue;
1834                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
1835                                 " index %llu namelen %u name %s error %x\n",
1836                                 (unsigned long long)backref->ref_root,
1837                                 (unsigned long long)backref->dir,
1838                                 (unsigned long long)backref->index,
1839                                 backref->namelen, backref->name,
1840                                 backref->errors);
1841                 }
1842         }
1843         return errors > 0 ? 1 : 0;
1844 }
1845
1846 static int process_root_ref(struct extent_buffer *eb, int slot,
1847                             struct btrfs_key *key,
1848                             struct cache_tree *root_cache)
1849 {
1850         u64 dirid;
1851         u64 index;
1852         u32 len;
1853         u32 name_len;
1854         struct btrfs_root_ref *ref;
1855         char namebuf[BTRFS_NAME_LEN];
1856         int error;
1857
1858         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
1859
1860         dirid = btrfs_root_ref_dirid(eb, ref);
1861         index = btrfs_root_ref_sequence(eb, ref);
1862         name_len = btrfs_root_ref_name_len(eb, ref);
1863
1864         if (name_len <= BTRFS_NAME_LEN) {
1865                 len = name_len;
1866                 error = 0;
1867         } else {
1868                 len = BTRFS_NAME_LEN;
1869                 error = REF_ERR_NAME_TOO_LONG;
1870         }
1871         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1872
1873         if (key->type == BTRFS_ROOT_REF_KEY) {
1874                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
1875                                  index, namebuf, len, key->type, error);
1876         } else {
1877                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
1878                                  index, namebuf, len, key->type, error);
1879         }
1880         return 0;
1881 }
1882
1883 static int check_fs_root(struct btrfs_root *root,
1884                          struct cache_tree *root_cache,
1885                          struct walk_control *wc)
1886 {
1887         int ret = 0;
1888         int wret;
1889         int level;
1890         struct btrfs_path path;
1891         struct shared_node root_node;
1892         struct root_record *rec;
1893         struct btrfs_root_item *root_item = &root->root_item;
1894
1895         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1896                 rec = get_root_rec(root_cache, root->root_key.objectid);
1897                 if (btrfs_root_refs(root_item) > 0)
1898                         rec->found_root_item = 1;
1899         }
1900
1901         btrfs_init_path(&path);
1902         memset(&root_node, 0, sizeof(root_node));
1903         cache_tree_init(&root_node.root_cache);
1904         cache_tree_init(&root_node.inode_cache);
1905
1906         level = btrfs_header_level(root->node);
1907         memset(wc->nodes, 0, sizeof(wc->nodes));
1908         wc->nodes[level] = &root_node;
1909         wc->active_node = level;
1910         wc->root_level = level;
1911
1912         if (btrfs_root_refs(root_item) > 0 ||
1913             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1914                 path.nodes[level] = root->node;
1915                 extent_buffer_get(root->node);
1916                 path.slots[level] = 0;
1917         } else {
1918                 struct btrfs_key key;
1919                 struct btrfs_disk_key found_key;
1920
1921                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1922                 level = root_item->drop_level;
1923                 path.lowest_level = level;
1924                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1925                 BUG_ON(wret < 0);
1926                 btrfs_node_key(path.nodes[level], &found_key,
1927                                 path.slots[level]);
1928                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1929                                         sizeof(found_key)));
1930         }
1931
1932         while (1) {
1933                 wret = walk_down_tree(root, &path, wc, &level);
1934                 if (wret < 0)
1935                         ret = wret;
1936                 if (wret != 0)
1937                         break;
1938
1939                 wret = walk_up_tree(root, &path, wc, &level);
1940                 if (wret < 0)
1941                         ret = wret;
1942                 if (wret != 0)
1943                         break;
1944         }
1945         btrfs_release_path(&path);
1946
1947         merge_root_recs(root, &root_node.root_cache, root_cache);
1948
1949         if (root_node.current) {
1950                 root_node.current->checked = 1;
1951                 maybe_free_inode_rec(&root_node.inode_cache,
1952                                 root_node.current);
1953         }
1954
1955         ret = check_inode_recs(root, &root_node.inode_cache);
1956         return ret;
1957 }
1958
1959 static int fs_root_objectid(u64 objectid)
1960 {
1961         if (objectid == BTRFS_FS_TREE_OBJECTID ||
1962             objectid == BTRFS_TREE_RELOC_OBJECTID ||
1963             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID ||
1964             (objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1965              objectid <= BTRFS_LAST_FREE_OBJECTID))
1966                 return 1;
1967         return 0;
1968 }
1969
1970 static int check_fs_roots(struct btrfs_root *root,
1971                           struct cache_tree *root_cache)
1972 {
1973         struct btrfs_path path;
1974         struct btrfs_key key;
1975         struct walk_control wc;
1976         struct extent_buffer *leaf;
1977         struct btrfs_root *tmp_root;
1978         struct btrfs_root *tree_root = root->fs_info->tree_root;
1979         int ret;
1980         int err = 0;
1981
1982         /*
1983          * Just in case we made any changes to the extent tree that weren't
1984          * reflected into the free space cache yet.
1985          */
1986         if (repair)
1987                 reset_cached_block_groups(root->fs_info);
1988         memset(&wc, 0, sizeof(wc));
1989         cache_tree_init(&wc.shared);
1990         btrfs_init_path(&path);
1991
1992         key.offset = 0;
1993         key.objectid = 0;
1994         key.type = BTRFS_ROOT_ITEM_KEY;
1995         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
1996         BUG_ON(ret < 0);
1997         while (1) {
1998                 leaf = path.nodes[0];
1999                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2000                         ret = btrfs_next_leaf(tree_root, &path);
2001                         if (ret != 0)
2002                                 break;
2003                         leaf = path.nodes[0];
2004                 }
2005                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2006                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
2007                     fs_root_objectid(key.objectid)) {
2008                         tmp_root = btrfs_read_fs_root_no_cache(root->fs_info,
2009                                                                &key);
2010                         if (IS_ERR(tmp_root)) {
2011                                 err = 1;
2012                                 goto next;
2013                         }
2014                         ret = check_fs_root(tmp_root, root_cache, &wc);
2015                         if (ret)
2016                                 err = 1;
2017                         btrfs_free_fs_root(tmp_root);
2018                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
2019                            key.type == BTRFS_ROOT_BACKREF_KEY) {
2020                         process_root_ref(leaf, path.slots[0], &key,
2021                                          root_cache);
2022                 }
2023 next:
2024                 path.slots[0]++;
2025         }
2026         btrfs_release_path(&path);
2027
2028         if (!cache_tree_empty(&wc.shared))
2029                 fprintf(stderr, "warning line %d\n", __LINE__);
2030
2031         return err;
2032 }
2033
2034 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
2035 {
2036         struct list_head *cur = rec->backrefs.next;
2037         struct extent_backref *back;
2038         struct tree_backref *tback;
2039         struct data_backref *dback;
2040         u64 found = 0;
2041         int err = 0;
2042
2043         while(cur != &rec->backrefs) {
2044                 back = list_entry(cur, struct extent_backref, list);
2045                 cur = cur->next;
2046                 if (!back->found_extent_tree) {
2047                         err = 1;
2048                         if (!print_errs)
2049                                 goto out;
2050                         if (back->is_data) {
2051                                 dback = (struct data_backref *)back;
2052                                 fprintf(stderr, "Backref %llu %s %llu"
2053                                         " owner %llu offset %llu num_refs %lu"
2054                                         " not found in extent tree\n",
2055                                         (unsigned long long)rec->start,
2056                                         back->full_backref ?
2057                                         "parent" : "root",
2058                                         back->full_backref ?
2059                                         (unsigned long long)dback->parent:
2060                                         (unsigned long long)dback->root,
2061                                         (unsigned long long)dback->owner,
2062                                         (unsigned long long)dback->offset,
2063                                         (unsigned long)dback->num_refs);
2064                         } else {
2065                                 tback = (struct tree_backref *)back;
2066                                 fprintf(stderr, "Backref %llu parent %llu"
2067                                         " root %llu not found in extent tree\n",
2068                                         (unsigned long long)rec->start,
2069                                         (unsigned long long)tback->parent,
2070                                         (unsigned long long)tback->root);
2071                         }
2072                 }
2073                 if (!back->is_data && !back->found_ref) {
2074                         err = 1;
2075                         if (!print_errs)
2076                                 goto out;
2077                         tback = (struct tree_backref *)back;
2078                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
2079                                 (unsigned long long)rec->start,
2080                                 back->full_backref ? "parent" : "root",
2081                                 back->full_backref ?
2082                                 (unsigned long long)tback->parent :
2083                                 (unsigned long long)tback->root, back);
2084                 }
2085                 if (back->is_data) {
2086                         dback = (struct data_backref *)back;
2087                         if (dback->found_ref != dback->num_refs) {
2088                                 err = 1;
2089                                 if (!print_errs)
2090                                         goto out;
2091                                 fprintf(stderr, "Incorrect local backref count"
2092                                         " on %llu %s %llu owner %llu"
2093                                         " offset %llu found %u wanted %u back %p\n",
2094                                         (unsigned long long)rec->start,
2095                                         back->full_backref ?
2096                                         "parent" : "root",
2097                                         back->full_backref ?
2098                                         (unsigned long long)dback->parent:
2099                                         (unsigned long long)dback->root,
2100                                         (unsigned long long)dback->owner,
2101                                         (unsigned long long)dback->offset,
2102                                         dback->found_ref, dback->num_refs, back);
2103                         }
2104                         if (dback->disk_bytenr != rec->start) {
2105                                 err = 1;
2106                                 if (!print_errs)
2107                                         goto out;
2108                                 fprintf(stderr, "Backref disk bytenr does not"
2109                                         " match extent record, bytenr=%llu, "
2110                                         "ref bytenr=%llu\n",
2111                                         (unsigned long long)rec->start,
2112                                         (unsigned long long)dback->disk_bytenr);
2113                         }
2114
2115                         if (dback->bytes != rec->nr) {
2116                                 err = 1;
2117                                 if (!print_errs)
2118                                         goto out;
2119                                 fprintf(stderr, "Backref bytes do not match "
2120                                         "extent backref, bytenr=%llu, ref "
2121                                         "bytes=%llu, backref bytes=%llu\n",
2122                                         (unsigned long long)rec->start,
2123                                         (unsigned long long)rec->nr,
2124                                         (unsigned long long)dback->bytes);
2125                         }
2126                 }
2127                 if (!back->is_data) {
2128                         found += 1;
2129                 } else {
2130                         dback = (struct data_backref *)back;
2131                         found += dback->found_ref;
2132                 }
2133         }
2134         if (found != rec->refs) {
2135                 err = 1;
2136                 if (!print_errs)
2137                         goto out;
2138                 fprintf(stderr, "Incorrect global backref count "
2139                         "on %llu found %llu wanted %llu\n",
2140                         (unsigned long long)rec->start,
2141                         (unsigned long long)found,
2142                         (unsigned long long)rec->refs);
2143         }
2144 out:
2145         return err;
2146 }
2147
2148 static int free_all_extent_backrefs(struct extent_record *rec)
2149 {
2150         struct extent_backref *back;
2151         struct list_head *cur;
2152         while (!list_empty(&rec->backrefs)) {
2153                 cur = rec->backrefs.next;
2154                 back = list_entry(cur, struct extent_backref, list);
2155                 list_del(cur);
2156                 free(back);
2157         }
2158         return 0;
2159 }
2160
2161 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
2162                                      struct cache_tree *extent_cache)
2163 {
2164         struct cache_extent *cache;
2165         struct extent_record *rec;
2166
2167         while (1) {
2168                 cache = first_cache_extent(extent_cache);
2169                 if (!cache)
2170                         break;
2171                 rec = container_of(cache, struct extent_record, cache);
2172                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2173                 remove_cache_extent(extent_cache, cache);
2174                 free_all_extent_backrefs(rec);
2175                 free(rec);
2176         }
2177 }
2178
2179 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2180                                  struct extent_record *rec)
2181 {
2182         if (rec->content_checked && rec->owner_ref_checked &&
2183             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2184             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2185                 remove_cache_extent(extent_cache, &rec->cache);
2186                 free_all_extent_backrefs(rec);
2187                 list_del_init(&rec->list);
2188                 free(rec);
2189         }
2190         return 0;
2191 }
2192
2193 static int check_owner_ref(struct btrfs_root *root,
2194                             struct extent_record *rec,
2195                             struct extent_buffer *buf)
2196 {
2197         struct extent_backref *node;
2198         struct tree_backref *back;
2199         struct btrfs_root *ref_root;
2200         struct btrfs_key key;
2201         struct btrfs_path path;
2202         struct extent_buffer *parent;
2203         int level;
2204         int found = 0;
2205         int ret;
2206
2207         list_for_each_entry(node, &rec->backrefs, list) {
2208                 if (node->is_data)
2209                         continue;
2210                 if (!node->found_ref)
2211                         continue;
2212                 if (node->full_backref)
2213                         continue;
2214                 back = (struct tree_backref *)node;
2215                 if (btrfs_header_owner(buf) == back->root)
2216                         return 0;
2217         }
2218         BUG_ON(rec->is_root);
2219
2220         /* try to find the block by search corresponding fs tree */
2221         key.objectid = btrfs_header_owner(buf);
2222         key.type = BTRFS_ROOT_ITEM_KEY;
2223         key.offset = (u64)-1;
2224
2225         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2226         if (IS_ERR(ref_root))
2227                 return 1;
2228
2229         level = btrfs_header_level(buf);
2230         if (level == 0)
2231                 btrfs_item_key_to_cpu(buf, &key, 0);
2232         else
2233                 btrfs_node_key_to_cpu(buf, &key, 0);
2234
2235         btrfs_init_path(&path);
2236         path.lowest_level = level + 1;
2237         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2238         if (ret < 0)
2239                 return 0;
2240
2241         parent = path.nodes[level + 1];
2242         if (parent && buf->start == btrfs_node_blockptr(parent,
2243                                                         path.slots[level + 1]))
2244                 found = 1;
2245
2246         btrfs_release_path(&path);
2247         return found ? 0 : 1;
2248 }
2249
2250 static int is_extent_tree_record(struct extent_record *rec)
2251 {
2252         struct list_head *cur = rec->backrefs.next;
2253         struct extent_backref *node;
2254         struct tree_backref *back;
2255         int is_extent = 0;
2256
2257         while(cur != &rec->backrefs) {
2258                 node = list_entry(cur, struct extent_backref, list);
2259                 cur = cur->next;
2260                 if (node->is_data)
2261                         return 0;
2262                 back = (struct tree_backref *)node;
2263                 if (node->full_backref)
2264                         return 0;
2265                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2266                         is_extent = 1;
2267         }
2268         return is_extent;
2269 }
2270
2271
2272 static int record_bad_block_io(struct btrfs_fs_info *info,
2273                                struct cache_tree *extent_cache,
2274                                u64 start, u64 len)
2275 {
2276         struct extent_record *rec;
2277         struct cache_extent *cache;
2278         struct btrfs_key key;
2279
2280         cache = lookup_cache_extent(extent_cache, start, len);
2281         if (!cache)
2282                 return 0;
2283
2284         rec = container_of(cache, struct extent_record, cache);
2285         if (!is_extent_tree_record(rec))
2286                 return 0;
2287
2288         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2289         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2290 }
2291
2292 static int check_block(struct btrfs_root *root,
2293                        struct cache_tree *extent_cache,
2294                        struct extent_buffer *buf, u64 flags)
2295 {
2296         struct extent_record *rec;
2297         struct cache_extent *cache;
2298         struct btrfs_key key;
2299         int ret = 1;
2300         int level;
2301
2302         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
2303         if (!cache)
2304                 return 1;
2305         rec = container_of(cache, struct extent_record, cache);
2306         rec->generation = btrfs_header_generation(buf);
2307
2308         level = btrfs_header_level(buf);
2309         if (btrfs_header_nritems(buf) > 0) {
2310
2311                 if (level == 0)
2312                         btrfs_item_key_to_cpu(buf, &key, 0);
2313                 else
2314                         btrfs_node_key_to_cpu(buf, &key, 0);
2315
2316                 rec->info_objectid = key.objectid;
2317         }
2318         rec->info_level = level;
2319
2320         if (btrfs_is_leaf(buf))
2321                 ret = btrfs_check_leaf(root, &rec->parent_key, buf);
2322         else
2323                 ret = btrfs_check_node(root, &rec->parent_key, buf);
2324
2325         if (ret) {
2326                 fprintf(stderr, "bad block %llu\n",
2327                         (unsigned long long)buf->start);
2328         } else {
2329                 rec->content_checked = 1;
2330                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2331                         rec->owner_ref_checked = 1;
2332                 else {
2333                         ret = check_owner_ref(root, rec, buf);
2334                         if (!ret)
2335                                 rec->owner_ref_checked = 1;
2336                 }
2337         }
2338         if (!ret)
2339                 maybe_free_extent_rec(extent_cache, rec);
2340         return ret;
2341 }
2342
2343 static struct tree_backref *find_tree_backref(struct extent_record *rec,
2344                                                 u64 parent, u64 root)
2345 {
2346         struct list_head *cur = rec->backrefs.next;
2347         struct extent_backref *node;
2348         struct tree_backref *back;
2349
2350         while(cur != &rec->backrefs) {
2351                 node = list_entry(cur, struct extent_backref, list);
2352                 cur = cur->next;
2353                 if (node->is_data)
2354                         continue;
2355                 back = (struct tree_backref *)node;
2356                 if (parent > 0) {
2357                         if (!node->full_backref)
2358                                 continue;
2359                         if (parent == back->parent)
2360                                 return back;
2361                 } else {
2362                         if (node->full_backref)
2363                                 continue;
2364                         if (back->root == root)
2365                                 return back;
2366                 }
2367         }
2368         return NULL;
2369 }
2370
2371 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
2372                                                 u64 parent, u64 root)
2373 {
2374         struct tree_backref *ref = malloc(sizeof(*ref));
2375         memset(&ref->node, 0, sizeof(ref->node));
2376         if (parent > 0) {
2377                 ref->parent = parent;
2378                 ref->node.full_backref = 1;
2379         } else {
2380                 ref->root = root;
2381                 ref->node.full_backref = 0;
2382         }
2383         list_add_tail(&ref->node.list, &rec->backrefs);
2384
2385         return ref;
2386 }
2387
2388 static struct data_backref *find_data_backref(struct extent_record *rec,
2389                                                 u64 parent, u64 root,
2390                                                 u64 owner, u64 offset,
2391                                                 int found_ref,
2392                                                 u64 disk_bytenr, u64 bytes)
2393 {
2394         struct list_head *cur = rec->backrefs.next;
2395         struct extent_backref *node;
2396         struct data_backref *back;
2397
2398         while(cur != &rec->backrefs) {
2399                 node = list_entry(cur, struct extent_backref, list);
2400                 cur = cur->next;
2401                 if (!node->is_data)
2402                         continue;
2403                 back = (struct data_backref *)node;
2404                 if (parent > 0) {
2405                         if (!node->full_backref)
2406                                 continue;
2407                         if (parent == back->parent)
2408                                 return back;
2409                 } else {
2410                         if (node->full_backref)
2411                                 continue;
2412                         if (back->root == root && back->owner == owner &&
2413                             back->offset == offset) {
2414                                 if (found_ref && node->found_ref &&
2415                                     (back->bytes != bytes ||
2416                                     back->disk_bytenr != disk_bytenr))
2417                                         continue;
2418                                 return back;
2419                         }
2420                 }
2421         }
2422         return NULL;
2423 }
2424
2425 static struct data_backref *alloc_data_backref(struct extent_record *rec,
2426                                                 u64 parent, u64 root,
2427                                                 u64 owner, u64 offset,
2428                                                 u64 max_size)
2429 {
2430         struct data_backref *ref = malloc(sizeof(*ref));
2431         memset(&ref->node, 0, sizeof(ref->node));
2432         ref->node.is_data = 1;
2433
2434         if (parent > 0) {
2435                 ref->parent = parent;
2436                 ref->owner = 0;
2437                 ref->offset = 0;
2438                 ref->node.full_backref = 1;
2439         } else {
2440                 ref->root = root;
2441                 ref->owner = owner;
2442                 ref->offset = offset;
2443                 ref->node.full_backref = 0;
2444         }
2445         ref->bytes = max_size;
2446         ref->found_ref = 0;
2447         ref->num_refs = 0;
2448         list_add_tail(&ref->node.list, &rec->backrefs);
2449         if (max_size > rec->max_size)
2450                 rec->max_size = max_size;
2451         return ref;
2452 }
2453
2454 static int add_extent_rec(struct cache_tree *extent_cache,
2455                           struct btrfs_key *parent_key,
2456                           u64 start, u64 nr, u64 extent_item_refs,
2457                           int is_root, int inc_ref, int set_checked,
2458                           int metadata, int extent_rec, u64 max_size)
2459 {
2460         struct extent_record *rec;
2461         struct cache_extent *cache;
2462         int ret = 0;
2463         int dup = 0;
2464
2465         cache = lookup_cache_extent(extent_cache, start, nr);
2466         if (cache) {
2467                 rec = container_of(cache, struct extent_record, cache);
2468                 if (inc_ref)
2469                         rec->refs++;
2470                 if (rec->nr == 1)
2471                         rec->nr = max(nr, max_size);
2472
2473                 /*
2474                  * We need to make sure to reset nr to whatever the extent
2475                  * record says was the real size, this way we can compare it to
2476                  * the backrefs.
2477                  */
2478                 if (extent_rec) {
2479                         if (start != rec->start || rec->found_rec) {
2480                                 struct extent_record *tmp;
2481
2482                                 dup = 1;
2483                                 if (list_empty(&rec->list))
2484                                         list_add_tail(&rec->list,
2485                                                       &duplicate_extents);
2486
2487                                 /*
2488                                  * We have to do this song and dance in case we
2489                                  * find an extent record that falls inside of
2490                                  * our current extent record but does not have
2491                                  * the same objectid.
2492                                  */
2493                                 tmp = malloc(sizeof(*tmp));
2494                                 if (!tmp)
2495                                         return -ENOMEM;
2496                                 tmp->start = start;
2497                                 tmp->max_size = max_size;
2498                                 tmp->nr = nr;
2499                                 tmp->found_rec = 1;
2500                                 tmp->metadata = metadata;
2501                                 tmp->extent_item_refs = extent_item_refs;
2502                                 INIT_LIST_HEAD(&tmp->list);
2503                                 list_add_tail(&tmp->list, &rec->dups);
2504                                 rec->num_duplicates++;
2505                         } else {
2506                                 rec->nr = nr;
2507                                 rec->found_rec = 1;
2508                         }
2509                 }
2510
2511                 if (extent_item_refs && !dup) {
2512                         if (rec->extent_item_refs) {
2513                                 fprintf(stderr, "block %llu rec "
2514                                         "extent_item_refs %llu, passed %llu\n",
2515                                         (unsigned long long)start,
2516                                         (unsigned long long)
2517                                                         rec->extent_item_refs,
2518                                         (unsigned long long)extent_item_refs);
2519                         }
2520                         rec->extent_item_refs = extent_item_refs;
2521                 }
2522                 if (is_root)
2523                         rec->is_root = 1;
2524                 if (set_checked) {
2525                         rec->content_checked = 1;
2526                         rec->owner_ref_checked = 1;
2527                 }
2528
2529                 if (parent_key)
2530                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2531
2532                 if (rec->max_size < max_size)
2533                         rec->max_size = max_size;
2534
2535                 maybe_free_extent_rec(extent_cache, rec);
2536                 return ret;
2537         }
2538         rec = malloc(sizeof(*rec));
2539         rec->start = start;
2540         rec->max_size = max_size;
2541         rec->nr = max(nr, max_size);
2542         rec->found_rec = extent_rec;
2543         rec->content_checked = 0;
2544         rec->owner_ref_checked = 0;
2545         rec->num_duplicates = 0;
2546         rec->metadata = metadata;
2547         INIT_LIST_HEAD(&rec->backrefs);
2548         INIT_LIST_HEAD(&rec->dups);
2549         INIT_LIST_HEAD(&rec->list);
2550
2551         if (is_root)
2552                 rec->is_root = 1;
2553         else
2554                 rec->is_root = 0;
2555
2556         if (inc_ref)
2557                 rec->refs = 1;
2558         else
2559                 rec->refs = 0;
2560
2561         if (extent_item_refs)
2562                 rec->extent_item_refs = extent_item_refs;
2563         else
2564                 rec->extent_item_refs = 0;
2565
2566         if (parent_key)
2567                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2568         else
2569                 memset(&rec->parent_key, 0, sizeof(*parent_key));
2570
2571         rec->cache.start = start;
2572         rec->cache.size = nr;
2573         ret = insert_cache_extent(extent_cache, &rec->cache);
2574         BUG_ON(ret);
2575         bytes_used += nr;
2576         if (set_checked) {
2577                 rec->content_checked = 1;
2578                 rec->owner_ref_checked = 1;
2579         }
2580         return ret;
2581 }
2582
2583 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
2584                             u64 parent, u64 root, int found_ref)
2585 {
2586         struct extent_record *rec;
2587         struct tree_backref *back;
2588         struct cache_extent *cache;
2589
2590         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2591         if (!cache) {
2592                 add_extent_rec(extent_cache, NULL, bytenr,
2593                                1, 0, 0, 0, 0, 1, 0, 0);
2594                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2595                 if (!cache)
2596                         abort();
2597         }
2598
2599         rec = container_of(cache, struct extent_record, cache);
2600         if (rec->start != bytenr) {
2601                 abort();
2602         }
2603
2604         back = find_tree_backref(rec, parent, root);
2605         if (!back)
2606                 back = alloc_tree_backref(rec, parent, root);
2607
2608         if (found_ref) {
2609                 if (back->node.found_ref) {
2610                         fprintf(stderr, "Extent back ref already exists "
2611                                 "for %llu parent %llu root %llu \n",
2612                                 (unsigned long long)bytenr,
2613                                 (unsigned long long)parent,
2614                                 (unsigned long long)root);
2615                 }
2616                 back->node.found_ref = 1;
2617         } else {
2618                 if (back->node.found_extent_tree) {
2619                         fprintf(stderr, "Extent back ref already exists "
2620                                 "for %llu parent %llu root %llu \n",
2621                                 (unsigned long long)bytenr,
2622                                 (unsigned long long)parent,
2623                                 (unsigned long long)root);
2624                 }
2625                 back->node.found_extent_tree = 1;
2626         }
2627         return 0;
2628 }
2629
2630 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
2631                             u64 parent, u64 root, u64 owner, u64 offset,
2632                             u32 num_refs, int found_ref, u64 max_size)
2633 {
2634         struct extent_record *rec;
2635         struct data_backref *back;
2636         struct cache_extent *cache;
2637
2638         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2639         if (!cache) {
2640                 add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0,
2641                                0, 0, max_size);
2642                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2643                 if (!cache)
2644                         abort();
2645         }
2646
2647         rec = container_of(cache, struct extent_record, cache);
2648         if (rec->max_size < max_size)
2649                 rec->max_size = max_size;
2650
2651         /*
2652          * If found_ref is set then max_size is the real size and must match the
2653          * existing refs.  So if we have already found a ref then we need to
2654          * make sure that this ref matches the existing one, otherwise we need
2655          * to add a new backref so we can notice that the backrefs don't match
2656          * and we need to figure out who is telling the truth.  This is to
2657          * account for that awful fsync bug I introduced where we'd end up with
2658          * a btrfs_file_extent_item that would have its length include multiple
2659          * prealloc extents or point inside of a prealloc extent.
2660          */
2661         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
2662                                  bytenr, max_size);
2663         if (!back)
2664                 back = alloc_data_backref(rec, parent, root, owner, offset,
2665                                           max_size);
2666
2667         if (found_ref) {
2668                 BUG_ON(num_refs != 1);
2669                 if (back->node.found_ref)
2670                         BUG_ON(back->bytes != max_size);
2671                 back->node.found_ref = 1;
2672                 back->found_ref += 1;
2673                 back->bytes = max_size;
2674                 back->disk_bytenr = bytenr;
2675                 rec->refs += 1;
2676                 rec->content_checked = 1;
2677                 rec->owner_ref_checked = 1;
2678         } else {
2679                 if (back->node.found_extent_tree) {
2680                         fprintf(stderr, "Extent back ref already exists "
2681                                 "for %llu parent %llu root %llu"
2682                                 "owner %llu offset %llu num_refs %lu\n",
2683                                 (unsigned long long)bytenr,
2684                                 (unsigned long long)parent,
2685                                 (unsigned long long)root,
2686                                 (unsigned long long)owner,
2687                                 (unsigned long long)offset,
2688                                 (unsigned long)num_refs);
2689                 }
2690                 back->num_refs = num_refs;
2691                 back->node.found_extent_tree = 1;
2692         }
2693         return 0;
2694 }
2695
2696 static int add_pending(struct cache_tree *pending,
2697                        struct cache_tree *seen, u64 bytenr, u32 size)
2698 {
2699         int ret;
2700         ret = add_cache_extent(seen, bytenr, size);
2701         if (ret)
2702                 return ret;
2703         add_cache_extent(pending, bytenr, size);
2704         return 0;
2705 }
2706
2707 static int pick_next_pending(struct cache_tree *pending,
2708                         struct cache_tree *reada,
2709                         struct cache_tree *nodes,
2710                         u64 last, struct block_info *bits, int bits_nr,
2711                         int *reada_bits)
2712 {
2713         unsigned long node_start = last;
2714         struct cache_extent *cache;
2715         int ret;
2716
2717         cache = search_cache_extent(reada, 0);
2718         if (cache) {
2719                 bits[0].start = cache->start;
2720                 bits[1].size = cache->size;
2721                 *reada_bits = 1;
2722                 return 1;
2723         }
2724         *reada_bits = 0;
2725         if (node_start > 32768)
2726                 node_start -= 32768;
2727
2728         cache = search_cache_extent(nodes, node_start);
2729         if (!cache)
2730                 cache = search_cache_extent(nodes, 0);
2731
2732         if (!cache) {
2733                  cache = search_cache_extent(pending, 0);
2734                  if (!cache)
2735                          return 0;
2736                  ret = 0;
2737                  do {
2738                          bits[ret].start = cache->start;
2739                          bits[ret].size = cache->size;
2740                          cache = next_cache_extent(cache);
2741                          ret++;
2742                  } while (cache && ret < bits_nr);
2743                  return ret;
2744         }
2745
2746         ret = 0;
2747         do {
2748                 bits[ret].start = cache->start;
2749                 bits[ret].size = cache->size;
2750                 cache = next_cache_extent(cache);
2751                 ret++;
2752         } while (cache && ret < bits_nr);
2753
2754         if (bits_nr - ret > 8) {
2755                 u64 lookup = bits[0].start + bits[0].size;
2756                 struct cache_extent *next;
2757                 next = search_cache_extent(pending, lookup);
2758                 while(next) {
2759                         if (next->start - lookup > 32768)
2760                                 break;
2761                         bits[ret].start = next->start;
2762                         bits[ret].size = next->size;
2763                         lookup = next->start + next->size;
2764                         ret++;
2765                         if (ret == bits_nr)
2766                                 break;
2767                         next = next_cache_extent(next);
2768                         if (!next)
2769                                 break;
2770                 }
2771         }
2772         return ret;
2773 }
2774
2775 static void free_chunk_record(struct cache_extent *cache)
2776 {
2777         struct chunk_record *rec;
2778
2779         rec = container_of(cache, struct chunk_record, cache);
2780         free(rec);
2781 }
2782
2783 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
2784 {
2785         cache_tree_free_extents(chunk_cache, free_chunk_record);
2786 }
2787
2788 static void free_device_record(struct rb_node *node)
2789 {
2790         struct device_record *rec;
2791
2792         rec = container_of(node, struct device_record, node);
2793         free(rec);
2794 }
2795
2796 FREE_RB_BASED_TREE(device_cache, free_device_record);
2797
2798 int insert_block_group_record(struct block_group_tree *tree,
2799                               struct block_group_record *bg_rec)
2800 {
2801         int ret;
2802
2803         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
2804         if (ret)
2805                 return ret;
2806
2807         list_add_tail(&bg_rec->list, &tree->block_groups);
2808         return 0;
2809 }
2810
2811 static void free_block_group_record(struct cache_extent *cache)
2812 {
2813         struct block_group_record *rec;
2814
2815         rec = container_of(cache, struct block_group_record, cache);
2816         free(rec);
2817 }
2818
2819 void free_block_group_tree(struct block_group_tree *tree)
2820 {
2821         cache_tree_free_extents(&tree->tree, free_block_group_record);
2822 }
2823
2824 int insert_device_extent_record(struct device_extent_tree *tree,
2825                                 struct device_extent_record *de_rec)
2826 {
2827         int ret;
2828
2829         /*
2830          * Device extent is a bit different from the other extents, because
2831          * the extents which belong to the different devices may have the
2832          * same start and size, so we need use the special extent cache
2833          * search/insert functions.
2834          */
2835         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
2836         if (ret)
2837                 return ret;
2838
2839         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
2840         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
2841         return 0;
2842 }
2843
2844 static void free_device_extent_record(struct cache_extent *cache)
2845 {
2846         struct device_extent_record *rec;
2847
2848         rec = container_of(cache, struct device_extent_record, cache);
2849         free(rec);
2850 }
2851
2852 void free_device_extent_tree(struct device_extent_tree *tree)
2853 {
2854         cache_tree_free_extents(&tree->tree, free_device_extent_record);
2855 }
2856
2857 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2858 static int process_extent_ref_v0(struct cache_tree *extent_cache,
2859                                  struct extent_buffer *leaf, int slot)
2860 {
2861         struct btrfs_extent_ref_v0 *ref0;
2862         struct btrfs_key key;
2863
2864         btrfs_item_key_to_cpu(leaf, &key, slot);
2865         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
2866         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
2867                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
2868         } else {
2869                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
2870                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
2871         }
2872         return 0;
2873 }
2874 #endif
2875
2876 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
2877                                             struct btrfs_key *key,
2878                                             int slot)
2879 {
2880         struct btrfs_chunk *ptr;
2881         struct chunk_record *rec;
2882         int num_stripes, i;
2883
2884         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2885         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
2886
2887         rec = malloc(btrfs_chunk_record_size(num_stripes));
2888         if (!rec) {
2889                 fprintf(stderr, "memory allocation failed\n");
2890                 exit(-1);
2891         }
2892
2893         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
2894
2895         INIT_LIST_HEAD(&rec->list);
2896         INIT_LIST_HEAD(&rec->dextents);
2897         rec->bg_rec = NULL;
2898
2899         rec->cache.start = key->offset;
2900         rec->cache.size = btrfs_chunk_length(leaf, ptr);
2901
2902         rec->generation = btrfs_header_generation(leaf);
2903
2904         rec->objectid = key->objectid;
2905         rec->type = key->type;
2906         rec->offset = key->offset;
2907
2908         rec->length = rec->cache.size;
2909         rec->owner = btrfs_chunk_owner(leaf, ptr);
2910         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
2911         rec->type_flags = btrfs_chunk_type(leaf, ptr);
2912         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
2913         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
2914         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
2915         rec->num_stripes = num_stripes;
2916         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
2917
2918         for (i = 0; i < rec->num_stripes; ++i) {
2919                 rec->stripes[i].devid =
2920                         btrfs_stripe_devid_nr(leaf, ptr, i);
2921                 rec->stripes[i].offset =
2922                         btrfs_stripe_offset_nr(leaf, ptr, i);
2923                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
2924                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
2925                                 BTRFS_UUID_SIZE);
2926         }
2927
2928         return rec;
2929 }
2930
2931 static int process_chunk_item(struct cache_tree *chunk_cache,
2932                               struct btrfs_key *key, struct extent_buffer *eb,
2933                               int slot)
2934 {
2935         struct chunk_record *rec;
2936         int ret = 0;
2937
2938         rec = btrfs_new_chunk_record(eb, key, slot);
2939         ret = insert_cache_extent(chunk_cache, &rec->cache);
2940         if (ret) {
2941                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
2942                         rec->offset, rec->length);
2943                 free(rec);
2944         }
2945
2946         return ret;
2947 }
2948
2949 static int process_device_item(struct rb_root *dev_cache,
2950                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
2951 {
2952         struct btrfs_dev_item *ptr;
2953         struct device_record *rec;
2954         int ret = 0;
2955
2956         ptr = btrfs_item_ptr(eb,
2957                 slot, struct btrfs_dev_item);
2958
2959         rec = malloc(sizeof(*rec));
2960         if (!rec) {
2961                 fprintf(stderr, "memory allocation failed\n");
2962                 return -ENOMEM;
2963         }
2964
2965         rec->devid = key->offset;
2966         rec->generation = btrfs_header_generation(eb);
2967
2968         rec->objectid = key->objectid;
2969         rec->type = key->type;
2970         rec->offset = key->offset;
2971
2972         rec->devid = btrfs_device_id(eb, ptr);
2973         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
2974         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
2975
2976         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
2977         if (ret) {
2978                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
2979                 free(rec);
2980         }
2981
2982         return ret;
2983 }
2984
2985 struct block_group_record *
2986 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
2987                              int slot)
2988 {
2989         struct btrfs_block_group_item *ptr;
2990         struct block_group_record *rec;
2991
2992         rec = malloc(sizeof(*rec));
2993         if (!rec) {
2994                 fprintf(stderr, "memory allocation failed\n");
2995                 exit(-1);
2996         }
2997         memset(rec, 0, sizeof(*rec));
2998
2999         rec->cache.start = key->objectid;
3000         rec->cache.size = key->offset;
3001
3002         rec->generation = btrfs_header_generation(leaf);
3003
3004         rec->objectid = key->objectid;
3005         rec->type = key->type;
3006         rec->offset = key->offset;
3007
3008         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
3009         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
3010
3011         INIT_LIST_HEAD(&rec->list);
3012
3013         return rec;
3014 }
3015
3016 static int process_block_group_item(struct block_group_tree *block_group_cache,
3017                                     struct btrfs_key *key,
3018                                     struct extent_buffer *eb, int slot)
3019 {
3020         struct block_group_record *rec;
3021         int ret = 0;
3022
3023         rec = btrfs_new_block_group_record(eb, key, slot);
3024         ret = insert_block_group_record(block_group_cache, rec);
3025         if (ret) {
3026                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
3027                         rec->objectid, rec->offset);
3028                 free(rec);
3029         }
3030
3031         return ret;
3032 }
3033
3034 struct device_extent_record *
3035 btrfs_new_device_extent_record(struct extent_buffer *leaf,
3036                                struct btrfs_key *key, int slot)
3037 {
3038         struct device_extent_record *rec;
3039         struct btrfs_dev_extent *ptr;
3040
3041         rec = malloc(sizeof(*rec));
3042         if (!rec) {
3043                 fprintf(stderr, "memory allocation failed\n");
3044                 exit(-1);
3045         }
3046         memset(rec, 0, sizeof(*rec));
3047
3048         rec->cache.objectid = key->objectid;
3049         rec->cache.start = key->offset;
3050
3051         rec->generation = btrfs_header_generation(leaf);
3052
3053         rec->objectid = key->objectid;
3054         rec->type = key->type;
3055         rec->offset = key->offset;
3056
3057         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
3058         rec->chunk_objecteid =
3059                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
3060         rec->chunk_offset =
3061                 btrfs_dev_extent_chunk_offset(leaf, ptr);
3062         rec->length = btrfs_dev_extent_length(leaf, ptr);
3063         rec->cache.size = rec->length;
3064
3065         INIT_LIST_HEAD(&rec->chunk_list);
3066         INIT_LIST_HEAD(&rec->device_list);
3067
3068         return rec;
3069 }
3070
3071 static int
3072 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
3073                            struct btrfs_key *key, struct extent_buffer *eb,
3074                            int slot)
3075 {
3076         struct device_extent_record *rec;
3077         int ret;
3078
3079         rec = btrfs_new_device_extent_record(eb, key, slot);
3080         ret = insert_device_extent_record(dev_extent_cache, rec);
3081         if (ret) {
3082                 fprintf(stderr,
3083                         "Device extent[%llu, %llu, %llu] existed.\n",
3084                         rec->objectid, rec->offset, rec->length);
3085                 free(rec);
3086         }
3087
3088         return ret;
3089 }
3090
3091 static int process_extent_item(struct btrfs_root *root,
3092                                struct cache_tree *extent_cache,
3093                                struct extent_buffer *eb, int slot)
3094 {
3095         struct btrfs_extent_item *ei;
3096         struct btrfs_extent_inline_ref *iref;
3097         struct btrfs_extent_data_ref *dref;
3098         struct btrfs_shared_data_ref *sref;
3099         struct btrfs_key key;
3100         unsigned long end;
3101         unsigned long ptr;
3102         int type;
3103         u32 item_size = btrfs_item_size_nr(eb, slot);
3104         u64 refs = 0;
3105         u64 offset;
3106         u64 num_bytes;
3107         int metadata = 0;
3108
3109         btrfs_item_key_to_cpu(eb, &key, slot);
3110
3111         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3112                 metadata = 1;
3113                 num_bytes = root->leafsize;
3114         } else {
3115                 num_bytes = key.offset;
3116         }
3117
3118         if (item_size < sizeof(*ei)) {
3119 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3120                 struct btrfs_extent_item_v0 *ei0;
3121                 BUG_ON(item_size != sizeof(*ei0));
3122                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
3123                 refs = btrfs_extent_refs_v0(eb, ei0);
3124 #else
3125                 BUG();
3126 #endif
3127                 return add_extent_rec(extent_cache, NULL, key.objectid,
3128                                       num_bytes, refs, 0, 0, 0, metadata, 1,
3129                                       num_bytes);
3130         }
3131
3132         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
3133         refs = btrfs_extent_refs(eb, ei);
3134
3135         add_extent_rec(extent_cache, NULL, key.objectid, num_bytes,
3136                        refs, 0, 0, 0, metadata, 1, num_bytes);
3137
3138         ptr = (unsigned long)(ei + 1);
3139         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
3140             key.type == BTRFS_EXTENT_ITEM_KEY)
3141                 ptr += sizeof(struct btrfs_tree_block_info);
3142
3143         end = (unsigned long)ei + item_size;
3144         while (ptr < end) {
3145                 iref = (struct btrfs_extent_inline_ref *)ptr;
3146                 type = btrfs_extent_inline_ref_type(eb, iref);
3147                 offset = btrfs_extent_inline_ref_offset(eb, iref);
3148                 switch (type) {
3149                 case BTRFS_TREE_BLOCK_REF_KEY:
3150                         add_tree_backref(extent_cache, key.objectid,
3151                                          0, offset, 0);
3152                         break;
3153                 case BTRFS_SHARED_BLOCK_REF_KEY:
3154                         add_tree_backref(extent_cache, key.objectid,
3155                                          offset, 0, 0);
3156                         break;
3157                 case BTRFS_EXTENT_DATA_REF_KEY:
3158                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3159                         add_data_backref(extent_cache, key.objectid, 0,
3160                                         btrfs_extent_data_ref_root(eb, dref),
3161                                         btrfs_extent_data_ref_objectid(eb,
3162                                                                        dref),
3163                                         btrfs_extent_data_ref_offset(eb, dref),
3164                                         btrfs_extent_data_ref_count(eb, dref),
3165                                         0, num_bytes);
3166                         break;
3167                 case BTRFS_SHARED_DATA_REF_KEY:
3168                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
3169                         add_data_backref(extent_cache, key.objectid, offset,
3170                                         0, 0, 0,
3171                                         btrfs_shared_data_ref_count(eb, sref),
3172                                         0, num_bytes);
3173                         break;
3174                 default:
3175                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
3176                                 key.objectid, key.type, num_bytes);
3177                         goto out;
3178                 }
3179                 ptr += btrfs_extent_inline_ref_size(type);
3180         }
3181         WARN_ON(ptr > end);
3182 out:
3183         return 0;
3184 }
3185
3186 static int check_cache_range(struct btrfs_root *root,
3187                              struct btrfs_block_group_cache *cache,
3188                              u64 offset, u64 bytes)
3189 {
3190         struct btrfs_free_space *entry;
3191         u64 *logical;
3192         u64 bytenr;
3193         int stripe_len;
3194         int i, nr, ret;
3195
3196         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3197                 bytenr = btrfs_sb_offset(i);
3198                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
3199                                        cache->key.objectid, bytenr, 0,
3200                                        &logical, &nr, &stripe_len);
3201                 if (ret)
3202                         return ret;
3203
3204                 while (nr--) {
3205                         if (logical[nr] + stripe_len <= offset)
3206                                 continue;
3207                         if (offset + bytes <= logical[nr])
3208                                 continue;
3209                         if (logical[nr] == offset) {
3210                                 if (stripe_len >= bytes) {
3211                                         kfree(logical);
3212                                         return 0;
3213                                 }
3214                                 bytes -= stripe_len;
3215                                 offset += stripe_len;
3216                         } else if (logical[nr] < offset) {
3217                                 if (logical[nr] + stripe_len >=
3218                                     offset + bytes) {
3219                                         kfree(logical);
3220                                         return 0;
3221                                 }
3222                                 bytes = (offset + bytes) -
3223                                         (logical[nr] + stripe_len);
3224                                 offset = logical[nr] + stripe_len;
3225                         } else {
3226                                 /*
3227                                  * Could be tricky, the super may land in the
3228                                  * middle of the area we're checking.  First
3229                                  * check the easiest case, it's at the end.
3230                                  */
3231                                 if (logical[nr] + stripe_len >=
3232                                     bytes + offset) {
3233                                         bytes = logical[nr] - offset;
3234                                         continue;
3235                                 }
3236
3237                                 /* Check the left side */
3238                                 ret = check_cache_range(root, cache,
3239                                                         offset,
3240                                                         logical[nr] - offset);
3241                                 if (ret) {
3242                                         kfree(logical);
3243                                         return ret;
3244                                 }
3245
3246                                 /* Now we continue with the right side */
3247                                 bytes = (offset + bytes) -
3248                                         (logical[nr] + stripe_len);
3249                                 offset = logical[nr] + stripe_len;
3250                         }
3251                 }
3252
3253                 kfree(logical);
3254         }
3255
3256         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
3257         if (!entry) {
3258                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
3259                         offset, offset+bytes);
3260                 return -EINVAL;
3261         }
3262
3263         if (entry->offset != offset) {
3264                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
3265                         entry->offset);
3266                 return -EINVAL;
3267         }
3268
3269         if (entry->bytes != bytes) {
3270                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
3271                         bytes, entry->bytes, offset);
3272                 return -EINVAL;
3273         }
3274
3275         unlink_free_space(cache->free_space_ctl, entry);
3276         free(entry);
3277         return 0;
3278 }
3279
3280 static int verify_space_cache(struct btrfs_root *root,
3281                               struct btrfs_block_group_cache *cache)
3282 {
3283         struct btrfs_path *path;
3284         struct extent_buffer *leaf;
3285         struct btrfs_key key;
3286         u64 last;
3287         int ret = 0;
3288
3289         path = btrfs_alloc_path();
3290         if (!path)
3291                 return -ENOMEM;
3292
3293         root = root->fs_info->extent_root;
3294
3295         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
3296
3297         key.objectid = last;
3298         key.offset = 0;
3299         key.type = BTRFS_EXTENT_ITEM_KEY;
3300
3301         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3302         if (ret < 0)
3303                 goto out;
3304         ret = 0;
3305         while (1) {
3306                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3307                         ret = btrfs_next_leaf(root, path);
3308                         if (ret < 0)
3309                                 goto out;
3310                         if (ret > 0) {
3311                                 ret = 0;
3312                                 break;
3313                         }
3314                 }
3315                 leaf = path->nodes[0];
3316                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3317                 if (key.objectid >= cache->key.offset + cache->key.objectid)
3318                         break;
3319                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3320                     key.type != BTRFS_METADATA_ITEM_KEY) {
3321                         path->slots[0]++;
3322                         continue;
3323                 }
3324
3325                 if (last == key.objectid) {
3326                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
3327                                 last = key.objectid + key.offset;
3328                         else
3329                                 last = key.objectid + root->leafsize;
3330                         path->slots[0]++;
3331                         continue;
3332                 }
3333
3334                 ret = check_cache_range(root, cache, last,
3335                                         key.objectid - last);
3336                 if (ret)
3337                         break;
3338                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3339                         last = key.objectid + key.offset;
3340                 else
3341                         last = key.objectid + root->leafsize;
3342                 path->slots[0]++;
3343         }
3344
3345         if (last < cache->key.objectid + cache->key.offset)
3346                 ret = check_cache_range(root, cache, last,
3347                                         cache->key.objectid +
3348                                         cache->key.offset - last);
3349
3350 out:
3351         btrfs_free_path(path);
3352
3353         if (!ret &&
3354             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
3355                 fprintf(stderr, "There are still entries left in the space "
3356                         "cache\n");
3357                 ret = -EINVAL;
3358         }
3359
3360         return ret;
3361 }
3362
3363 static int check_space_cache(struct btrfs_root *root)
3364 {
3365         struct btrfs_block_group_cache *cache;
3366         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
3367         int ret;
3368         int error = 0;
3369
3370         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
3371             btrfs_super_generation(root->fs_info->super_copy) !=
3372             btrfs_super_cache_generation(root->fs_info->super_copy)) {
3373                 printf("cache and super generation don't match, space cache "
3374                        "will be invalidated\n");
3375                 return 0;
3376         }
3377
3378         while (1) {
3379                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
3380                 if (!cache)
3381                         break;
3382
3383                 start = cache->key.objectid + cache->key.offset;
3384                 if (!cache->free_space_ctl) {
3385                         if (btrfs_init_free_space_ctl(cache,
3386                                                       root->sectorsize)) {
3387                                 ret = -ENOMEM;
3388                                 break;
3389                         }
3390                 } else {
3391                         btrfs_remove_free_space_cache(cache);
3392                 }
3393
3394                 ret = load_free_space_cache(root->fs_info, cache);
3395                 if (!ret)
3396                         continue;
3397
3398                 ret = verify_space_cache(root, cache);
3399                 if (ret) {
3400                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
3401                                 cache->key.objectid);
3402                         error++;
3403                 }
3404         }
3405
3406         return error ? -EINVAL : 0;
3407 }
3408
3409 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
3410                                u64 num_bytes)
3411 {
3412         struct btrfs_path *path;
3413         struct extent_buffer *leaf;
3414         struct btrfs_key key;
3415         int ret;
3416
3417         path = btrfs_alloc_path();
3418         if (!path) {
3419                 fprintf(stderr, "Error allocing path\n");
3420                 return -ENOMEM;
3421         }
3422
3423         key.objectid = bytenr;
3424         key.type = BTRFS_EXTENT_ITEM_KEY;
3425         key.offset = 0;
3426
3427
3428 again:
3429         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
3430                                 0, 0);
3431         if (ret < 0) {
3432                 fprintf(stderr, "Error looking up extent record %d\n", ret);
3433                 btrfs_free_path(path);
3434                 return ret;
3435         } else if (ret) {
3436                 if (path->slots[0])
3437                         path->slots[0]--;
3438                 else
3439                         btrfs_prev_leaf(root, path);
3440         }
3441
3442         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3443
3444         /*
3445          * Block group items come before extent items if they have the same
3446          * bytenr, so walk back one more just in case.  Dear future traveler,
3447          * first congrats on mastering time travel.  Now if it's not too much
3448          * trouble could you go back to 2006 and tell Chris to make the
3449          * BLOCK_GROUP_ITEM_KEY lower than the EXTENT_ITEM_KEY please?
3450          */
3451         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3452                 if (path->slots[0])
3453                         path->slots[0]--;
3454                 else
3455                         btrfs_prev_leaf(root, path);
3456         }
3457
3458         while (num_bytes) {
3459                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3460                         ret = btrfs_next_leaf(root, path);
3461                         if (ret < 0) {
3462                                 fprintf(stderr, "Error going to next leaf "
3463                                         "%d\n", ret);
3464                                 btrfs_free_path(path);
3465                                 return ret;
3466                         } else if (ret) {
3467                                 break;
3468                         }
3469                 }
3470                 leaf = path->nodes[0];
3471                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3472                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
3473                         path->slots[0]++;
3474                         continue;
3475                 }
3476                 if (key.objectid + key.offset < bytenr) {
3477                         path->slots[0]++;
3478                         continue;
3479                 }
3480                 if (key.objectid > bytenr + num_bytes)
3481                         break;
3482
3483                 if (key.objectid == bytenr) {
3484                         if (key.offset >= num_bytes) {
3485                                 num_bytes = 0;
3486                                 break;
3487                         }
3488                         num_bytes -= key.offset;
3489                         bytenr += key.offset;
3490                 } else if (key.objectid < bytenr) {
3491                         if (key.objectid + key.offset >= bytenr + num_bytes) {
3492                                 num_bytes = 0;
3493                                 break;
3494                         }
3495                         num_bytes = (bytenr + num_bytes) -
3496                                 (key.objectid + key.offset);
3497                         bytenr = key.objectid + key.offset;
3498                 } else {
3499                         if (key.objectid + key.offset < bytenr + num_bytes) {
3500                                 u64 new_start = key.objectid + key.offset;
3501                                 u64 new_bytes = bytenr + num_bytes - new_start;
3502
3503                                 /*
3504                                  * Weird case, the extent is in the middle of
3505                                  * our range, we'll have to search one side
3506                                  * and then the other.  Not sure if this happens
3507                                  * in real life, but no harm in coding it up
3508                                  * anyway just in case.
3509                                  */
3510                                 btrfs_release_path(path);
3511                                 ret = check_extent_exists(root, new_start,
3512                                                           new_bytes);
3513                                 if (ret) {
3514                                         fprintf(stderr, "Right section didn't "
3515                                                 "have a record\n");
3516                                         break;
3517                                 }
3518                                 num_bytes = key.objectid - bytenr;
3519                                 goto again;
3520                         }
3521                         num_bytes = key.objectid - bytenr;
3522                 }
3523                 path->slots[0]++;
3524         }
3525         ret = 0;
3526
3527         if (num_bytes) {
3528                 fprintf(stderr, "There are no extents for csum range "
3529                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
3530                 ret = 1;
3531         }
3532
3533         btrfs_free_path(path);
3534         return ret;
3535 }
3536
3537 static int check_csums(struct btrfs_root *root)
3538 {
3539         struct btrfs_path *path;
3540         struct extent_buffer *leaf;
3541         struct btrfs_key key;
3542         u64 offset = 0, num_bytes = 0;
3543         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3544         int errors = 0;
3545         int ret;
3546
3547         root = root->fs_info->csum_root;
3548
3549         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3550         key.type = BTRFS_EXTENT_CSUM_KEY;
3551         key.offset = 0;
3552
3553         path = btrfs_alloc_path();
3554         if (!path)
3555                 return -ENOMEM;
3556
3557         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3558         if (ret < 0) {
3559                 fprintf(stderr, "Error searching csum tree %d\n", ret);
3560                 btrfs_free_path(path);
3561                 return ret;
3562         }
3563
3564         if (ret > 0 && path->slots[0])
3565                 path->slots[0]--;
3566         ret = 0;
3567
3568         while (1) {
3569                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3570                         ret = btrfs_next_leaf(root, path);
3571                         if (ret < 0) {
3572                                 fprintf(stderr, "Error going to next leaf "
3573                                         "%d\n", ret);
3574                                 break;
3575                         }
3576                         if (ret)
3577                                 break;
3578                 }
3579                 leaf = path->nodes[0];
3580
3581                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3582                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
3583                         path->slots[0]++;
3584                         continue;
3585                 }
3586
3587                 if (!num_bytes) {
3588                         offset = key.offset;
3589                 } else if (key.offset != offset + num_bytes) {
3590                         ret = check_extent_exists(root, offset, num_bytes);
3591                         if (ret) {
3592                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
3593                                         "there is no extent record\n",
3594                                         offset, offset+num_bytes);
3595                                 errors++;
3596                         }
3597                         offset = key.offset;
3598                         num_bytes = 0;
3599                 }
3600
3601                 num_bytes += (btrfs_item_size_nr(leaf, path->slots[0]) /
3602                               csum_size) * root->sectorsize;
3603                 path->slots[0]++;
3604         }
3605
3606         btrfs_free_path(path);
3607         return errors;
3608 }
3609
3610 static int is_dropped_key(struct btrfs_key *key,
3611                           struct btrfs_key *drop_key) {
3612         if (key->objectid < drop_key->objectid)
3613                 return 1;
3614         else if (key->objectid == drop_key->objectid) {
3615                 if (key->type < drop_key->type)
3616                         return 1;
3617                 else if (key->type == drop_key->type) {
3618                         if (key->offset < drop_key->offset)
3619                                 return 1;
3620                 }
3621         }
3622         return 0;
3623 }
3624
3625 static int run_next_block(struct btrfs_root *root,
3626                           struct block_info *bits,
3627                           int bits_nr,
3628                           u64 *last,
3629                           struct cache_tree *pending,
3630                           struct cache_tree *seen,
3631                           struct cache_tree *reada,
3632                           struct cache_tree *nodes,
3633                           struct cache_tree *extent_cache,
3634                           struct cache_tree *chunk_cache,
3635                           struct rb_root *dev_cache,
3636                           struct block_group_tree *block_group_cache,
3637                           struct device_extent_tree *dev_extent_cache,
3638                           struct btrfs_root_item *ri)
3639 {
3640         struct extent_buffer *buf;
3641         u64 bytenr;
3642         u32 size;
3643         u64 parent;
3644         u64 owner;
3645         u64 flags;
3646         u64 ptr;
3647         int ret;
3648         int i;
3649         int nritems;
3650         struct btrfs_key key;
3651         struct cache_extent *cache;
3652         int reada_bits;
3653
3654         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
3655                                     bits_nr, &reada_bits);
3656         if (nritems == 0)
3657                 return 1;
3658
3659         if (!reada_bits) {
3660                 for(i = 0; i < nritems; i++) {
3661                         ret = add_cache_extent(reada, bits[i].start,
3662                                                bits[i].size);
3663                         if (ret == -EEXIST)
3664                                 continue;
3665
3666                         /* fixme, get the parent transid */
3667                         readahead_tree_block(root, bits[i].start,
3668                                              bits[i].size, 0);
3669                 }
3670         }
3671         *last = bits[0].start;
3672         bytenr = bits[0].start;
3673         size = bits[0].size;
3674
3675         cache = lookup_cache_extent(pending, bytenr, size);
3676         if (cache) {
3677                 remove_cache_extent(pending, cache);
3678                 free(cache);
3679         }
3680         cache = lookup_cache_extent(reada, bytenr, size);
3681         if (cache) {
3682                 remove_cache_extent(reada, cache);
3683                 free(cache);
3684         }
3685         cache = lookup_cache_extent(nodes, bytenr, size);
3686         if (cache) {
3687                 remove_cache_extent(nodes, cache);
3688                 free(cache);
3689         }
3690         cache = lookup_cache_extent(seen, bytenr, size);
3691         if (cache) {
3692                 remove_cache_extent(seen, cache);
3693                 free(cache);
3694         }
3695
3696         /* fixme, get the real parent transid */
3697         buf = read_tree_block(root, bytenr, size, 0);
3698         if (!extent_buffer_uptodate(buf)) {
3699                 record_bad_block_io(root->fs_info,
3700                                     extent_cache, bytenr, size);
3701                 goto out;
3702         }
3703
3704         nritems = btrfs_header_nritems(buf);
3705
3706         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
3707                                        btrfs_header_level(buf), 1, NULL,
3708                                        &flags);
3709         if (ret < 0)
3710                 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
3711
3712         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
3713                 parent = bytenr;
3714                 owner = 0;
3715         } else {
3716                 parent = 0;
3717                 owner = btrfs_header_owner(buf);
3718         }
3719
3720         ret = check_block(root, extent_cache, buf, flags);
3721         if (ret)
3722                 goto out;
3723
3724         if (btrfs_is_leaf(buf)) {
3725                 btree_space_waste += btrfs_leaf_free_space(root, buf);
3726                 for (i = 0; i < nritems; i++) {
3727                         struct btrfs_file_extent_item *fi;
3728                         btrfs_item_key_to_cpu(buf, &key, i);
3729                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
3730                                 process_extent_item(root, extent_cache, buf,
3731                                                     i);
3732                                 continue;
3733                         }
3734                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3735                                 process_extent_item(root, extent_cache, buf,
3736                                                     i);
3737                                 continue;
3738                         }
3739                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
3740                                 total_csum_bytes +=
3741                                         btrfs_item_size_nr(buf, i);
3742                                 continue;
3743                         }
3744                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
3745                                 process_chunk_item(chunk_cache, &key, buf, i);
3746                                 continue;
3747                         }
3748                         if (key.type == BTRFS_DEV_ITEM_KEY) {
3749                                 process_device_item(dev_cache, &key, buf, i);
3750                                 continue;
3751                         }
3752                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3753                                 process_block_group_item(block_group_cache,
3754                                         &key, buf, i);
3755                                 continue;
3756                         }
3757                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
3758                                 process_device_extent_item(dev_extent_cache,
3759                                         &key, buf, i);
3760                                 continue;
3761
3762                         }
3763                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
3764 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3765                                 process_extent_ref_v0(extent_cache, buf, i);
3766 #else
3767                                 BUG();
3768 #endif
3769                                 continue;
3770                         }
3771
3772                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
3773                                 add_tree_backref(extent_cache, key.objectid, 0,
3774                                                  key.offset, 0);
3775                                 continue;
3776                         }
3777                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
3778                                 add_tree_backref(extent_cache, key.objectid,
3779                                                  key.offset, 0, 0);
3780                                 continue;
3781                         }
3782                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3783                                 struct btrfs_extent_data_ref *ref;
3784                                 ref = btrfs_item_ptr(buf, i,
3785                                                 struct btrfs_extent_data_ref);
3786                                 add_data_backref(extent_cache,
3787                                         key.objectid, 0,
3788                                         btrfs_extent_data_ref_root(buf, ref),
3789                                         btrfs_extent_data_ref_objectid(buf,
3790                                                                        ref),
3791                                         btrfs_extent_data_ref_offset(buf, ref),
3792                                         btrfs_extent_data_ref_count(buf, ref),
3793                                         0, root->sectorsize);
3794                                 continue;
3795                         }
3796                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3797                                 struct btrfs_shared_data_ref *ref;
3798                                 ref = btrfs_item_ptr(buf, i,
3799                                                 struct btrfs_shared_data_ref);
3800                                 add_data_backref(extent_cache,
3801                                         key.objectid, key.offset, 0, 0, 0,
3802                                         btrfs_shared_data_ref_count(buf, ref),
3803                                         0, root->sectorsize);
3804                                 continue;
3805                         }
3806                         if (key.type != BTRFS_EXTENT_DATA_KEY)
3807                                 continue;
3808                         fi = btrfs_item_ptr(buf, i,
3809                                             struct btrfs_file_extent_item);
3810                         if (btrfs_file_extent_type(buf, fi) ==
3811                             BTRFS_FILE_EXTENT_INLINE)
3812                                 continue;
3813                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
3814                                 continue;
3815
3816                         data_bytes_allocated +=
3817                                 btrfs_file_extent_disk_num_bytes(buf, fi);
3818                         if (data_bytes_allocated < root->sectorsize) {
3819                                 abort();
3820                         }
3821                         data_bytes_referenced +=
3822                                 btrfs_file_extent_num_bytes(buf, fi);
3823                         add_data_backref(extent_cache,
3824                                 btrfs_file_extent_disk_bytenr(buf, fi),
3825                                 parent, owner, key.objectid, key.offset -
3826                                 btrfs_file_extent_offset(buf, fi), 1, 1,
3827                                 btrfs_file_extent_disk_num_bytes(buf, fi));
3828                         BUG_ON(ret);
3829                 }
3830         } else {
3831                 int level;
3832                 struct btrfs_key first_key;
3833
3834                 first_key.objectid = 0;
3835
3836                 if (nritems > 0)
3837                         btrfs_item_key_to_cpu(buf, &first_key, 0);
3838                 level = btrfs_header_level(buf);
3839                 for (i = 0; i < nritems; i++) {
3840                         ptr = btrfs_node_blockptr(buf, i);
3841                         size = btrfs_level_size(root, level - 1);
3842                         btrfs_node_key_to_cpu(buf, &key, i);
3843                         if (ri != NULL) {
3844                                 struct btrfs_key drop_key;
3845                                 btrfs_disk_key_to_cpu(&drop_key,
3846                                                       &ri->drop_progress);
3847                                 if ((level == ri->drop_level)
3848                                     && is_dropped_key(&key, &drop_key)) {
3849                                         continue;
3850                                 }
3851                         }
3852                         ret = add_extent_rec(extent_cache, &key,
3853                                              ptr, size, 0, 0, 1, 0, 1, 0,
3854                                              size);
3855                         BUG_ON(ret);
3856
3857                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
3858
3859                         if (level > 1) {
3860                                 add_pending(nodes, seen, ptr, size);
3861                         } else {
3862                                 add_pending(pending, seen, ptr, size);
3863                         }
3864                 }
3865                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
3866                                       nritems) * sizeof(struct btrfs_key_ptr);
3867         }
3868         total_btree_bytes += buf->len;
3869         if (fs_root_objectid(btrfs_header_owner(buf)))
3870                 total_fs_tree_bytes += buf->len;
3871         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
3872                 total_extent_tree_bytes += buf->len;
3873         if (!found_old_backref &&
3874             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
3875             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
3876             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
3877                 found_old_backref = 1;
3878 out:
3879         free_extent_buffer(buf);
3880         return 0;
3881 }
3882
3883 static int add_root_to_pending(struct extent_buffer *buf,
3884                                struct cache_tree *extent_cache,
3885                                struct cache_tree *pending,
3886                                struct cache_tree *seen,
3887                                struct cache_tree *nodes,
3888                                struct btrfs_key *root_key)
3889 {
3890         if (btrfs_header_level(buf) > 0)
3891                 add_pending(nodes, seen, buf->start, buf->len);
3892         else
3893                 add_pending(pending, seen, buf->start, buf->len);
3894         add_extent_rec(extent_cache, NULL, buf->start, buf->len,
3895                        0, 1, 1, 0, 1, 0, buf->len);
3896
3897         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
3898             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
3899                 add_tree_backref(extent_cache, buf->start, buf->start,
3900                                  0, 1);
3901         else
3902                 add_tree_backref(extent_cache, buf->start, 0,
3903                                  root_key->objectid, 1);
3904         return 0;
3905 }
3906
3907 /* as we fix the tree, we might be deleting blocks that
3908  * we're tracking for repair.  This hook makes sure we
3909  * remove any backrefs for blocks as we are fixing them.
3910  */
3911 static int free_extent_hook(struct btrfs_trans_handle *trans,
3912                             struct btrfs_root *root,
3913                             u64 bytenr, u64 num_bytes, u64 parent,
3914                             u64 root_objectid, u64 owner, u64 offset,
3915                             int refs_to_drop)
3916 {
3917         struct extent_record *rec;
3918         struct cache_extent *cache;
3919         int is_data;
3920         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
3921
3922         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
3923         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
3924         if (!cache)
3925                 return 0;
3926
3927         rec = container_of(cache, struct extent_record, cache);
3928         if (is_data) {
3929                 struct data_backref *back;
3930                 back = find_data_backref(rec, parent, root_objectid, owner,
3931                                          offset, 1, bytenr, num_bytes);
3932                 if (!back)
3933                         goto out;
3934                 if (back->node.found_ref) {
3935                         back->found_ref -= refs_to_drop;
3936                         if (rec->refs)
3937                                 rec->refs -= refs_to_drop;
3938                 }
3939                 if (back->node.found_extent_tree) {
3940                         back->num_refs -= refs_to_drop;
3941                         if (rec->extent_item_refs)
3942                                 rec->extent_item_refs -= refs_to_drop;
3943                 }
3944                 if (back->found_ref == 0)
3945                         back->node.found_ref = 0;
3946                 if (back->num_refs == 0)
3947                         back->node.found_extent_tree = 0;
3948
3949                 if (!back->node.found_extent_tree && back->node.found_ref) {
3950                         list_del(&back->node.list);
3951                         free(back);
3952                 }
3953         } else {
3954                 struct tree_backref *back;
3955                 back = find_tree_backref(rec, parent, root_objectid);
3956                 if (!back)
3957                         goto out;
3958                 if (back->node.found_ref) {
3959                         if (rec->refs)
3960                                 rec->refs--;
3961                         back->node.found_ref = 0;
3962                 }
3963                 if (back->node.found_extent_tree) {
3964                         if (rec->extent_item_refs)
3965                                 rec->extent_item_refs--;
3966                         back->node.found_extent_tree = 0;
3967                 }
3968                 if (!back->node.found_extent_tree && back->node.found_ref) {
3969                         list_del(&back->node.list);
3970                         free(back);
3971                 }
3972         }
3973         maybe_free_extent_rec(extent_cache, rec);
3974 out:
3975         return 0;
3976 }
3977
3978 static int delete_extent_records(struct btrfs_trans_handle *trans,
3979                                  struct btrfs_root *root,
3980                                  struct btrfs_path *path,
3981                                  u64 bytenr, u64 new_len)
3982 {
3983         struct btrfs_key key;
3984         struct btrfs_key found_key;
3985         struct extent_buffer *leaf;
3986         int ret;
3987         int slot;
3988
3989
3990         key.objectid = bytenr;
3991         key.type = (u8)-1;
3992         key.offset = (u64)-1;
3993
3994         while(1) {
3995                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
3996                                         &key, path, 0, 1);
3997                 if (ret < 0)
3998                         break;
3999
4000                 if (ret > 0) {
4001                         ret = 0;
4002                         if (path->slots[0] == 0)
4003                                 break;
4004                         path->slots[0]--;
4005                 }
4006                 ret = 0;
4007
4008                 leaf = path->nodes[0];
4009                 slot = path->slots[0];
4010
4011                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4012                 if (found_key.objectid != bytenr)
4013                         break;
4014
4015                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
4016                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
4017                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4018                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
4019                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
4020                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
4021                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
4022                         btrfs_release_path(path);
4023                         if (found_key.type == 0) {
4024                                 if (found_key.offset == 0)
4025                                         break;
4026                                 key.offset = found_key.offset - 1;
4027                                 key.type = found_key.type;
4028                         }
4029                         key.type = found_key.type - 1;
4030                         key.offset = (u64)-1;
4031                         continue;
4032                 }
4033
4034                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
4035                         found_key.objectid, found_key.type, found_key.offset);
4036
4037                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
4038                 if (ret)
4039                         break;
4040                 btrfs_release_path(path);
4041
4042                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4043                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
4044                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
4045                                 found_key.offset : root->leafsize;
4046
4047                         ret = btrfs_update_block_group(trans, root, bytenr,
4048                                                        bytes, 0, 0);
4049                         if (ret)
4050                                 break;
4051                 }
4052         }
4053
4054         btrfs_release_path(path);
4055         return ret;
4056 }
4057
4058 /*
4059  * for a single backref, this will allocate a new extent
4060  * and add the backref to it.
4061  */
4062 static int record_extent(struct btrfs_trans_handle *trans,
4063                          struct btrfs_fs_info *info,
4064                          struct btrfs_path *path,
4065                          struct extent_record *rec,
4066                          struct extent_backref *back,
4067                          int allocated, u64 flags)
4068 {
4069         int ret;
4070         struct btrfs_root *extent_root = info->extent_root;
4071         struct extent_buffer *leaf;
4072         struct btrfs_key ins_key;
4073         struct btrfs_extent_item *ei;
4074         struct tree_backref *tback;
4075         struct data_backref *dback;
4076         struct btrfs_tree_block_info *bi;
4077
4078         if (!back->is_data)
4079                 rec->max_size = max_t(u64, rec->max_size,
4080                                     info->extent_root->leafsize);
4081
4082         if (!allocated) {
4083                 u32 item_size = sizeof(*ei);
4084
4085                 if (!back->is_data)
4086                         item_size += sizeof(*bi);
4087
4088                 ins_key.objectid = rec->start;
4089                 ins_key.offset = rec->max_size;
4090                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
4091
4092                 ret = btrfs_insert_empty_item(trans, extent_root, path,
4093                                         &ins_key, item_size);
4094                 if (ret)
4095                         goto fail;
4096
4097                 leaf = path->nodes[0];
4098                 ei = btrfs_item_ptr(leaf, path->slots[0],
4099                                     struct btrfs_extent_item);
4100
4101                 btrfs_set_extent_refs(leaf, ei, 0);
4102                 btrfs_set_extent_generation(leaf, ei, rec->generation);
4103
4104                 if (back->is_data) {
4105                         btrfs_set_extent_flags(leaf, ei,
4106                                                BTRFS_EXTENT_FLAG_DATA);
4107                 } else {
4108                         struct btrfs_disk_key copy_key;;
4109
4110                         tback = (struct tree_backref *)back;
4111                         bi = (struct btrfs_tree_block_info *)(ei + 1);
4112                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
4113                                              sizeof(*bi));
4114
4115                         btrfs_set_disk_key_objectid(&copy_key,
4116                                                     rec->info_objectid);
4117                         btrfs_set_disk_key_type(&copy_key, 0);
4118                         btrfs_set_disk_key_offset(&copy_key, 0);
4119
4120                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
4121                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
4122
4123                         btrfs_set_extent_flags(leaf, ei,
4124                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
4125                 }
4126
4127                 btrfs_mark_buffer_dirty(leaf);
4128                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
4129                                                rec->max_size, 1, 0);
4130                 if (ret)
4131                         goto fail;
4132                 btrfs_release_path(path);
4133         }
4134
4135         if (back->is_data) {
4136                 u64 parent;
4137                 int i;
4138
4139                 dback = (struct data_backref *)back;
4140                 if (back->full_backref)
4141                         parent = dback->parent;
4142                 else
4143                         parent = 0;
4144
4145                 for (i = 0; i < dback->found_ref; i++) {
4146                         /* if parent != 0, we're doing a full backref
4147                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
4148                          * just makes the backref allocator create a data
4149                          * backref
4150                          */
4151                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
4152                                                    rec->start, rec->max_size,
4153                                                    parent,
4154                                                    dback->root,
4155                                                    parent ?
4156                                                    BTRFS_FIRST_FREE_OBJECTID :
4157                                                    dback->owner,
4158                                                    dback->offset);
4159                         if (ret)
4160                                 break;
4161                 }
4162                 fprintf(stderr, "adding new data backref"
4163                                 " on %llu %s %llu owner %llu"
4164                                 " offset %llu found %d\n",
4165                                 (unsigned long long)rec->start,
4166                                 back->full_backref ?
4167                                 "parent" : "root",
4168                                 back->full_backref ?
4169                                 (unsigned long long)parent :
4170                                 (unsigned long long)dback->root,
4171                                 (unsigned long long)dback->owner,
4172                                 (unsigned long long)dback->offset,
4173                                 dback->found_ref);
4174         } else {
4175                 u64 parent;
4176
4177                 tback = (struct tree_backref *)back;
4178                 if (back->full_backref)
4179                         parent = tback->parent;
4180                 else
4181                         parent = 0;
4182
4183                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
4184                                            rec->start, rec->max_size,
4185                                            parent, tback->root, 0, 0);
4186                 fprintf(stderr, "adding new tree backref on "
4187                         "start %llu len %llu parent %llu root %llu\n",
4188                         rec->start, rec->max_size, tback->parent, tback->root);
4189         }
4190         if (ret)
4191                 goto fail;
4192 fail:
4193         btrfs_release_path(path);
4194         return ret;
4195 }
4196
4197 struct extent_entry {
4198         u64 bytenr;
4199         u64 bytes;
4200         int count;
4201         int broken;
4202         struct list_head list;
4203 };
4204
4205 static struct extent_entry *find_entry(struct list_head *entries,
4206                                        u64 bytenr, u64 bytes)
4207 {
4208         struct extent_entry *entry = NULL;
4209
4210         list_for_each_entry(entry, entries, list) {
4211                 if (entry->bytenr == bytenr && entry->bytes == bytes)
4212                         return entry;
4213         }
4214
4215         return NULL;
4216 }
4217
4218 static struct extent_entry *find_most_right_entry(struct list_head *entries)
4219 {
4220         struct extent_entry *entry, *best = NULL, *prev = NULL;
4221
4222         list_for_each_entry(entry, entries, list) {
4223                 if (!prev) {
4224                         prev = entry;
4225                         continue;
4226                 }
4227
4228                 /*
4229                  * If there are as many broken entries as entries then we know
4230                  * not to trust this particular entry.
4231                  */
4232                 if (entry->broken == entry->count)
4233                         continue;
4234
4235                 /*
4236                  * If our current entry == best then we can't be sure our best
4237                  * is really the best, so we need to keep searching.
4238                  */
4239                 if (best && best->count == entry->count) {
4240                         prev = entry;
4241                         best = NULL;
4242                         continue;
4243                 }
4244
4245                 /* Prev == entry, not good enough, have to keep searching */
4246                 if (!prev->broken && prev->count == entry->count)
4247                         continue;
4248
4249                 if (!best)
4250                         best = (prev->count > entry->count) ? prev : entry;
4251                 else if (best->count < entry->count)
4252                         best = entry;
4253                 prev = entry;
4254         }
4255
4256         return best;
4257 }
4258
4259 static int repair_ref(struct btrfs_trans_handle *trans,
4260                       struct btrfs_fs_info *info, struct btrfs_path *path,
4261                       struct data_backref *dback, struct extent_entry *entry)
4262 {
4263         struct btrfs_root *root;
4264         struct btrfs_file_extent_item *fi;
4265         struct extent_buffer *leaf;
4266         struct btrfs_key key;
4267         u64 bytenr, bytes;
4268         int ret;
4269
4270         key.objectid = dback->root;
4271         key.type = BTRFS_ROOT_ITEM_KEY;
4272         key.offset = (u64)-1;
4273         root = btrfs_read_fs_root(info, &key);
4274         if (IS_ERR(root)) {
4275                 fprintf(stderr, "Couldn't find root for our ref\n");
4276                 return -EINVAL;
4277         }
4278
4279         /*
4280          * The backref points to the original offset of the extent if it was
4281          * split, so we need to search down to the offset we have and then walk
4282          * forward until we find the backref we're looking for.
4283          */
4284         key.objectid = dback->owner;
4285         key.type = BTRFS_EXTENT_DATA_KEY;
4286         key.offset = dback->offset;
4287         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4288         if (ret < 0) {
4289                 fprintf(stderr, "Error looking up ref %d\n", ret);
4290                 return ret;
4291         }
4292
4293         while (1) {
4294                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4295                         ret = btrfs_next_leaf(root, path);
4296                         if (ret) {
4297                                 fprintf(stderr, "Couldn't find our ref, next\n");
4298                                 return -EINVAL;
4299                         }
4300                 }
4301                 leaf = path->nodes[0];
4302                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4303                 if (key.objectid != dback->owner ||
4304                     key.type != BTRFS_EXTENT_DATA_KEY) {
4305                         fprintf(stderr, "Couldn't find our ref, search\n");
4306                         return -EINVAL;
4307                 }
4308                 fi = btrfs_item_ptr(leaf, path->slots[0],
4309                                     struct btrfs_file_extent_item);
4310                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4311                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4312
4313                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
4314                         break;
4315                 path->slots[0]++;
4316         }
4317
4318         btrfs_release_path(path);
4319
4320         /*
4321          * Have to make sure that this root gets updated when we commit the
4322          * transaction
4323          */
4324         root->track_dirty = 1;
4325         if (root->last_trans != trans->transid) {
4326                 root->last_trans = trans->transid;
4327                 root->commit_root = root->node;
4328                 extent_buffer_get(root->node);
4329         }
4330
4331         /*
4332          * Ok we have the key of the file extent we want to fix, now we can cow
4333          * down to the thing and fix it.
4334          */
4335         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4336         if (ret < 0) {
4337                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
4338                         key.objectid, key.type, key.offset, ret);
4339                 return ret;
4340         }
4341         if (ret > 0) {
4342                 fprintf(stderr, "Well that's odd, we just found this key "
4343                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
4344                         key.offset);
4345                 return -EINVAL;
4346         }
4347         leaf = path->nodes[0];
4348         fi = btrfs_item_ptr(leaf, path->slots[0],
4349                             struct btrfs_file_extent_item);
4350
4351         if (btrfs_file_extent_compression(leaf, fi) &&
4352             dback->disk_bytenr != entry->bytenr) {
4353                 fprintf(stderr, "Ref doesn't match the record start and is "
4354                         "compressed, please take a btrfs-image of this file "
4355                         "system and send it to a btrfs developer so they can "
4356                         "complete this functionality for bytenr %Lu\n",
4357                         dback->disk_bytenr);
4358                 return -EINVAL;
4359         }
4360
4361         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
4362                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4363         } else if (dback->disk_bytenr > entry->bytenr) {
4364                 u64 off_diff, offset;
4365
4366                 off_diff = dback->disk_bytenr - entry->bytenr;
4367                 offset = btrfs_file_extent_offset(leaf, fi);
4368                 if (dback->disk_bytenr + offset +
4369                     btrfs_file_extent_num_bytes(leaf, fi) >
4370                     entry->bytenr + entry->bytes) {
4371                         fprintf(stderr, "Ref is past the entry end, please "
4372                                 "take a btrfs-image of this file system and "
4373                                 "send it to a btrfs developer, ref %Lu\n",
4374                                 dback->disk_bytenr);
4375                         return -EINVAL;
4376                 }
4377                 offset += off_diff;
4378                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4379                 btrfs_set_file_extent_offset(leaf, fi, offset);
4380         } else if (dback->disk_bytenr < entry->bytenr) {
4381                 u64 offset;
4382
4383                 offset = btrfs_file_extent_offset(leaf, fi);
4384                 if (dback->disk_bytenr + offset < entry->bytenr) {
4385                         fprintf(stderr, "Ref is before the entry start, please"
4386                                 " take a btrfs-image of this file system and "
4387                                 "send it to a btrfs developer, ref %Lu\n",
4388                                 dback->disk_bytenr);
4389                         return -EINVAL;
4390                 }
4391
4392                 offset += dback->disk_bytenr;
4393                 offset -= entry->bytenr;
4394                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4395                 btrfs_set_file_extent_offset(leaf, fi, offset);
4396         }
4397
4398         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
4399
4400         /*
4401          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
4402          * only do this if we aren't using compression, otherwise it's a
4403          * trickier case.
4404          */
4405         if (!btrfs_file_extent_compression(leaf, fi))
4406                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
4407         else
4408                 printf("ram bytes may be wrong?\n");
4409         btrfs_mark_buffer_dirty(leaf);
4410         btrfs_release_path(path);
4411         return 0;
4412 }
4413
4414 static int verify_backrefs(struct btrfs_trans_handle *trans,
4415                            struct btrfs_fs_info *info, struct btrfs_path *path,
4416                            struct extent_record *rec)
4417 {
4418         struct extent_backref *back;
4419         struct data_backref *dback;
4420         struct extent_entry *entry, *best = NULL;
4421         LIST_HEAD(entries);
4422         int nr_entries = 0;
4423         int broken_entries = 0;
4424         int ret = 0;
4425         short mismatch = 0;
4426
4427         /*
4428          * Metadata is easy and the backrefs should always agree on bytenr and
4429          * size, if not we've got bigger issues.
4430          */
4431         if (rec->metadata)
4432                 return 0;
4433
4434         list_for_each_entry(back, &rec->backrefs, list) {
4435                 dback = (struct data_backref *)back;
4436                 /*
4437                  * We only pay attention to backrefs that we found a real
4438                  * backref for.
4439                  */
4440                 if (dback->found_ref == 0)
4441                         continue;
4442                 if (back->full_backref)
4443                         continue;
4444
4445                 /*
4446                  * For now we only catch when the bytes don't match, not the
4447                  * bytenr.  We can easily do this at the same time, but I want
4448                  * to have a fs image to test on before we just add repair
4449                  * functionality willy-nilly so we know we won't screw up the
4450                  * repair.
4451                  */
4452
4453                 entry = find_entry(&entries, dback->disk_bytenr,
4454                                    dback->bytes);
4455                 if (!entry) {
4456                         entry = malloc(sizeof(struct extent_entry));
4457                         if (!entry) {
4458                                 ret = -ENOMEM;
4459                                 goto out;
4460                         }
4461                         memset(entry, 0, sizeof(*entry));
4462                         entry->bytenr = dback->disk_bytenr;
4463                         entry->bytes = dback->bytes;
4464                         list_add_tail(&entry->list, &entries);
4465                         nr_entries++;
4466                 }
4467
4468                 /*
4469                  * If we only have on entry we may think the entries agree when
4470                  * in reality they don't so we have to do some extra checking.
4471                  */
4472                 if (dback->disk_bytenr != rec->start ||
4473                     dback->bytes != rec->nr || back->broken)
4474                         mismatch = 1;
4475
4476                 if (back->broken) {
4477                         entry->broken++;
4478                         broken_entries++;
4479                 }
4480
4481                 entry->count++;
4482         }
4483
4484         /* Yay all the backrefs agree, carry on good sir */
4485         if (nr_entries <= 1 && !mismatch)
4486                 goto out;
4487
4488         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
4489                 "%Lu\n", rec->start);
4490
4491         /*
4492          * First we want to see if the backrefs can agree amongst themselves who
4493          * is right, so figure out which one of the entries has the highest
4494          * count.
4495          */
4496         best = find_most_right_entry(&entries);
4497
4498         /*
4499          * Ok so we may have an even split between what the backrefs think, so
4500          * this is where we use the extent ref to see what it thinks.
4501          */
4502         if (!best) {
4503                 entry = find_entry(&entries, rec->start, rec->nr);
4504                 if (!entry && (!broken_entries || !rec->found_rec)) {
4505                         fprintf(stderr, "Backrefs don't agree with eachother "
4506                                 "and extent record doesn't agree with anybody,"
4507                                 " so we can't fix bytenr %Lu bytes %Lu\n",
4508                                 rec->start, rec->nr);
4509                         ret = -EINVAL;
4510                         goto out;
4511                 } else if (!entry) {
4512                         /*
4513                          * Ok our backrefs were broken, we'll assume this is the
4514                          * correct value and add an entry for this range.
4515                          */
4516                         entry = malloc(sizeof(struct extent_entry));
4517                         if (!entry) {
4518                                 ret = -ENOMEM;
4519                                 goto out;
4520                         }
4521                         memset(entry, 0, sizeof(*entry));
4522                         entry->bytenr = rec->start;
4523                         entry->bytes = rec->nr;
4524                         list_add_tail(&entry->list, &entries);
4525                         nr_entries++;
4526                 }
4527                 entry->count++;
4528                 best = find_most_right_entry(&entries);
4529                 if (!best) {
4530                         fprintf(stderr, "Backrefs and extent record evenly "
4531                                 "split on who is right, this is going to "
4532                                 "require user input to fix bytenr %Lu bytes "
4533                                 "%Lu\n", rec->start, rec->nr);
4534                         ret = -EINVAL;
4535                         goto out;
4536                 }
4537         }
4538
4539         /*
4540          * I don't think this can happen currently as we'll abort() if we catch
4541          * this case higher up, but in case somebody removes that we still can't
4542          * deal with it properly here yet, so just bail out of that's the case.
4543          */
4544         if (best->bytenr != rec->start) {
4545                 fprintf(stderr, "Extent start and backref starts don't match, "
4546                         "please use btrfs-image on this file system and send "
4547                         "it to a btrfs developer so they can make fsck fix "
4548                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
4549                         rec->start, rec->nr);
4550                 ret = -EINVAL;
4551                 goto out;
4552         }
4553
4554         /*
4555          * Ok great we all agreed on an extent record, let's go find the real
4556          * references and fix up the ones that don't match.
4557          */
4558         list_for_each_entry(back, &rec->backrefs, list) {
4559                 dback = (struct data_backref *)back;
4560
4561                 /*
4562                  * Still ignoring backrefs that don't have a real ref attached
4563                  * to them.
4564                  */
4565                 if (dback->found_ref == 0)
4566                         continue;
4567                 if (back->full_backref)
4568                         continue;
4569
4570                 if (dback->bytes == best->bytes &&
4571                     dback->disk_bytenr == best->bytenr)
4572                         continue;
4573
4574                 ret = repair_ref(trans, info, path, dback, best);
4575                 if (ret)
4576                         goto out;
4577         }
4578
4579         /*
4580          * Ok we messed with the actual refs, which means we need to drop our
4581          * entire cache and go back and rescan.  I know this is a huge pain and
4582          * adds a lot of extra work, but it's the only way to be safe.  Once all
4583          * the backrefs agree we may not need to do anything to the extent
4584          * record itself.
4585          */
4586         ret = -EAGAIN;
4587 out:
4588         while (!list_empty(&entries)) {
4589                 entry = list_entry(entries.next, struct extent_entry, list);
4590                 list_del_init(&entry->list);
4591                 free(entry);
4592         }
4593         return ret;
4594 }
4595
4596 static int process_duplicates(struct btrfs_root *root,
4597                               struct cache_tree *extent_cache,
4598                               struct extent_record *rec)
4599 {
4600         struct extent_record *good, *tmp;
4601         struct cache_extent *cache;
4602         int ret;
4603
4604         /*
4605          * If we found a extent record for this extent then return, or if we
4606          * have more than one duplicate we are likely going to need to delete
4607          * something.
4608          */
4609         if (rec->found_rec || rec->num_duplicates > 1)
4610                 return 0;
4611
4612         /* Shouldn't happen but just in case */
4613         BUG_ON(!rec->num_duplicates);
4614
4615         /*
4616          * So this happens if we end up with a backref that doesn't match the
4617          * actual extent entry.  So either the backref is bad or the extent
4618          * entry is bad.  Either way we want to have the extent_record actually
4619          * reflect what we found in the extent_tree, so we need to take the
4620          * duplicate out and use that as the extent_record since the only way we
4621          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
4622          */
4623         remove_cache_extent(extent_cache, &rec->cache);
4624
4625         good = list_entry(rec->dups.next, struct extent_record, list);
4626         list_del_init(&good->list);
4627         INIT_LIST_HEAD(&good->backrefs);
4628         INIT_LIST_HEAD(&good->dups);
4629         good->cache.start = good->start;
4630         good->cache.size = good->nr;
4631         good->content_checked = 0;
4632         good->owner_ref_checked = 0;
4633         good->num_duplicates = 0;
4634         good->refs = rec->refs;
4635         list_splice_init(&rec->backrefs, &good->backrefs);
4636         while (1) {
4637                 cache = lookup_cache_extent(extent_cache, good->start,
4638                                             good->nr);
4639                 if (!cache)
4640                         break;
4641                 tmp = container_of(cache, struct extent_record, cache);
4642
4643                 /*
4644                  * If we find another overlapping extent and it's found_rec is
4645                  * set then it's a duplicate and we need to try and delete
4646                  * something.
4647                  */
4648                 if (tmp->found_rec || tmp->num_duplicates > 0) {
4649                         if (list_empty(&good->list))
4650                                 list_add_tail(&good->list,
4651                                               &duplicate_extents);
4652                         good->num_duplicates += tmp->num_duplicates + 1;
4653                         list_splice_init(&tmp->dups, &good->dups);
4654                         list_del_init(&tmp->list);
4655                         list_add_tail(&tmp->list, &good->dups);
4656                         remove_cache_extent(extent_cache, &tmp->cache);
4657                         continue;
4658                 }
4659
4660                 /*
4661                  * Ok we have another non extent item backed extent rec, so lets
4662                  * just add it to this extent and carry on like we did above.
4663                  */
4664                 good->refs += tmp->refs;
4665                 list_splice_init(&tmp->backrefs, &good->backrefs);
4666                 remove_cache_extent(extent_cache, &tmp->cache);
4667                 free(tmp);
4668         }
4669         ret = insert_cache_extent(extent_cache, &good->cache);
4670         BUG_ON(ret);
4671         free(rec);
4672         return good->num_duplicates ? 0 : 1;
4673 }
4674
4675 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
4676                                     struct btrfs_root *root,
4677                                     struct extent_record *rec)
4678 {
4679         LIST_HEAD(delete_list);
4680         struct btrfs_path *path;
4681         struct extent_record *tmp, *good, *n;
4682         int nr_del = 0;
4683         int ret = 0;
4684         struct btrfs_key key;
4685
4686         path = btrfs_alloc_path();
4687         if (!path) {
4688                 ret = -ENOMEM;
4689                 goto out;
4690         }
4691
4692         good = rec;
4693         /* Find the record that covers all of the duplicates. */
4694         list_for_each_entry(tmp, &rec->dups, list) {
4695                 if (good->start < tmp->start)
4696                         continue;
4697                 if (good->nr > tmp->nr)
4698                         continue;
4699
4700                 if (tmp->start + tmp->nr < good->start + good->nr) {
4701                         fprintf(stderr, "Ok we have overlapping extents that "
4702                                 "aren't completely covered by eachother, this "
4703                                 "is going to require more careful thought.  "
4704                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
4705                                 tmp->start, tmp->nr, good->start, good->nr);
4706                         abort();
4707                 }
4708                 good = tmp;
4709         }
4710
4711         if (good != rec)
4712                 list_add_tail(&rec->list, &delete_list);
4713
4714         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
4715                 if (tmp == good)
4716                         continue;
4717                 list_move_tail(&tmp->list, &delete_list);
4718         }
4719
4720         root = root->fs_info->extent_root;
4721         list_for_each_entry(tmp, &delete_list, list) {
4722                 if (tmp->found_rec == 0)
4723                         continue;
4724                 key.objectid = tmp->start;
4725                 key.type = BTRFS_EXTENT_ITEM_KEY;
4726                 key.offset = tmp->nr;
4727
4728                 /* Shouldn't happen but just in case */
4729                 if (tmp->metadata) {
4730                         fprintf(stderr, "Well this shouldn't happen, extent "
4731                                 "record overlaps but is metadata? "
4732                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
4733                         abort();
4734                 }
4735
4736                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4737                 if (ret) {
4738                         if (ret > 0)
4739                                 ret = -EINVAL;
4740                         goto out;
4741                 }
4742                 ret = btrfs_del_item(trans, root, path);
4743                 if (ret)
4744                         goto out;
4745                 btrfs_release_path(path);
4746                 nr_del++;
4747         }
4748
4749 out:
4750         while (!list_empty(&delete_list)) {
4751                 tmp = list_entry(delete_list.next, struct extent_record, list);
4752                 list_del_init(&tmp->list);
4753                 if (tmp == rec)
4754                         continue;
4755                 free(tmp);
4756         }
4757
4758         while (!list_empty(&rec->dups)) {
4759                 tmp = list_entry(rec->dups.next, struct extent_record, list);
4760                 list_del_init(&tmp->list);
4761                 free(tmp);
4762         }
4763
4764         btrfs_free_path(path);
4765
4766         if (!ret && !nr_del)
4767                 rec->num_duplicates = 0;
4768
4769         return ret ? ret : nr_del;
4770 }
4771
4772 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
4773                                   struct btrfs_fs_info *info,
4774                                   struct btrfs_path *path,
4775                                   struct cache_tree *extent_cache,
4776                                   struct extent_record *rec)
4777 {
4778         struct btrfs_root *root;
4779         struct extent_backref *back;
4780         struct data_backref *dback;
4781         struct cache_extent *cache;
4782         struct btrfs_file_extent_item *fi;
4783         struct btrfs_key key;
4784         u64 bytenr, bytes;
4785         int ret;
4786
4787         list_for_each_entry(back, &rec->backrefs, list) {
4788                 dback = (struct data_backref *)back;
4789
4790                 /* We found this one, we don't need to do a lookup */
4791                 if (dback->found_ref)
4792                         continue;
4793                 /* Don't care about full backrefs (poor unloved backrefs) */
4794                 if (back->full_backref)
4795                         continue;
4796                 key.objectid = dback->root;
4797                 key.type = BTRFS_ROOT_ITEM_KEY;
4798                 key.offset = (u64)-1;
4799
4800                 root = btrfs_read_fs_root(info, &key);
4801
4802                 /* No root, definitely a bad ref, skip */
4803                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
4804                         continue;
4805                 /* Other err, exit */
4806                 if (IS_ERR(root))
4807                         return PTR_ERR(root);
4808
4809                 key.objectid = dback->owner;
4810                 key.type = BTRFS_EXTENT_DATA_KEY;
4811                 key.offset = dback->offset;
4812                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4813                 if (ret) {
4814                         btrfs_release_path(path);
4815                         if (ret < 0)
4816                                 return ret;
4817                         /* Didn't find it, we can carry on */
4818                         ret = 0;
4819                         continue;
4820                 }
4821
4822                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
4823                                     struct btrfs_file_extent_item);
4824                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
4825                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
4826                 btrfs_release_path(path);
4827                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4828                 if (cache) {
4829                         struct extent_record *tmp;
4830                         tmp = container_of(cache, struct extent_record, cache);
4831
4832                         /*
4833                          * If we found an extent record for the bytenr for this
4834                          * particular backref then we can't add it to our
4835                          * current extent record.  We only want to add backrefs
4836                          * that don't have a corresponding extent item in the
4837                          * extent tree since they likely belong to this record
4838                          * and we need to fix it if it doesn't match bytenrs.
4839                          */
4840                         if  (tmp->found_rec)
4841                                 continue;
4842                 }
4843
4844                 dback->found_ref += 1;
4845                 dback->disk_bytenr = bytenr;
4846                 dback->bytes = bytes;
4847
4848                 /*
4849                  * Set this so the verify backref code knows not to trust the
4850                  * values in this backref.
4851                  */
4852                 back->broken = 1;
4853         }
4854
4855         return 0;
4856 }
4857
4858 /*
4859  * when an incorrect extent item is found, this will delete
4860  * all of the existing entries for it and recreate them
4861  * based on what the tree scan found.
4862  */
4863 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
4864                              struct btrfs_fs_info *info,
4865                              struct cache_tree *extent_cache,
4866                              struct extent_record *rec)
4867 {
4868         int ret;
4869         struct btrfs_path *path;
4870         struct list_head *cur = rec->backrefs.next;
4871         struct cache_extent *cache;
4872         struct extent_backref *back;
4873         int allocated = 0;
4874         u64 flags = 0;
4875
4876         /* remember our flags for recreating the extent */
4877         ret = btrfs_lookup_extent_info(NULL, info->extent_root, rec->start,
4878                                        rec->max_size, rec->metadata, NULL,
4879                                        &flags);
4880         if (ret < 0)
4881                 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4882
4883         path = btrfs_alloc_path();
4884         if (!path)
4885                 return -ENOMEM;
4886
4887         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
4888                 /*
4889                  * Sometimes the backrefs themselves are so broken they don't
4890                  * get attached to any meaningful rec, so first go back and
4891                  * check any of our backrefs that we couldn't find and throw
4892                  * them into the list if we find the backref so that
4893                  * verify_backrefs can figure out what to do.
4894                  */
4895                 ret = find_possible_backrefs(trans, info, path, extent_cache,
4896                                              rec);
4897                 if (ret < 0)
4898                         goto out;
4899         }
4900
4901         /* step one, make sure all of the backrefs agree */
4902         ret = verify_backrefs(trans, info, path, rec);
4903         if (ret < 0)
4904                 goto out;
4905
4906         /* step two, delete all the existing records */
4907         ret = delete_extent_records(trans, info->extent_root, path,
4908                                     rec->start, rec->max_size);
4909
4910         if (ret < 0)
4911                 goto out;
4912
4913         /* was this block corrupt?  If so, don't add references to it */
4914         cache = lookup_cache_extent(info->corrupt_blocks,
4915                                     rec->start, rec->max_size);
4916         if (cache) {
4917                 ret = 0;
4918                 goto out;
4919         }
4920
4921         /* step three, recreate all the refs we did find */
4922         while(cur != &rec->backrefs) {
4923                 back = list_entry(cur, struct extent_backref, list);
4924                 cur = cur->next;
4925
4926                 /*
4927                  * if we didn't find any references, don't create a
4928                  * new extent record
4929                  */
4930                 if (!back->found_ref)
4931                         continue;
4932
4933                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
4934                 allocated = 1;
4935
4936                 if (ret)
4937                         goto out;
4938         }
4939 out:
4940         btrfs_free_path(path);
4941         return ret;
4942 }
4943
4944 /* right now we only prune from the extent allocation tree */
4945 static int prune_one_block(struct btrfs_trans_handle *trans,
4946                            struct btrfs_fs_info *info,
4947                            struct btrfs_corrupt_block *corrupt)
4948 {
4949         int ret;
4950         struct btrfs_path path;
4951         struct extent_buffer *eb;
4952         u64 found;
4953         int slot;
4954         int nritems;
4955         int level = corrupt->level + 1;
4956
4957         btrfs_init_path(&path);
4958 again:
4959         /* we want to stop at the parent to our busted block */
4960         path.lowest_level = level;
4961
4962         ret = btrfs_search_slot(trans, info->extent_root,
4963                                 &corrupt->key, &path, -1, 1);
4964
4965         if (ret < 0)
4966                 goto out;
4967
4968         eb = path.nodes[level];
4969         if (!eb) {
4970                 ret = -ENOENT;
4971                 goto out;
4972         }
4973
4974         /*
4975          * hopefully the search gave us the block we want to prune,
4976          * lets try that first
4977          */
4978         slot = path.slots[level];
4979         found =  btrfs_node_blockptr(eb, slot);
4980         if (found == corrupt->cache.start)
4981                 goto del_ptr;
4982
4983         nritems = btrfs_header_nritems(eb);
4984
4985         /* the search failed, lets scan this node and hope we find it */
4986         for (slot = 0; slot < nritems; slot++) {
4987                 found =  btrfs_node_blockptr(eb, slot);
4988                 if (found == corrupt->cache.start)
4989                         goto del_ptr;
4990         }
4991         /*
4992          * we couldn't find the bad block.  TODO, search all the nodes for pointers
4993          * to this block
4994          */
4995         if (eb == info->extent_root->node) {
4996                 ret = -ENOENT;
4997                 goto out;
4998         } else {
4999                 level++;
5000                 btrfs_release_path(&path);
5001                 goto again;
5002         }
5003
5004 del_ptr:
5005         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
5006         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
5007
5008 out:
5009         btrfs_release_path(&path);
5010         return ret;
5011 }
5012
5013 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
5014                                 struct btrfs_fs_info *info)
5015 {
5016         struct cache_extent *cache;
5017         struct btrfs_corrupt_block *corrupt;
5018
5019         cache = search_cache_extent(info->corrupt_blocks, 0);
5020         while (1) {
5021                 if (!cache)
5022                         break;
5023                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5024                 prune_one_block(trans, info, corrupt);
5025                 cache = next_cache_extent(cache);
5026         }
5027         return 0;
5028 }
5029
5030 static void free_corrupt_block(struct cache_extent *cache)
5031 {
5032         struct btrfs_corrupt_block *corrupt;
5033
5034         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5035         free(corrupt);
5036 }
5037
5038 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
5039
5040 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
5041 {
5042         struct btrfs_block_group_cache *cache;
5043         u64 start, end;
5044         int ret;
5045
5046         while (1) {
5047                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
5048                                             &start, &end, EXTENT_DIRTY);
5049                 if (ret)
5050                         break;
5051                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
5052                                    GFP_NOFS);
5053         }
5054
5055         start = 0;
5056         while (1) {
5057                 cache = btrfs_lookup_first_block_group(fs_info, start);
5058                 if (!cache)
5059                         break;
5060                 if (cache->cached)
5061                         cache->cached = 0;
5062                 start = cache->key.objectid + cache->key.offset;
5063         }
5064 }
5065
5066 static int check_extent_refs(struct btrfs_trans_handle *trans,
5067                              struct btrfs_root *root,
5068                              struct cache_tree *extent_cache)
5069 {
5070         struct extent_record *rec;
5071         struct cache_extent *cache;
5072         int err = 0;
5073         int ret = 0;
5074         int fixed = 0;
5075         int had_dups = 0;
5076
5077         if (repair) {
5078                 /*
5079                  * if we're doing a repair, we have to make sure
5080                  * we don't allocate from the problem extents.
5081                  * In the worst case, this will be all the
5082                  * extents in the FS
5083                  */
5084                 cache = search_cache_extent(extent_cache, 0);
5085                 while(cache) {
5086                         rec = container_of(cache, struct extent_record, cache);
5087                         btrfs_pin_extent(root->fs_info,
5088                                          rec->start, rec->max_size);
5089                         cache = next_cache_extent(cache);
5090                 }
5091
5092                 /* pin down all the corrupted blocks too */
5093                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
5094                 while(cache) {
5095                         btrfs_pin_extent(root->fs_info,
5096                                          cache->start, cache->size);
5097                         cache = next_cache_extent(cache);
5098                 }
5099                 prune_corrupt_blocks(trans, root->fs_info);
5100                 reset_cached_block_groups(root->fs_info);
5101         }
5102
5103         /*
5104          * We need to delete any duplicate entries we find first otherwise we
5105          * could mess up the extent tree when we have backrefs that actually
5106          * belong to a different extent item and not the weird duplicate one.
5107          */
5108         while (repair && !list_empty(&duplicate_extents)) {
5109                 rec = list_entry(duplicate_extents.next, struct extent_record,
5110                                  list);
5111                 list_del_init(&rec->list);
5112
5113                 /* Sometimes we can find a backref before we find an actual
5114                  * extent, so we need to process it a little bit to see if there
5115                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
5116                  * if this is a backref screwup.  If we need to delete stuff
5117                  * process_duplicates() will return 0, otherwise it will return
5118                  * 1 and we
5119                  */
5120                 if (process_duplicates(root, extent_cache, rec))
5121                         continue;
5122                 ret = delete_duplicate_records(trans, root, rec);
5123                 if (ret < 0)
5124                         return ret;
5125                 /*
5126                  * delete_duplicate_records will return the number of entries
5127                  * deleted, so if it's greater than 0 then we know we actually
5128                  * did something and we need to remove.
5129                  */
5130                 if (ret)
5131                         had_dups = 1;
5132         }
5133
5134         if (had_dups)
5135                 return -EAGAIN;
5136
5137         while(1) {
5138                 fixed = 0;
5139                 cache = search_cache_extent(extent_cache, 0);
5140                 if (!cache)
5141                         break;
5142                 rec = container_of(cache, struct extent_record, cache);
5143                 if (rec->num_duplicates) {
5144                         fprintf(stderr, "extent item %llu has multiple extent "
5145                                 "items\n", (unsigned long long)rec->start);
5146                         err = 1;
5147                 }
5148
5149                 if (rec->refs != rec->extent_item_refs) {
5150                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
5151                                 (unsigned long long)rec->start,
5152                                 (unsigned long long)rec->nr);
5153                         fprintf(stderr, "extent item %llu, found %llu\n",
5154                                 (unsigned long long)rec->extent_item_refs,
5155                                 (unsigned long long)rec->refs);
5156                         if (!fixed && repair) {
5157                                 ret = fixup_extent_refs(trans, root->fs_info,
5158                                                         extent_cache, rec);
5159                                 if (ret)
5160                                         goto repair_abort;
5161                                 fixed = 1;
5162                         }
5163                         err = 1;
5164
5165                 }
5166                 if (all_backpointers_checked(rec, 1)) {
5167                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
5168                                 (unsigned long long)rec->start,
5169                                 (unsigned long long)rec->nr);
5170
5171                         if (!fixed && repair) {
5172                                 ret = fixup_extent_refs(trans, root->fs_info,
5173                                                         extent_cache, rec);
5174                                 if (ret)
5175                                         goto repair_abort;
5176                                 fixed = 1;
5177                         }
5178
5179                         err = 1;
5180                 }
5181                 if (!rec->owner_ref_checked) {
5182                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
5183                                 (unsigned long long)rec->start,
5184                                 (unsigned long long)rec->nr);
5185                         if (!fixed && repair) {
5186                                 ret = fixup_extent_refs(trans, root->fs_info,
5187                                                         extent_cache, rec);
5188                                 if (ret)
5189                                         goto repair_abort;
5190                                 fixed = 1;
5191                         }
5192                         err = 1;
5193                 }
5194
5195                 remove_cache_extent(extent_cache, cache);
5196                 free_all_extent_backrefs(rec);
5197                 free(rec);
5198         }
5199 repair_abort:
5200         if (repair) {
5201                 if (ret && ret != -EAGAIN) {
5202                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
5203                         exit(1);
5204                 } else if (!ret) {
5205                         btrfs_fix_block_accounting(trans, root);
5206                 }
5207                 if (err)
5208                         fprintf(stderr, "repaired damaged extent references\n");
5209                 return ret;
5210         }
5211         return err;
5212 }
5213
5214 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
5215 {
5216         u64 stripe_size;
5217
5218         if (type & BTRFS_BLOCK_GROUP_RAID0) {
5219                 stripe_size = length;
5220                 stripe_size /= num_stripes;
5221         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
5222                 stripe_size = length * 2;
5223                 stripe_size /= num_stripes;
5224         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
5225                 stripe_size = length;
5226                 stripe_size /= (num_stripes - 1);
5227         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
5228                 stripe_size = length;
5229                 stripe_size /= (num_stripes - 2);
5230         } else {
5231                 stripe_size = length;
5232         }
5233         return stripe_size;
5234 }
5235
5236 static int check_chunk_refs(struct chunk_record *chunk_rec,
5237                             struct block_group_tree *block_group_cache,
5238                             struct device_extent_tree *dev_extent_cache,
5239                             int silent)
5240 {
5241         struct cache_extent *block_group_item;
5242         struct block_group_record *block_group_rec;
5243         struct cache_extent *dev_extent_item;
5244         struct device_extent_record *dev_extent_rec;
5245         u64 devid;
5246         u64 offset;
5247         u64 length;
5248         int i;
5249         int ret = 0;
5250
5251         block_group_item = lookup_cache_extent(&block_group_cache->tree,
5252                                                chunk_rec->offset,
5253                                                chunk_rec->length);
5254         if (block_group_item) {
5255                 block_group_rec = container_of(block_group_item,
5256                                                struct block_group_record,
5257                                                cache);
5258                 if (chunk_rec->length != block_group_rec->offset ||
5259                     chunk_rec->offset != block_group_rec->objectid ||
5260                     chunk_rec->type_flags != block_group_rec->flags) {
5261                         if (!silent)
5262                                 fprintf(stderr,
5263                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
5264                                         chunk_rec->objectid,
5265                                         chunk_rec->type,
5266                                         chunk_rec->offset,
5267                                         chunk_rec->length,
5268                                         chunk_rec->offset,
5269                                         chunk_rec->type_flags,
5270                                         block_group_rec->objectid,
5271                                         block_group_rec->type,
5272                                         block_group_rec->offset,
5273                                         block_group_rec->offset,
5274                                         block_group_rec->objectid,
5275                                         block_group_rec->flags);
5276                         ret = -1;
5277                 } else {
5278                         list_del_init(&block_group_rec->list);
5279                         chunk_rec->bg_rec = block_group_rec;
5280                 }
5281         } else {
5282                 if (!silent)
5283                         fprintf(stderr,
5284                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
5285                                 chunk_rec->objectid,
5286                                 chunk_rec->type,
5287                                 chunk_rec->offset,
5288                                 chunk_rec->length,
5289                                 chunk_rec->offset,
5290                                 chunk_rec->type_flags);
5291                 ret = -1;
5292         }
5293
5294         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
5295                                     chunk_rec->num_stripes);
5296         for (i = 0; i < chunk_rec->num_stripes; ++i) {
5297                 devid = chunk_rec->stripes[i].devid;
5298                 offset = chunk_rec->stripes[i].offset;
5299                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
5300                                                        devid, offset, length);
5301                 if (dev_extent_item) {
5302                         dev_extent_rec = container_of(dev_extent_item,
5303                                                 struct device_extent_record,
5304                                                 cache);
5305                         if (dev_extent_rec->objectid != devid ||
5306                             dev_extent_rec->offset != offset ||
5307                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
5308                             dev_extent_rec->length != length) {
5309                                 if (!silent)
5310                                         fprintf(stderr,
5311                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
5312                                                 chunk_rec->objectid,
5313                                                 chunk_rec->type,
5314                                                 chunk_rec->offset,
5315                                                 chunk_rec->stripes[i].devid,
5316                                                 chunk_rec->stripes[i].offset,
5317                                                 dev_extent_rec->objectid,
5318                                                 dev_extent_rec->offset,
5319                                                 dev_extent_rec->length);
5320                                 ret = -1;
5321                         } else {
5322                                 list_move(&dev_extent_rec->chunk_list,
5323                                           &chunk_rec->dextents);
5324                         }
5325                 } else {
5326                         if (!silent)
5327                                 fprintf(stderr,
5328                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
5329                                         chunk_rec->objectid,
5330                                         chunk_rec->type,
5331                                         chunk_rec->offset,
5332                                         chunk_rec->stripes[i].devid,
5333                                         chunk_rec->stripes[i].offset);
5334                         ret = -1;
5335                 }
5336         }
5337         return ret;
5338 }
5339
5340 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
5341 int check_chunks(struct cache_tree *chunk_cache,
5342                  struct block_group_tree *block_group_cache,
5343                  struct device_extent_tree *dev_extent_cache,
5344                  struct list_head *good, struct list_head *bad, int silent)
5345 {
5346         struct cache_extent *chunk_item;
5347         struct chunk_record *chunk_rec;
5348         struct block_group_record *bg_rec;
5349         struct device_extent_record *dext_rec;
5350         int err;
5351         int ret = 0;
5352
5353         chunk_item = first_cache_extent(chunk_cache);
5354         while (chunk_item) {
5355                 chunk_rec = container_of(chunk_item, struct chunk_record,
5356                                          cache);
5357                 err = check_chunk_refs(chunk_rec, block_group_cache,
5358                                        dev_extent_cache, silent);
5359                 if (err) {
5360                         ret = err;
5361                         if (bad)
5362                                 list_add_tail(&chunk_rec->list, bad);
5363                 } else {
5364                         if (good)
5365                                 list_add_tail(&chunk_rec->list, good);
5366                 }
5367
5368                 chunk_item = next_cache_extent(chunk_item);
5369         }
5370
5371         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
5372                 if (!silent)
5373                         fprintf(stderr,
5374                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
5375                                 bg_rec->objectid,
5376                                 bg_rec->offset,
5377                                 bg_rec->flags);
5378                 if (!ret)
5379                         ret = 1;
5380         }
5381
5382         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
5383                             chunk_list) {
5384                 if (!silent)
5385                         fprintf(stderr,
5386                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
5387                                 dext_rec->objectid,
5388                                 dext_rec->offset,
5389                                 dext_rec->length);
5390                 if (!ret)
5391                         ret = 1;
5392         }
5393         return ret;
5394 }
5395
5396
5397 static int check_device_used(struct device_record *dev_rec,
5398                              struct device_extent_tree *dext_cache)
5399 {
5400         struct cache_extent *cache;
5401         struct device_extent_record *dev_extent_rec;
5402         u64 total_byte = 0;
5403
5404         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
5405         while (cache) {
5406                 dev_extent_rec = container_of(cache,
5407                                               struct device_extent_record,
5408                                               cache);
5409                 if (dev_extent_rec->objectid != dev_rec->devid)
5410                         break;
5411
5412                 list_del(&dev_extent_rec->device_list);
5413                 total_byte += dev_extent_rec->length;
5414                 cache = next_cache_extent(cache);
5415         }
5416
5417         if (total_byte != dev_rec->byte_used) {
5418                 fprintf(stderr,
5419                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
5420                         total_byte, dev_rec->byte_used, dev_rec->objectid,
5421                         dev_rec->type, dev_rec->offset);
5422                 return -1;
5423         } else {
5424                 return 0;
5425         }
5426 }
5427
5428 /* check btrfs_dev_item -> btrfs_dev_extent */
5429 static int check_devices(struct rb_root *dev_cache,
5430                          struct device_extent_tree *dev_extent_cache)
5431 {
5432         struct rb_node *dev_node;
5433         struct device_record *dev_rec;
5434         struct device_extent_record *dext_rec;
5435         int err;
5436         int ret = 0;
5437
5438         dev_node = rb_first(dev_cache);
5439         while (dev_node) {
5440                 dev_rec = container_of(dev_node, struct device_record, node);
5441                 err = check_device_used(dev_rec, dev_extent_cache);
5442                 if (err)
5443                         ret = err;
5444
5445                 dev_node = rb_next(dev_node);
5446         }
5447         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
5448                             device_list) {
5449                 fprintf(stderr,
5450                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
5451                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
5452                 if (!ret)
5453                         ret = 1;
5454         }
5455         return ret;
5456 }
5457
5458 static int check_chunks_and_extents(struct btrfs_root *root)
5459 {
5460         struct rb_root dev_cache;
5461         struct cache_tree chunk_cache;
5462         struct block_group_tree block_group_cache;
5463         struct device_extent_tree dev_extent_cache;
5464         struct cache_tree extent_cache;
5465         struct cache_tree seen;
5466         struct cache_tree pending;
5467         struct cache_tree reada;
5468         struct cache_tree nodes;
5469         struct cache_tree corrupt_blocks;
5470         struct btrfs_path path;
5471         struct btrfs_key key;
5472         struct btrfs_key found_key;
5473         int ret, err = 0;
5474         u64 last = 0;
5475         struct block_info *bits;
5476         int bits_nr;
5477         struct extent_buffer *leaf;
5478         struct btrfs_trans_handle *trans = NULL;
5479         int slot;
5480         struct btrfs_root_item ri;
5481         struct list_head dropping_trees;
5482
5483         dev_cache = RB_ROOT;
5484         cache_tree_init(&chunk_cache);
5485         block_group_tree_init(&block_group_cache);
5486         device_extent_tree_init(&dev_extent_cache);
5487
5488         cache_tree_init(&extent_cache);
5489         cache_tree_init(&seen);
5490         cache_tree_init(&pending);
5491         cache_tree_init(&nodes);
5492         cache_tree_init(&reada);
5493         cache_tree_init(&corrupt_blocks);
5494         INIT_LIST_HEAD(&dropping_trees);
5495
5496         if (repair) {
5497                 trans = btrfs_start_transaction(root, 1);
5498                 if (IS_ERR(trans)) {
5499                         fprintf(stderr, "Error starting transaction\n");
5500                         return PTR_ERR(trans);
5501                 }
5502                 root->fs_info->fsck_extent_cache = &extent_cache;
5503                 root->fs_info->free_extent_hook = free_extent_hook;
5504                 root->fs_info->corrupt_blocks = &corrupt_blocks;
5505         }
5506
5507         bits_nr = 1024;
5508         bits = malloc(bits_nr * sizeof(struct block_info));
5509         if (!bits) {
5510                 perror("malloc");
5511                 exit(1);
5512         }
5513
5514 again:
5515         add_root_to_pending(root->fs_info->tree_root->node,
5516                             &extent_cache, &pending, &seen, &nodes,
5517                             &root->fs_info->tree_root->root_key);
5518
5519         add_root_to_pending(root->fs_info->chunk_root->node,
5520                             &extent_cache, &pending, &seen, &nodes,
5521                             &root->fs_info->chunk_root->root_key);
5522
5523         btrfs_init_path(&path);
5524         key.offset = 0;
5525         key.objectid = 0;
5526         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
5527         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
5528                                         &key, &path, 0, 0);
5529         BUG_ON(ret < 0);
5530         while(1) {
5531                 leaf = path.nodes[0];
5532                 slot = path.slots[0];
5533                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
5534                         ret = btrfs_next_leaf(root, &path);
5535                         if (ret != 0)
5536                                 break;
5537                         leaf = path.nodes[0];
5538                         slot = path.slots[0];
5539                 }
5540                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
5541                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
5542                         unsigned long offset;
5543                         struct extent_buffer *buf;
5544
5545                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5546                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
5547                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
5548                                 buf = read_tree_block(root->fs_info->tree_root,
5549                                                       btrfs_root_bytenr(&ri),
5550                                                       btrfs_level_size(root,
5551                                                       btrfs_root_level(&ri)),
5552                                                       0);
5553                                 add_root_to_pending(buf, &extent_cache,
5554                                                     &pending, &seen, &nodes,
5555                                                     &found_key);
5556                                 free_extent_buffer(buf);
5557                         } else {
5558                                 struct dropping_root_item_record *dri_rec;
5559                                 dri_rec = malloc(sizeof(*dri_rec));
5560                                 if (!dri_rec) {
5561                                         perror("malloc");
5562                                         exit(1);
5563                                 }
5564                                 memcpy(&dri_rec->ri, &ri, sizeof(ri));
5565                                 memcpy(&dri_rec->found_key, &found_key,
5566                                        sizeof(found_key));
5567                                 list_add_tail(&dri_rec->list, &dropping_trees);
5568                         }
5569                 }
5570                 path.slots[0]++;
5571         }
5572         btrfs_release_path(&path);
5573         while (1) {
5574                 ret = run_next_block(root, bits, bits_nr, &last, &pending,
5575                                      &seen, &reada, &nodes, &extent_cache,
5576                                      &chunk_cache, &dev_cache,
5577                                      &block_group_cache, &dev_extent_cache,
5578                                      NULL);
5579                 if (ret != 0)
5580                         break;
5581         }
5582
5583         while (!list_empty(&dropping_trees)) {
5584                 struct dropping_root_item_record *rec;
5585                 struct extent_buffer *buf;
5586                 rec = list_entry(dropping_trees.next,
5587                                  struct dropping_root_item_record, list);
5588                 last = 0;
5589                 if (!bits) {
5590                         perror("realloc");
5591                         exit(1);
5592                 }
5593                 buf = read_tree_block(root->fs_info->tree_root,
5594                                       btrfs_root_bytenr(&rec->ri),
5595                                       btrfs_level_size(root,
5596                                       btrfs_root_level(&rec->ri)), 0);
5597                 add_root_to_pending(buf, &extent_cache, &pending,
5598                                     &seen, &nodes, &rec->found_key);
5599                 while (1) {
5600                         ret = run_next_block(root, bits, bits_nr, &last,
5601                                              &pending, &seen, &reada,
5602                                              &nodes, &extent_cache,
5603                                              &chunk_cache, &dev_cache,
5604                                              &block_group_cache,
5605                                              &dev_extent_cache,
5606                                              &rec->ri);
5607                         if (ret != 0)
5608                                 break;
5609                 }
5610                 free_extent_buffer(buf);
5611                 list_del(&rec->list);
5612                 free(rec);
5613         }
5614
5615         ret = check_extent_refs(trans, root, &extent_cache);
5616         if (ret == -EAGAIN) {
5617                 ret = btrfs_commit_transaction(trans, root);
5618                 if (ret)
5619                         goto out;
5620
5621                 trans = btrfs_start_transaction(root, 1);
5622                 if (IS_ERR(trans)) {
5623                         ret = PTR_ERR(trans);
5624                         goto out;
5625                 }
5626
5627                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
5628                 free_extent_cache_tree(&seen);
5629                 free_extent_cache_tree(&pending);
5630                 free_extent_cache_tree(&reada);
5631                 free_extent_cache_tree(&nodes);
5632                 free_extent_record_cache(root->fs_info, &extent_cache);
5633                 goto again;
5634         }
5635
5636         err = check_chunks(&chunk_cache, &block_group_cache,
5637                            &dev_extent_cache, NULL, NULL, 0);
5638         if (err && !ret)
5639                 ret = err;
5640
5641         err = check_devices(&dev_cache, &dev_extent_cache);
5642         if (err && !ret)
5643                 ret = err;
5644
5645         if (trans) {
5646                 err = btrfs_commit_transaction(trans, root);
5647                 if (!ret)
5648                         ret = err;
5649         }
5650 out:
5651         if (repair) {
5652                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
5653                 root->fs_info->fsck_extent_cache = NULL;
5654                 root->fs_info->free_extent_hook = NULL;
5655                 root->fs_info->corrupt_blocks = NULL;
5656         }
5657         free(bits);
5658         free_chunk_cache_tree(&chunk_cache);
5659         free_device_cache_tree(&dev_cache);
5660         free_block_group_tree(&block_group_cache);
5661         free_device_extent_tree(&dev_extent_cache);
5662         return ret;
5663 }
5664
5665 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
5666                            struct btrfs_root *root, int overwrite)
5667 {
5668         struct extent_buffer *c;
5669         struct extent_buffer *old = root->node;
5670         int level;
5671         struct btrfs_disk_key disk_key = {0,0,0};
5672
5673         level = 0;
5674
5675         if (overwrite) {
5676                 c = old;
5677                 extent_buffer_get(c);
5678                 goto init;
5679         }
5680         c = btrfs_alloc_free_block(trans, root,
5681                                    btrfs_level_size(root, 0),
5682                                    root->root_key.objectid,
5683                                    &disk_key, level, 0, 0);
5684         if (IS_ERR(c)) {
5685                 c = old;
5686                 extent_buffer_get(c);
5687         }
5688 init:
5689         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
5690         btrfs_set_header_level(c, level);
5691         btrfs_set_header_bytenr(c, c->start);
5692         btrfs_set_header_generation(c, trans->transid);
5693         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
5694         btrfs_set_header_owner(c, root->root_key.objectid);
5695
5696         write_extent_buffer(c, root->fs_info->fsid,
5697                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
5698
5699         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
5700                             btrfs_header_chunk_tree_uuid(c),
5701                             BTRFS_UUID_SIZE);
5702
5703         btrfs_mark_buffer_dirty(c);
5704
5705         free_extent_buffer(old);
5706         root->node = c;
5707         add_root_to_dirty_list(root);
5708         return 0;
5709 }
5710
5711 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
5712                                 struct extent_buffer *eb, int tree_root)
5713 {
5714         struct extent_buffer *tmp;
5715         struct btrfs_root_item *ri;
5716         struct btrfs_key key;
5717         u64 bytenr;
5718         u32 leafsize;
5719         int level = btrfs_header_level(eb);
5720         int nritems;
5721         int ret;
5722         int i;
5723
5724         btrfs_pin_extent(fs_info, eb->start, eb->len);
5725
5726         leafsize = btrfs_super_leafsize(fs_info->super_copy);
5727         nritems = btrfs_header_nritems(eb);
5728         for (i = 0; i < nritems; i++) {
5729                 if (level == 0) {
5730                         btrfs_item_key_to_cpu(eb, &key, i);
5731                         if (key.type != BTRFS_ROOT_ITEM_KEY)
5732                                 continue;
5733                         /* Skip the extent root and reloc roots */
5734                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
5735                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
5736                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
5737                                 continue;
5738                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
5739                         bytenr = btrfs_disk_root_bytenr(eb, ri);
5740
5741                         /*
5742                          * If at any point we start needing the real root we
5743                          * will have to build a stump root for the root we are
5744                          * in, but for now this doesn't actually use the root so
5745                          * just pass in extent_root.
5746                          */
5747                         tmp = read_tree_block(fs_info->extent_root, bytenr,
5748                                               leafsize, 0);
5749                         if (!tmp) {
5750                                 fprintf(stderr, "Error reading root block\n");
5751                                 return -EIO;
5752                         }
5753                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
5754                         free_extent_buffer(tmp);
5755                         if (ret)
5756                                 return ret;
5757                 } else {
5758                         bytenr = btrfs_node_blockptr(eb, i);
5759
5760                         /* If we aren't the tree root don't read the block */
5761                         if (level == 1 && !tree_root) {
5762                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
5763                                 continue;
5764                         }
5765
5766                         tmp = read_tree_block(fs_info->extent_root, bytenr,
5767                                               leafsize, 0);
5768                         if (!tmp) {
5769                                 fprintf(stderr, "Error reading tree block\n");
5770                                 return -EIO;
5771                         }
5772                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
5773                         free_extent_buffer(tmp);
5774                         if (ret)
5775                                 return ret;
5776                 }
5777         }
5778
5779         return 0;
5780 }
5781
5782 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
5783 {
5784         int ret;
5785
5786         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
5787         if (ret)
5788                 return ret;
5789
5790         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
5791 }
5792
5793 static int reset_block_groups(struct btrfs_fs_info *fs_info)
5794 {
5795         struct btrfs_path *path;
5796         struct extent_buffer *leaf;
5797         struct btrfs_chunk *chunk;
5798         struct btrfs_key key;
5799         int ret;
5800
5801         path = btrfs_alloc_path();
5802         if (!path)
5803                 return -ENOMEM;
5804
5805         key.objectid = 0;
5806         key.type = BTRFS_CHUNK_ITEM_KEY;
5807         key.offset = 0;
5808
5809         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
5810         if (ret < 0) {
5811                 btrfs_free_path(path);
5812                 return ret;
5813         }
5814
5815         /*
5816          * We do this in case the block groups were screwed up and had alloc
5817          * bits that aren't actually set on the chunks.  This happens with
5818          * restored images every time and could happen in real life I guess.
5819          */
5820         fs_info->avail_data_alloc_bits = 0;
5821         fs_info->avail_metadata_alloc_bits = 0;
5822         fs_info->avail_system_alloc_bits = 0;
5823
5824         /* First we need to create the in-memory block groups */
5825         while (1) {
5826                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5827                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
5828                         if (ret < 0) {
5829                                 btrfs_free_path(path);
5830                                 return ret;
5831                         }
5832                         if (ret) {
5833                                 ret = 0;
5834                                 break;
5835                         }
5836                 }
5837                 leaf = path->nodes[0];
5838                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5839                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
5840                         path->slots[0]++;
5841                         continue;
5842                 }
5843
5844                 chunk = btrfs_item_ptr(leaf, path->slots[0],
5845                                        struct btrfs_chunk);
5846                 btrfs_add_block_group(fs_info, 0,
5847                                       btrfs_chunk_type(leaf, chunk),
5848                                       key.objectid, key.offset,
5849                                       btrfs_chunk_length(leaf, chunk));
5850                 path->slots[0]++;
5851         }
5852
5853         btrfs_free_path(path);
5854         return 0;
5855 }
5856
5857 static int reset_balance(struct btrfs_trans_handle *trans,
5858                          struct btrfs_fs_info *fs_info)
5859 {
5860         struct btrfs_root *root = fs_info->tree_root;
5861         struct btrfs_path *path;
5862         struct extent_buffer *leaf;
5863         struct btrfs_key key;
5864         int del_slot, del_nr = 0;
5865         int ret;
5866         int found = 0;
5867
5868         path = btrfs_alloc_path();
5869         if (!path)
5870                 return -ENOMEM;
5871
5872         key.objectid = BTRFS_BALANCE_OBJECTID;
5873         key.type = BTRFS_BALANCE_ITEM_KEY;
5874         key.offset = 0;
5875
5876         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5877         if (ret) {
5878                 if (ret > 0)
5879                         ret = 0;
5880                 goto out;
5881         }
5882
5883         ret = btrfs_del_item(trans, root, path);
5884         if (ret)
5885                 goto out;
5886         btrfs_release_path(path);
5887
5888         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
5889         key.type = BTRFS_ROOT_ITEM_KEY;
5890         key.offset = 0;
5891
5892         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5893         if (ret < 0)
5894                 goto out;
5895         while (1) {
5896                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5897                         if (!found)
5898                                 break;
5899
5900                         if (del_nr) {
5901                                 ret = btrfs_del_items(trans, root, path,
5902                                                       del_slot, del_nr);
5903                                 del_nr = 0;
5904                                 if (ret)
5905                                         goto out;
5906                         }
5907                         key.offset++;
5908                         btrfs_release_path(path);
5909
5910                         found = 0;
5911                         ret = btrfs_search_slot(trans, root, &key, path,
5912                                                 -1, 1);
5913                         if (ret < 0)
5914                                 goto out;
5915                         continue;
5916                 }
5917                 found = 1;
5918                 leaf = path->nodes[0];
5919                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5920                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
5921                         break;
5922                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5923                         path->slots[0]++;
5924                         continue;
5925                 }
5926                 if (!del_nr) {
5927                         del_slot = path->slots[0];
5928                         del_nr = 1;
5929                 } else {
5930                         del_nr++;
5931                 }
5932                 path->slots[0]++;
5933         }
5934
5935         if (del_nr) {
5936                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
5937                 if (ret)
5938                         goto out;
5939         }
5940         btrfs_release_path(path);
5941
5942         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
5943         key.type = BTRFS_ROOT_ITEM_KEY;
5944         key.offset = (u64)-1;
5945         root = btrfs_read_fs_root(fs_info, &key);
5946         if (IS_ERR(root)) {
5947                 fprintf(stderr, "Error reading data reloc tree\n");
5948                 return PTR_ERR(root);
5949         }
5950         root->track_dirty = 1;
5951         if (root->last_trans != trans->transid) {
5952                 root->last_trans = trans->transid;
5953                 root->commit_root = root->node;
5954                 extent_buffer_get(root->node);
5955         }
5956         ret = btrfs_fsck_reinit_root(trans, root, 0);
5957 out:
5958         btrfs_free_path(path);
5959         return ret;
5960 }
5961
5962 static int reinit_extent_tree(struct btrfs_fs_info *fs_info)
5963 {
5964         struct btrfs_trans_handle *trans;
5965         u64 start = 0;
5966         int ret;
5967
5968         /*
5969          * The only reason we don't do this is because right now we're just
5970          * walking the trees we find and pinning down their bytes, we don't look
5971          * at any of the leaves.  In order to do mixed groups we'd have to check
5972          * the leaves of any fs roots and pin down the bytes for any file
5973          * extents we find.  Not hard but why do it if we don't have to?
5974          */
5975         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
5976                 fprintf(stderr, "We don't support re-initing the extent tree "
5977                         "for mixed block groups yet, please notify a btrfs "
5978                         "developer you want to do this so they can add this "
5979                         "functionality.\n");
5980                 return -EINVAL;
5981         }
5982
5983         trans = btrfs_start_transaction(fs_info->extent_root, 1);
5984         if (IS_ERR(trans)) {
5985                 fprintf(stderr, "Error starting transaction\n");
5986                 return PTR_ERR(trans);
5987         }
5988
5989         /*
5990          * first we need to walk all of the trees except the extent tree and pin
5991          * down the bytes that are in use so we don't overwrite any existing
5992          * metadata.
5993          */
5994         ret = pin_metadata_blocks(fs_info);
5995         if (ret) {
5996                 fprintf(stderr, "error pinning down used bytes\n");
5997                 return ret;
5998         }
5999
6000         /*
6001          * Need to drop all the block groups since we're going to recreate all
6002          * of them again.
6003          */
6004         btrfs_free_block_groups(fs_info);
6005         ret = reset_block_groups(fs_info);
6006         if (ret) {
6007                 fprintf(stderr, "error resetting the block groups\n");
6008                 return ret;
6009         }
6010
6011         ret = reset_balance(trans, fs_info);
6012         if (ret) {
6013                 fprintf(stderr, "error reseting the pending balance\n");
6014                 return ret;
6015         }
6016
6017         /* Ok we can allocate now, reinit the extent root */
6018         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
6019         if (ret) {
6020                 fprintf(stderr, "extent root initialization failed\n");
6021                 /*
6022                  * When the transaction code is updated we should end the
6023                  * transaction, but for now progs only knows about commit so
6024                  * just return an error.
6025                  */
6026                 return ret;
6027         }
6028
6029         /*
6030          * Now we have all the in-memory block groups setup so we can make
6031          * allocations properly, and the metadata we care about is safe since we
6032          * pinned all of it above.
6033          */
6034         while (1) {
6035                 struct btrfs_block_group_cache *cache;
6036
6037                 cache = btrfs_lookup_first_block_group(fs_info, start);
6038                 if (!cache)
6039                         break;
6040                 start = cache->key.objectid + cache->key.offset;
6041                 ret = btrfs_insert_item(trans, fs_info->extent_root,
6042                                         &cache->key, &cache->item,
6043                                         sizeof(cache->item));
6044                 if (ret) {
6045                         fprintf(stderr, "Error adding block group\n");
6046                         return ret;
6047                 }
6048                 btrfs_extent_post_op(trans, fs_info->extent_root);
6049         }
6050
6051         /*
6052          * Ok now we commit and run the normal fsck, which will add extent
6053          * entries for all of the items it finds.
6054          */
6055         return btrfs_commit_transaction(trans, fs_info->extent_root);
6056 }
6057
6058 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
6059 {
6060         struct btrfs_path *path;
6061         struct btrfs_trans_handle *trans;
6062         struct btrfs_key key;
6063         int ret;
6064
6065         printf("Recowing metadata block %llu\n", eb->start);
6066         key.objectid = btrfs_header_owner(eb);
6067         key.type = BTRFS_ROOT_ITEM_KEY;
6068         key.offset = (u64)-1;
6069
6070         root = btrfs_read_fs_root(root->fs_info, &key);
6071         if (IS_ERR(root)) {
6072                 fprintf(stderr, "Couldn't find owner root %llu\n",
6073                         key.objectid);
6074                 return PTR_ERR(root);
6075         }
6076
6077         path = btrfs_alloc_path();
6078         if (!path)
6079                 return -ENOMEM;
6080
6081         trans = btrfs_start_transaction(root, 1);
6082         if (IS_ERR(trans)) {
6083                 btrfs_free_path(path);
6084                 return PTR_ERR(trans);
6085         }
6086
6087         path->lowest_level = btrfs_header_level(eb);
6088         if (path->lowest_level)
6089                 btrfs_node_key_to_cpu(eb, &key, 0);
6090         else
6091                 btrfs_item_key_to_cpu(eb, &key, 0);
6092
6093         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6094         btrfs_commit_transaction(trans, root);
6095         btrfs_free_path(path);
6096         return ret;
6097 }
6098
6099 static struct option long_options[] = {
6100         { "super", 1, NULL, 's' },
6101         { "repair", 0, NULL, 0 },
6102         { "init-csum-tree", 0, NULL, 0 },
6103         { "init-extent-tree", 0, NULL, 0 },
6104         { "backup", 0, NULL, 0 },
6105         { NULL, 0, NULL, 0}
6106 };
6107
6108 const char * const cmd_check_usage[] = {
6109         "btrfs check [options] <device>",
6110         "Check an unmounted btrfs filesystem.",
6111         "",
6112         "-s|--super <superblock>     use this superblock copy",
6113         "-b|--backup                 use the backup root copy",
6114         "--repair                    try to repair the filesystem",
6115         "--init-csum-tree            create a new CRC tree",
6116         "--init-extent-tree          create a new extent tree",
6117         NULL
6118 };
6119
6120 int cmd_check(int argc, char **argv)
6121 {
6122         struct cache_tree root_cache;
6123         struct btrfs_root *root;
6124         struct btrfs_fs_info *info;
6125         u64 bytenr = 0;
6126         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
6127         int ret;
6128         int num;
6129         int option_index = 0;
6130         int init_csum_tree = 0;
6131         int init_extent_tree = 0;
6132         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_PARTIAL;
6133
6134         while(1) {
6135                 int c;
6136                 c = getopt_long(argc, argv, "as:b", long_options,
6137                                 &option_index);
6138                 if (c < 0)
6139                         break;
6140                 switch(c) {
6141                         case 'a': /* ignored */ break;
6142                         case 'b':
6143                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
6144                                 break;
6145                         case 's':
6146                                 num = atol(optarg);
6147                                 bytenr = btrfs_sb_offset(num);
6148                                 printf("using SB copy %d, bytenr %llu\n", num,
6149                                        (unsigned long long)bytenr);
6150                                 break;
6151                         case '?':
6152                         case 'h':
6153                                 usage(cmd_check_usage);
6154                 }
6155                 if (option_index == 1) {
6156                         printf("enabling repair mode\n");
6157                         repair = 1;
6158                         ctree_flags |= OPEN_CTREE_WRITES;
6159                 } else if (option_index == 2) {
6160                         printf("Creating a new CRC tree\n");
6161                         init_csum_tree = 1;
6162                         repair = 1;
6163                         ctree_flags |= OPEN_CTREE_WRITES;
6164                 } else if (option_index == 3) {
6165                         init_extent_tree = 1;
6166                         ctree_flags |= (OPEN_CTREE_WRITES |
6167                                         OPEN_CTREE_NO_BLOCK_GROUPS);
6168                         repair = 1;
6169                 }
6170
6171         }
6172         argc = argc - optind;
6173
6174         if (argc != 1)
6175                 usage(cmd_check_usage);
6176
6177         radix_tree_init();
6178         cache_tree_init(&root_cache);
6179
6180         if((ret = check_mounted(argv[optind])) < 0) {
6181                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
6182                 return ret;
6183         } else if(ret) {
6184                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
6185                 return -EBUSY;
6186         }
6187
6188         info = open_ctree_fs_info(argv[optind], bytenr, 0, ctree_flags);
6189         if (!info) {
6190                 fprintf(stderr, "Couldn't open file system\n");
6191                 return -EIO;
6192         }
6193
6194         uuid_unparse(info->super_copy->fsid, uuidbuf);
6195         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
6196
6197         if (!extent_buffer_uptodate(info->tree_root->node) ||
6198             !extent_buffer_uptodate(info->dev_root->node) ||
6199             !extent_buffer_uptodate(info->chunk_root->node)) {
6200                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6201                 return -EIO;
6202         }
6203
6204         root = info->fs_root;
6205         if (init_extent_tree) {
6206                 printf("Creating a new extent tree\n");
6207                 ret = reinit_extent_tree(info);
6208                 if (ret)
6209                         return ret;
6210         }
6211         if (!extent_buffer_uptodate(info->extent_root->node)) {
6212                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6213                 return -EIO;
6214         }
6215
6216         fprintf(stderr, "checking extents\n");
6217         if (init_csum_tree) {
6218                 struct btrfs_trans_handle *trans;
6219
6220                 fprintf(stderr, "Reinit crc root\n");
6221                 trans = btrfs_start_transaction(info->csum_root, 1);
6222                 if (IS_ERR(trans)) {
6223                         fprintf(stderr, "Error starting transaction\n");
6224                         return PTR_ERR(trans);
6225                 }
6226
6227                 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
6228                 if (ret) {
6229                         fprintf(stderr, "crc root initialization failed\n");
6230                         return -EIO;
6231                 }
6232
6233                 ret = btrfs_commit_transaction(trans, info->csum_root);
6234                 if (ret)
6235                         exit(1);
6236
6237                 ret = check_chunks_and_extents(root);
6238                 if (ret)
6239                         fprintf(stderr,
6240                                 "Errors found in extent allocation tree or chunk allocation\n");
6241                 goto out;
6242         }
6243         ret = check_chunks_and_extents(root);
6244         if (ret)
6245                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
6246
6247         fprintf(stderr, "checking free space cache\n");
6248         ret = check_space_cache(root);
6249         if (ret)
6250                 goto out;
6251
6252         fprintf(stderr, "checking fs roots\n");
6253         ret = check_fs_roots(root, &root_cache);
6254         if (ret)
6255                 goto out;
6256
6257         fprintf(stderr, "checking csums\n");
6258         ret = check_csums(root);
6259         if (ret)
6260                 goto out;
6261
6262         fprintf(stderr, "checking root refs\n");
6263         ret = check_root_refs(root, &root_cache);
6264         if (ret)
6265                 goto out;
6266
6267         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
6268                 struct extent_buffer *eb;
6269
6270                 eb = list_first_entry(&root->fs_info->recow_ebs,
6271                                       struct extent_buffer, recow);
6272                 ret = recow_extent_buffer(root, eb);
6273                 if (ret)
6274                         break;
6275         }
6276
6277         if (!list_empty(&root->fs_info->recow_ebs)) {
6278                 fprintf(stderr, "Transid errors in file system\n");
6279                 ret = 1;
6280         }
6281 out:
6282         free_root_recs_tree(&root_cache);
6283         close_ctree(root);
6284
6285         if (found_old_backref) { /*
6286                  * there was a disk format change when mixed
6287                  * backref was in testing tree. The old format
6288                  * existed about one week.
6289                  */
6290                 printf("\n * Found old mixed backref format. "
6291                        "The old format is not supported! *"
6292                        "\n * Please mount the FS in readonly mode, "
6293                        "backup data and re-format the FS. *\n\n");
6294                 ret = 1;
6295         }
6296         printf("found %llu bytes used err is %d\n",
6297                (unsigned long long)bytes_used, ret);
6298         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
6299         printf("total tree bytes: %llu\n",
6300                (unsigned long long)total_btree_bytes);
6301         printf("total fs tree bytes: %llu\n",
6302                (unsigned long long)total_fs_tree_bytes);
6303         printf("total extent tree bytes: %llu\n",
6304                (unsigned long long)total_extent_tree_bytes);
6305         printf("btree space waste bytes: %llu\n",
6306                (unsigned long long)btree_space_waste);
6307         printf("file data blocks allocated: %llu\n referenced %llu\n",
6308                 (unsigned long long)data_bytes_allocated,
6309                 (unsigned long long)data_bytes_referenced);
6310         printf("%s\n", BTRFS_BUILD_VERSION);
6311         return ret;
6312 }