Btrfs-progs: introduce common insert/search/delete functions for rb-tree
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "kerncompat.h"
31 #include "ctree.h"
32 #include "volumes.h"
33 #include "repair.h"
34 #include "disk-io.h"
35 #include "print-tree.h"
36 #include "transaction.h"
37 #include "list.h"
38 #include "version.h"
39 #include "utils.h"
40 #include "commands.h"
41 #include "free-space-cache.h"
42
43 static u64 bytes_used = 0;
44 static u64 total_csum_bytes = 0;
45 static u64 total_btree_bytes = 0;
46 static u64 total_fs_tree_bytes = 0;
47 static u64 total_extent_tree_bytes = 0;
48 static u64 btree_space_waste = 0;
49 static u64 data_bytes_allocated = 0;
50 static u64 data_bytes_referenced = 0;
51 static int found_old_backref = 0;
52 static LIST_HEAD(duplicate_extents);
53
54 struct extent_backref {
55         struct list_head list;
56         unsigned int is_data:1;
57         unsigned int found_extent_tree:1;
58         unsigned int full_backref:1;
59         unsigned int found_ref:1;
60 };
61
62 struct data_backref {
63         struct extent_backref node;
64         union {
65                 u64 parent;
66                 u64 root;
67         };
68         u64 owner;
69         u64 offset;
70         u64 disk_bytenr;
71         u64 bytes;
72         u64 ram_bytes;
73         u32 num_refs;
74         u32 found_ref;
75 };
76
77 struct tree_backref {
78         struct extent_backref node;
79         union {
80                 u64 parent;
81                 u64 root;
82         };
83 };
84
85 struct extent_record {
86         struct list_head backrefs;
87         struct list_head dups;
88         struct list_head list;
89         struct cache_extent cache;
90         struct btrfs_disk_key parent_key;
91         unsigned int found_rec;
92         u64 start;
93         u64 max_size;
94         u64 nr;
95         u64 refs;
96         u64 extent_item_refs;
97         u64 generation;
98         u64 info_objectid;
99         u64 num_duplicates;
100         u8 info_level;
101         unsigned int content_checked:1;
102         unsigned int owner_ref_checked:1;
103         unsigned int is_root:1;
104         unsigned int metadata:1;
105 };
106
107 struct inode_backref {
108         struct list_head list;
109         unsigned int found_dir_item:1;
110         unsigned int found_dir_index:1;
111         unsigned int found_inode_ref:1;
112         unsigned int filetype:8;
113         int errors;
114         unsigned int ref_type;
115         u64 dir;
116         u64 index;
117         u16 namelen;
118         char name[0];
119 };
120
121 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
122 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
123 #define REF_ERR_NO_INODE_REF            (1 << 2)
124 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
125 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
126 #define REF_ERR_DUP_INODE_REF           (1 << 5)
127 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
128 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
129 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
130 #define REF_ERR_NO_ROOT_REF             (1 << 9)
131 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
132 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
133 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
134
135 struct inode_record {
136         struct list_head backrefs;
137         unsigned int checked:1;
138         unsigned int merging:1;
139         unsigned int found_inode_item:1;
140         unsigned int found_dir_item:1;
141         unsigned int found_file_extent:1;
142         unsigned int found_csum_item:1;
143         unsigned int some_csum_missing:1;
144         unsigned int nodatasum:1;
145         int errors;
146
147         u64 ino;
148         u32 nlink;
149         u32 imode;
150         u64 isize;
151         u64 nbytes;
152
153         u32 found_link;
154         u64 found_size;
155         u64 extent_start;
156         u64 extent_end;
157         u64 first_extent_gap;
158
159         u32 refs;
160 };
161
162 #define I_ERR_NO_INODE_ITEM             (1 << 0)
163 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
164 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
165 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
166 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
167 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
168 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
169 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
170 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
171 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
172 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
173 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
174 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
175 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
176
177 struct root_backref {
178         struct list_head list;
179         unsigned int found_dir_item:1;
180         unsigned int found_dir_index:1;
181         unsigned int found_back_ref:1;
182         unsigned int found_forward_ref:1;
183         unsigned int reachable:1;
184         int errors;
185         u64 ref_root;
186         u64 dir;
187         u64 index;
188         u16 namelen;
189         char name[0];
190 };
191
192 struct root_record {
193         struct list_head backrefs;
194         struct cache_extent cache;
195         unsigned int found_root_item:1;
196         u64 objectid;
197         u32 found_ref;
198 };
199
200 struct ptr_node {
201         struct cache_extent cache;
202         void *data;
203 };
204
205 struct shared_node {
206         struct cache_extent cache;
207         struct cache_tree root_cache;
208         struct cache_tree inode_cache;
209         struct inode_record *current;
210         u32 refs;
211 };
212
213 struct block_info {
214         u64 start;
215         u32 size;
216 };
217
218 struct walk_control {
219         struct cache_tree shared;
220         struct shared_node *nodes[BTRFS_MAX_LEVEL];
221         int active_node;
222         int root_level;
223 };
224
225 static u8 imode_to_type(u32 imode)
226 {
227 #define S_SHIFT 12
228         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
229                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
230                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
231                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
232                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
233                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
234                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
235                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
236         };
237
238         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
239 #undef S_SHIFT
240 }
241
242 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
243 {
244         struct inode_record *rec;
245         struct inode_backref *backref;
246         struct inode_backref *orig;
247         size_t size;
248
249         rec = malloc(sizeof(*rec));
250         memcpy(rec, orig_rec, sizeof(*rec));
251         rec->refs = 1;
252         INIT_LIST_HEAD(&rec->backrefs);
253
254         list_for_each_entry(orig, &orig_rec->backrefs, list) {
255                 size = sizeof(*orig) + orig->namelen + 1;
256                 backref = malloc(size);
257                 memcpy(backref, orig, size);
258                 list_add_tail(&backref->list, &rec->backrefs);
259         }
260         return rec;
261 }
262
263 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
264                                           u64 ino, int mod)
265 {
266         struct ptr_node *node;
267         struct cache_extent *cache;
268         struct inode_record *rec = NULL;
269         int ret;
270
271         cache = find_cache_extent(inode_cache, ino, 1);
272         if (cache) {
273                 node = container_of(cache, struct ptr_node, cache);
274                 rec = node->data;
275                 if (mod && rec->refs > 1) {
276                         node->data = clone_inode_rec(rec);
277                         rec->refs--;
278                         rec = node->data;
279                 }
280         } else if (mod) {
281                 rec = calloc(1, sizeof(*rec));
282                 rec->ino = ino;
283                 rec->extent_start = (u64)-1;
284                 rec->first_extent_gap = (u64)-1;
285                 rec->refs = 1;
286                 INIT_LIST_HEAD(&rec->backrefs);
287
288                 node = malloc(sizeof(*node));
289                 node->cache.start = ino;
290                 node->cache.size = 1;
291                 node->data = rec;
292
293                 if (ino == BTRFS_FREE_INO_OBJECTID)
294                         rec->found_link = 1;
295
296                 ret = insert_cache_extent(inode_cache, &node->cache);
297                 BUG_ON(ret);
298         }
299         return rec;
300 }
301
302 static void free_inode_rec(struct inode_record *rec)
303 {
304         struct inode_backref *backref;
305
306         if (--rec->refs > 0)
307                 return;
308
309         while (!list_empty(&rec->backrefs)) {
310                 backref = list_entry(rec->backrefs.next,
311                                      struct inode_backref, list);
312                 list_del(&backref->list);
313                 free(backref);
314         }
315         free(rec);
316 }
317
318 static int can_free_inode_rec(struct inode_record *rec)
319 {
320         if (!rec->errors && rec->checked && rec->found_inode_item &&
321             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
322                 return 1;
323         return 0;
324 }
325
326 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
327                                  struct inode_record *rec)
328 {
329         struct cache_extent *cache;
330         struct inode_backref *tmp, *backref;
331         struct ptr_node *node;
332         unsigned char filetype;
333
334         if (!rec->found_inode_item)
335                 return;
336
337         filetype = imode_to_type(rec->imode);
338         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
339                 if (backref->found_dir_item && backref->found_dir_index) {
340                         if (backref->filetype != filetype)
341                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
342                         if (!backref->errors && backref->found_inode_ref) {
343                                 list_del(&backref->list);
344                                 free(backref);
345                         }
346                 }
347         }
348
349         if (!rec->checked || rec->merging)
350                 return;
351
352         if (S_ISDIR(rec->imode)) {
353                 if (rec->found_size != rec->isize)
354                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
355                 if (rec->found_file_extent)
356                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
357         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
358                 if (rec->found_dir_item)
359                         rec->errors |= I_ERR_ODD_DIR_ITEM;
360                 if (rec->found_size != rec->nbytes)
361                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
362                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
363                         rec->first_extent_gap = 0;
364                 if (rec->nlink > 0 && (rec->extent_end < rec->isize ||
365                     rec->first_extent_gap < rec->isize))
366                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
367         }
368
369         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
370                 if (rec->found_csum_item && rec->nodatasum)
371                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
372                 if (rec->some_csum_missing && !rec->nodatasum)
373                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
374         }
375
376         BUG_ON(rec->refs != 1);
377         if (can_free_inode_rec(rec)) {
378                 cache = find_cache_extent(inode_cache, rec->ino, 1);
379                 node = container_of(cache, struct ptr_node, cache);
380                 BUG_ON(node->data != rec);
381                 remove_cache_extent(inode_cache, &node->cache);
382                 free(node);
383                 free_inode_rec(rec);
384         }
385 }
386
387 static int check_orphan_item(struct btrfs_root *root, u64 ino)
388 {
389         struct btrfs_path path;
390         struct btrfs_key key;
391         int ret;
392
393         key.objectid = BTRFS_ORPHAN_OBJECTID;
394         key.type = BTRFS_ORPHAN_ITEM_KEY;
395         key.offset = ino;
396
397         btrfs_init_path(&path);
398         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
399         btrfs_release_path(root, &path);
400         if (ret > 0)
401                 ret = -ENOENT;
402         return ret;
403 }
404
405 static int process_inode_item(struct extent_buffer *eb,
406                               int slot, struct btrfs_key *key,
407                               struct shared_node *active_node)
408 {
409         struct inode_record *rec;
410         struct btrfs_inode_item *item;
411
412         rec = active_node->current;
413         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
414         if (rec->found_inode_item) {
415                 rec->errors |= I_ERR_DUP_INODE_ITEM;
416                 return 1;
417         }
418         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
419         rec->nlink = btrfs_inode_nlink(eb, item);
420         rec->isize = btrfs_inode_size(eb, item);
421         rec->nbytes = btrfs_inode_nbytes(eb, item);
422         rec->imode = btrfs_inode_mode(eb, item);
423         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
424                 rec->nodatasum = 1;
425         rec->found_inode_item = 1;
426         if (rec->nlink == 0)
427                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
428         maybe_free_inode_rec(&active_node->inode_cache, rec);
429         return 0;
430 }
431
432 static struct inode_backref *get_inode_backref(struct inode_record *rec,
433                                                 const char *name,
434                                                 int namelen, u64 dir)
435 {
436         struct inode_backref *backref;
437
438         list_for_each_entry(backref, &rec->backrefs, list) {
439                 if (backref->dir != dir || backref->namelen != namelen)
440                         continue;
441                 if (memcmp(name, backref->name, namelen))
442                         continue;
443                 return backref;
444         }
445
446         backref = malloc(sizeof(*backref) + namelen + 1);
447         memset(backref, 0, sizeof(*backref));
448         backref->dir = dir;
449         backref->namelen = namelen;
450         memcpy(backref->name, name, namelen);
451         backref->name[namelen] = '\0';
452         list_add_tail(&backref->list, &rec->backrefs);
453         return backref;
454 }
455
456 static int add_inode_backref(struct cache_tree *inode_cache,
457                              u64 ino, u64 dir, u64 index,
458                              const char *name, int namelen,
459                              int filetype, int itemtype, int errors)
460 {
461         struct inode_record *rec;
462         struct inode_backref *backref;
463
464         rec = get_inode_rec(inode_cache, ino, 1);
465         backref = get_inode_backref(rec, name, namelen, dir);
466         if (errors)
467                 backref->errors |= errors;
468         if (itemtype == BTRFS_DIR_INDEX_KEY) {
469                 if (backref->found_dir_index)
470                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
471                 if (backref->found_inode_ref && backref->index != index)
472                         backref->errors |= REF_ERR_INDEX_UNMATCH;
473                 if (backref->found_dir_item && backref->filetype != filetype)
474                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
475
476                 backref->index = index;
477                 backref->filetype = filetype;
478                 backref->found_dir_index = 1;
479         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
480                 rec->found_link++;
481                 if (backref->found_dir_item)
482                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
483                 if (backref->found_dir_index && backref->filetype != filetype)
484                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
485
486                 backref->filetype = filetype;
487                 backref->found_dir_item = 1;
488         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
489                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
490                 if (backref->found_inode_ref)
491                         backref->errors |= REF_ERR_DUP_INODE_REF;
492                 if (backref->found_dir_index && backref->index != index)
493                         backref->errors |= REF_ERR_INDEX_UNMATCH;
494
495                 backref->ref_type = itemtype;
496                 backref->index = index;
497                 backref->found_inode_ref = 1;
498         } else {
499                 BUG_ON(1);
500         }
501
502         maybe_free_inode_rec(inode_cache, rec);
503         return 0;
504 }
505
506 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
507                             struct cache_tree *dst_cache)
508 {
509         struct inode_backref *backref;
510         u32 dir_count = 0;
511
512         dst->merging = 1;
513         list_for_each_entry(backref, &src->backrefs, list) {
514                 if (backref->found_dir_index) {
515                         add_inode_backref(dst_cache, dst->ino, backref->dir,
516                                         backref->index, backref->name,
517                                         backref->namelen, backref->filetype,
518                                         BTRFS_DIR_INDEX_KEY, backref->errors);
519                 }
520                 if (backref->found_dir_item) {
521                         dir_count++;
522                         add_inode_backref(dst_cache, dst->ino,
523                                         backref->dir, 0, backref->name,
524                                         backref->namelen, backref->filetype,
525                                         BTRFS_DIR_ITEM_KEY, backref->errors);
526                 }
527                 if (backref->found_inode_ref) {
528                         add_inode_backref(dst_cache, dst->ino,
529                                         backref->dir, backref->index,
530                                         backref->name, backref->namelen, 0,
531                                         backref->ref_type, backref->errors);
532                 }
533         }
534
535         if (src->found_dir_item)
536                 dst->found_dir_item = 1;
537         if (src->found_file_extent)
538                 dst->found_file_extent = 1;
539         if (src->found_csum_item)
540                 dst->found_csum_item = 1;
541         if (src->some_csum_missing)
542                 dst->some_csum_missing = 1;
543         if (dst->first_extent_gap > src->first_extent_gap)
544                 dst->first_extent_gap = src->first_extent_gap;
545
546         BUG_ON(src->found_link < dir_count);
547         dst->found_link += src->found_link - dir_count;
548         dst->found_size += src->found_size;
549         if (src->extent_start != (u64)-1) {
550                 if (dst->extent_start == (u64)-1) {
551                         dst->extent_start = src->extent_start;
552                         dst->extent_end = src->extent_end;
553                 } else {
554                         if (dst->extent_end > src->extent_start)
555                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
556                         else if (dst->extent_end < src->extent_start &&
557                                  dst->extent_end < dst->first_extent_gap)
558                                 dst->first_extent_gap = dst->extent_end;
559                         if (dst->extent_end < src->extent_end)
560                                 dst->extent_end = src->extent_end;
561                 }
562         }
563
564         dst->errors |= src->errors;
565         if (src->found_inode_item) {
566                 if (!dst->found_inode_item) {
567                         dst->nlink = src->nlink;
568                         dst->isize = src->isize;
569                         dst->nbytes = src->nbytes;
570                         dst->imode = src->imode;
571                         dst->nodatasum = src->nodatasum;
572                         dst->found_inode_item = 1;
573                 } else {
574                         dst->errors |= I_ERR_DUP_INODE_ITEM;
575                 }
576         }
577         dst->merging = 0;
578
579         return 0;
580 }
581
582 static int splice_shared_node(struct shared_node *src_node,
583                               struct shared_node *dst_node)
584 {
585         struct cache_extent *cache;
586         struct ptr_node *node, *ins;
587         struct cache_tree *src, *dst;
588         struct inode_record *rec, *conflict;
589         u64 current_ino = 0;
590         int splice = 0;
591         int ret;
592
593         if (--src_node->refs == 0)
594                 splice = 1;
595         if (src_node->current)
596                 current_ino = src_node->current->ino;
597
598         src = &src_node->root_cache;
599         dst = &dst_node->root_cache;
600 again:
601         cache = find_first_cache_extent(src, 0);
602         while (cache) {
603                 node = container_of(cache, struct ptr_node, cache);
604                 rec = node->data;
605                 cache = next_cache_extent(cache);
606
607                 if (splice) {
608                         remove_cache_extent(src, &node->cache);
609                         ins = node;
610                 } else {
611                         ins = malloc(sizeof(*ins));
612                         ins->cache.start = node->cache.start;
613                         ins->cache.size = node->cache.size;
614                         ins->data = rec;
615                         rec->refs++;
616                 }
617                 ret = insert_cache_extent(dst, &ins->cache);
618                 if (ret == -EEXIST) {
619                         conflict = get_inode_rec(dst, rec->ino, 1);
620                         merge_inode_recs(rec, conflict, dst);
621                         if (rec->checked) {
622                                 conflict->checked = 1;
623                                 if (dst_node->current == conflict)
624                                         dst_node->current = NULL;
625                         }
626                         maybe_free_inode_rec(dst, conflict);
627                         free_inode_rec(rec);
628                         free(ins);
629                 } else {
630                         BUG_ON(ret);
631                 }
632         }
633
634         if (src == &src_node->root_cache) {
635                 src = &src_node->inode_cache;
636                 dst = &dst_node->inode_cache;
637                 goto again;
638         }
639
640         if (current_ino > 0 && (!dst_node->current ||
641             current_ino > dst_node->current->ino)) {
642                 if (dst_node->current) {
643                         dst_node->current->checked = 1;
644                         maybe_free_inode_rec(dst, dst_node->current);
645                 }
646                 dst_node->current = get_inode_rec(dst, current_ino, 1);
647         }
648         return 0;
649 }
650
651 static void free_inode_ptr(struct cache_extent *cache)
652 {
653         struct ptr_node *node;
654         struct inode_record *rec;
655
656         node = container_of(cache, struct ptr_node, cache);
657         rec = node->data;
658         free_inode_rec(rec);
659         free(node);
660 }
661
662 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
663
664 static struct shared_node *find_shared_node(struct cache_tree *shared,
665                                             u64 bytenr)
666 {
667         struct cache_extent *cache;
668         struct shared_node *node;
669
670         cache = find_cache_extent(shared, bytenr, 1);
671         if (cache) {
672                 node = container_of(cache, struct shared_node, cache);
673                 return node;
674         }
675         return NULL;
676 }
677
678 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
679 {
680         int ret;
681         struct shared_node *node;
682
683         node = calloc(1, sizeof(*node));
684         node->cache.start = bytenr;
685         node->cache.size = 1;
686         cache_tree_init(&node->root_cache);
687         cache_tree_init(&node->inode_cache);
688         node->refs = refs;
689
690         ret = insert_cache_extent(shared, &node->cache);
691         BUG_ON(ret);
692         return 0;
693 }
694
695 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
696                              struct walk_control *wc, int level)
697 {
698         struct shared_node *node;
699         struct shared_node *dest;
700
701         if (level == wc->active_node)
702                 return 0;
703
704         BUG_ON(wc->active_node <= level);
705         node = find_shared_node(&wc->shared, bytenr);
706         if (!node) {
707                 add_shared_node(&wc->shared, bytenr, refs);
708                 node = find_shared_node(&wc->shared, bytenr);
709                 wc->nodes[level] = node;
710                 wc->active_node = level;
711                 return 0;
712         }
713
714         if (wc->root_level == wc->active_node &&
715             btrfs_root_refs(&root->root_item) == 0) {
716                 if (--node->refs == 0) {
717                         free_inode_recs_tree(&node->root_cache);
718                         free_inode_recs_tree(&node->inode_cache);
719                         remove_cache_extent(&wc->shared, &node->cache);
720                         free(node);
721                 }
722                 return 1;
723         }
724
725         dest = wc->nodes[wc->active_node];
726         splice_shared_node(node, dest);
727         if (node->refs == 0) {
728                 remove_cache_extent(&wc->shared, &node->cache);
729                 free(node);
730         }
731         return 1;
732 }
733
734 static int leave_shared_node(struct btrfs_root *root,
735                              struct walk_control *wc, int level)
736 {
737         struct shared_node *node;
738         struct shared_node *dest;
739         int i;
740
741         if (level == wc->root_level)
742                 return 0;
743
744         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
745                 if (wc->nodes[i])
746                         break;
747         }
748         BUG_ON(i >= BTRFS_MAX_LEVEL);
749
750         node = wc->nodes[wc->active_node];
751         wc->nodes[wc->active_node] = NULL;
752         wc->active_node = i;
753
754         dest = wc->nodes[wc->active_node];
755         if (wc->active_node < wc->root_level ||
756             btrfs_root_refs(&root->root_item) > 0) {
757                 BUG_ON(node->refs <= 1);
758                 splice_shared_node(node, dest);
759         } else {
760                 BUG_ON(node->refs < 2);
761                 node->refs--;
762         }
763         return 0;
764 }
765
766 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
767                          u64 child_root_id)
768 {
769         struct btrfs_path path;
770         struct btrfs_key key;
771         struct extent_buffer *leaf;
772         int has_parent = 0;
773         int ret;
774
775         btrfs_init_path(&path);
776
777         key.objectid = parent_root_id;
778         key.type = BTRFS_ROOT_REF_KEY;
779         key.offset = child_root_id;
780         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
781                                 0, 0);
782         BUG_ON(ret < 0);
783         btrfs_release_path(root, &path);
784         if (!ret)
785                 return 1;
786
787         key.objectid = child_root_id;
788         key.type = BTRFS_ROOT_BACKREF_KEY;
789         key.offset = 0;
790         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
791                                 0, 0);
792         BUG_ON(ret <= 0);
793
794         while (1) {
795                 leaf = path.nodes[0];
796                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
797                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
798                         BUG_ON(ret < 0);
799
800                         if (ret > 0)
801                                 break;
802                         leaf = path.nodes[0];
803                 }
804
805                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
806                 if (key.objectid != child_root_id ||
807                     key.type != BTRFS_ROOT_BACKREF_KEY)
808                         break;
809
810                 has_parent = 1;
811
812                 if (key.offset == parent_root_id) {
813                         btrfs_release_path(root, &path);
814                         return 1;
815                 }
816
817                 path.slots[0]++;
818         }
819
820         btrfs_release_path(root, &path);
821         return has_parent? 0 : -1;
822 }
823
824 static int process_dir_item(struct btrfs_root *root,
825                             struct extent_buffer *eb,
826                             int slot, struct btrfs_key *key,
827                             struct shared_node *active_node)
828 {
829         u32 total;
830         u32 cur = 0;
831         u32 len;
832         u32 name_len;
833         u32 data_len;
834         int error;
835         int nritems = 0;
836         int filetype;
837         struct btrfs_dir_item *di;
838         struct inode_record *rec;
839         struct cache_tree *root_cache;
840         struct cache_tree *inode_cache;
841         struct btrfs_key location;
842         char namebuf[BTRFS_NAME_LEN];
843
844         root_cache = &active_node->root_cache;
845         inode_cache = &active_node->inode_cache;
846         rec = active_node->current;
847         rec->found_dir_item = 1;
848
849         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
850         total = btrfs_item_size_nr(eb, slot);
851         while (cur < total) {
852                 nritems++;
853                 btrfs_dir_item_key_to_cpu(eb, di, &location);
854                 name_len = btrfs_dir_name_len(eb, di);
855                 data_len = btrfs_dir_data_len(eb, di);
856                 filetype = btrfs_dir_type(eb, di);
857
858                 rec->found_size += name_len;
859                 if (name_len <= BTRFS_NAME_LEN) {
860                         len = name_len;
861                         error = 0;
862                 } else {
863                         len = BTRFS_NAME_LEN;
864                         error = REF_ERR_NAME_TOO_LONG;
865                 }
866                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
867
868                 if (location.type == BTRFS_INODE_ITEM_KEY) {
869                         add_inode_backref(inode_cache, location.objectid,
870                                           key->objectid, key->offset, namebuf,
871                                           len, filetype, key->type, error);
872                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
873                         add_inode_backref(root_cache, location.objectid,
874                                           key->objectid, key->offset,
875                                           namebuf, len, filetype,
876                                           key->type, error);
877                 } else {
878                         fprintf(stderr, "warning line %d\n", __LINE__);
879                 }
880
881                 len = sizeof(*di) + name_len + data_len;
882                 di = (struct btrfs_dir_item *)((char *)di + len);
883                 cur += len;
884         }
885         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
886                 rec->errors |= I_ERR_DUP_DIR_INDEX;
887
888         return 0;
889 }
890
891 static int process_inode_ref(struct extent_buffer *eb,
892                              int slot, struct btrfs_key *key,
893                              struct shared_node *active_node)
894 {
895         u32 total;
896         u32 cur = 0;
897         u32 len;
898         u32 name_len;
899         u64 index;
900         int error;
901         struct cache_tree *inode_cache;
902         struct btrfs_inode_ref *ref;
903         char namebuf[BTRFS_NAME_LEN];
904
905         inode_cache = &active_node->inode_cache;
906
907         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
908         total = btrfs_item_size_nr(eb, slot);
909         while (cur < total) {
910                 name_len = btrfs_inode_ref_name_len(eb, ref);
911                 index = btrfs_inode_ref_index(eb, ref);
912                 if (name_len <= BTRFS_NAME_LEN) {
913                         len = name_len;
914                         error = 0;
915                 } else {
916                         len = BTRFS_NAME_LEN;
917                         error = REF_ERR_NAME_TOO_LONG;
918                 }
919                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
920                 add_inode_backref(inode_cache, key->objectid, key->offset,
921                                   index, namebuf, len, 0, key->type, error);
922
923                 len = sizeof(*ref) + name_len;
924                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
925                 cur += len;
926         }
927         return 0;
928 }
929
930 static int process_inode_extref(struct extent_buffer *eb,
931                                 int slot, struct btrfs_key *key,
932                                 struct shared_node *active_node)
933 {
934         u32 total;
935         u32 cur = 0;
936         u32 len;
937         u32 name_len;
938         u64 index;
939         u64 parent;
940         int error;
941         struct cache_tree *inode_cache;
942         struct btrfs_inode_extref *extref;
943         char namebuf[BTRFS_NAME_LEN];
944
945         inode_cache = &active_node->inode_cache;
946
947         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
948         total = btrfs_item_size_nr(eb, slot);
949         while (cur < total) {
950                 name_len = btrfs_inode_extref_name_len(eb, extref);
951                 index = btrfs_inode_extref_index(eb, extref);
952                 parent = btrfs_inode_extref_parent(eb, extref);
953                 if (name_len <= BTRFS_NAME_LEN) {
954                         len = name_len;
955                         error = 0;
956                 } else {
957                         len = BTRFS_NAME_LEN;
958                         error = REF_ERR_NAME_TOO_LONG;
959                 }
960                 read_extent_buffer(eb, namebuf,
961                                    (unsigned long)(extref + 1), len);
962                 add_inode_backref(inode_cache, key->objectid, parent,
963                                   index, namebuf, len, 0, key->type, error);
964
965                 len = sizeof(*extref) + name_len;
966                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
967                 cur += len;
968         }
969         return 0;
970
971 }
972
973 static u64 count_csum_range(struct btrfs_root *root, u64 start, u64 len)
974 {
975         struct btrfs_key key;
976         struct btrfs_path path;
977         struct extent_buffer *leaf;
978         int ret ;
979         size_t size;
980         u64 found = 0;
981         u64 csum_end;
982         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
983
984         btrfs_init_path(&path);
985
986         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
987         key.offset = start;
988         key.type = BTRFS_EXTENT_CSUM_KEY;
989
990         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
991                                 &key, &path, 0, 0);
992         BUG_ON(ret < 0);
993         if (ret > 0 && path.slots[0] > 0) {
994                 leaf = path.nodes[0];
995                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
996                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
997                     key.type == BTRFS_EXTENT_CSUM_KEY)
998                         path.slots[0]--;
999         }
1000
1001         while (len > 0) {
1002                 leaf = path.nodes[0];
1003                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1004                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1005                         BUG_ON(ret < 0);
1006                         if (ret > 0)
1007                                 break;
1008                         leaf = path.nodes[0];
1009                 }
1010
1011                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1012                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1013                     key.type != BTRFS_EXTENT_CSUM_KEY)
1014                         break;
1015
1016                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1017                 if (key.offset >= start + len)
1018                         break;
1019
1020                 if (key.offset > start)
1021                         start = key.offset;
1022
1023                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1024                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1025                 if (csum_end > start) {
1026                         size = min(csum_end - start, len);
1027                         len -= size;
1028                         start += size;
1029                         found += size;
1030                 }
1031
1032                 path.slots[0]++;
1033         }
1034         btrfs_release_path(root->fs_info->csum_root, &path);
1035         return found;
1036 }
1037
1038 static int process_file_extent(struct btrfs_root *root,
1039                                 struct extent_buffer *eb,
1040                                 int slot, struct btrfs_key *key,
1041                                 struct shared_node *active_node)
1042 {
1043         struct inode_record *rec;
1044         struct btrfs_file_extent_item *fi;
1045         u64 num_bytes = 0;
1046         u64 disk_bytenr = 0;
1047         u64 extent_offset = 0;
1048         u64 mask = root->sectorsize - 1;
1049         int extent_type;
1050
1051         rec = active_node->current;
1052         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1053         rec->found_file_extent = 1;
1054
1055         if (rec->extent_start == (u64)-1) {
1056                 rec->extent_start = key->offset;
1057                 rec->extent_end = key->offset;
1058         }
1059
1060         if (rec->extent_end > key->offset)
1061                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1062         else if (rec->extent_end < key->offset &&
1063                  rec->extent_end < rec->first_extent_gap)
1064                 rec->first_extent_gap = rec->extent_end;
1065
1066         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1067         extent_type = btrfs_file_extent_type(eb, fi);
1068
1069         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1070                 num_bytes = btrfs_file_extent_inline_len(eb, fi);
1071                 if (num_bytes == 0)
1072                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1073                 rec->found_size += num_bytes;
1074                 num_bytes = (num_bytes + mask) & ~mask;
1075         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1076                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1077                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1078                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1079                 extent_offset = btrfs_file_extent_offset(eb, fi);
1080                 if (num_bytes == 0 || (num_bytes & mask))
1081                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1082                 if (num_bytes + extent_offset >
1083                     btrfs_file_extent_ram_bytes(eb, fi))
1084                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1085                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1086                     (btrfs_file_extent_compression(eb, fi) ||
1087                      btrfs_file_extent_encryption(eb, fi) ||
1088                      btrfs_file_extent_other_encoding(eb, fi)))
1089                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1090                 if (disk_bytenr > 0)
1091                         rec->found_size += num_bytes;
1092         } else {
1093                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1094         }
1095         rec->extent_end = key->offset + num_bytes;
1096
1097         if (disk_bytenr > 0) {
1098                 u64 found;
1099                 if (btrfs_file_extent_compression(eb, fi))
1100                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1101                 else
1102                         disk_bytenr += extent_offset;
1103
1104                 found = count_csum_range(root, disk_bytenr, num_bytes);
1105                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1106                         if (found > 0)
1107                                 rec->found_csum_item = 1;
1108                         if (found < num_bytes)
1109                                 rec->some_csum_missing = 1;
1110                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1111                         if (found > 0)
1112                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1113                 }
1114         }
1115         return 0;
1116 }
1117
1118 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1119                             struct walk_control *wc)
1120 {
1121         struct btrfs_key key;
1122         u32 nritems;
1123         int i;
1124         int ret = 0;
1125         struct cache_tree *inode_cache;
1126         struct shared_node *active_node;
1127
1128         if (wc->root_level == wc->active_node &&
1129             btrfs_root_refs(&root->root_item) == 0)
1130                 return 0;
1131
1132         active_node = wc->nodes[wc->active_node];
1133         inode_cache = &active_node->inode_cache;
1134         nritems = btrfs_header_nritems(eb);
1135         for (i = 0; i < nritems; i++) {
1136                 btrfs_item_key_to_cpu(eb, &key, i);
1137
1138                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1139                         continue;
1140
1141                 if (active_node->current == NULL ||
1142                     active_node->current->ino < key.objectid) {
1143                         if (active_node->current) {
1144                                 active_node->current->checked = 1;
1145                                 maybe_free_inode_rec(inode_cache,
1146                                                      active_node->current);
1147                         }
1148                         active_node->current = get_inode_rec(inode_cache,
1149                                                              key.objectid, 1);
1150                 }
1151                 switch (key.type) {
1152                 case BTRFS_DIR_ITEM_KEY:
1153                 case BTRFS_DIR_INDEX_KEY:
1154                         ret = process_dir_item(root, eb, i, &key, active_node);
1155                         break;
1156                 case BTRFS_INODE_REF_KEY:
1157                         ret = process_inode_ref(eb, i, &key, active_node);
1158                         break;
1159                 case BTRFS_INODE_EXTREF_KEY:
1160                         ret = process_inode_extref(eb, i, &key, active_node);
1161                         break;
1162                 case BTRFS_INODE_ITEM_KEY:
1163                         ret = process_inode_item(eb, i, &key, active_node);
1164                         break;
1165                 case BTRFS_EXTENT_DATA_KEY:
1166                         ret = process_file_extent(root, eb, i, &key,
1167                                                   active_node);
1168                         break;
1169                 default:
1170                         break;
1171                 };
1172         }
1173         return ret;
1174 }
1175
1176 static void reada_walk_down(struct btrfs_root *root,
1177                             struct extent_buffer *node, int slot)
1178 {
1179         u64 bytenr;
1180         u64 ptr_gen;
1181         u32 nritems;
1182         u32 blocksize;
1183         int i;
1184         int ret;
1185         int level;
1186
1187         level = btrfs_header_level(node);
1188         if (level != 1)
1189                 return;
1190
1191         nritems = btrfs_header_nritems(node);
1192         blocksize = btrfs_level_size(root, level - 1);
1193         for (i = slot; i < nritems; i++) {
1194                 bytenr = btrfs_node_blockptr(node, i);
1195                 ptr_gen = btrfs_node_ptr_generation(node, i);
1196                 ret = readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1197                 if (ret)
1198                         break;
1199         }
1200 }
1201
1202 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1203                           struct walk_control *wc, int *level)
1204 {
1205         u64 bytenr;
1206         u64 ptr_gen;
1207         struct extent_buffer *next;
1208         struct extent_buffer *cur;
1209         u32 blocksize;
1210         int ret;
1211         u64 refs;
1212
1213         WARN_ON(*level < 0);
1214         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1215         ret = btrfs_lookup_extent_info(NULL, root,
1216                                        path->nodes[*level]->start,
1217                                        *level, 1, &refs, NULL);
1218         if (ret < 0)
1219                 goto out;
1220
1221         if (refs > 1) {
1222                 ret = enter_shared_node(root, path->nodes[*level]->start,
1223                                         refs, wc, *level);
1224                 if (ret > 0)
1225                         goto out;
1226         }
1227
1228         while (*level >= 0) {
1229                 WARN_ON(*level < 0);
1230                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1231                 cur = path->nodes[*level];
1232
1233                 if (btrfs_header_level(cur) != *level)
1234                         WARN_ON(1);
1235
1236                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1237                         break;
1238                 if (*level == 0) {
1239                         ret = process_one_leaf(root, cur, wc);
1240                         break;
1241                 }
1242                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1243                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1244                 blocksize = btrfs_level_size(root, *level - 1);
1245                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1246                                                1, &refs, NULL);
1247                 if (ret < 0)
1248                         refs = 0;
1249
1250                 if (refs > 1) {
1251                         ret = enter_shared_node(root, bytenr, refs,
1252                                                 wc, *level - 1);
1253                         if (ret > 0) {
1254                                 path->slots[*level]++;
1255                                 continue;
1256                         }
1257                 }
1258
1259                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1260                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1261                         free_extent_buffer(next);
1262                         reada_walk_down(root, cur, path->slots[*level]);
1263                         next = read_tree_block(root, bytenr, blocksize,
1264                                                ptr_gen);
1265                 }
1266
1267                 *level = *level - 1;
1268                 free_extent_buffer(path->nodes[*level]);
1269                 path->nodes[*level] = next;
1270                 path->slots[*level] = 0;
1271         }
1272 out:
1273         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1274         return 0;
1275 }
1276
1277 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1278                         struct walk_control *wc, int *level)
1279 {
1280         int i;
1281         struct extent_buffer *leaf;
1282
1283         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1284                 leaf = path->nodes[i];
1285                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1286                         path->slots[i]++;
1287                         *level = i;
1288                         return 0;
1289                 } else {
1290                         free_extent_buffer(path->nodes[*level]);
1291                         path->nodes[*level] = NULL;
1292                         BUG_ON(*level > wc->active_node);
1293                         if (*level == wc->active_node)
1294                                 leave_shared_node(root, wc, *level);
1295                         *level = i + 1;
1296                 }
1297         }
1298         return 1;
1299 }
1300
1301 static int check_root_dir(struct inode_record *rec)
1302 {
1303         struct inode_backref *backref;
1304         int ret = -1;
1305
1306         if (!rec->found_inode_item || rec->errors)
1307                 goto out;
1308         if (rec->nlink != 1 || rec->found_link != 0)
1309                 goto out;
1310         if (list_empty(&rec->backrefs))
1311                 goto out;
1312         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1313         if (!backref->found_inode_ref)
1314                 goto out;
1315         if (backref->index != 0 || backref->namelen != 2 ||
1316             memcmp(backref->name, "..", 2))
1317                 goto out;
1318         if (backref->found_dir_index || backref->found_dir_item)
1319                 goto out;
1320         ret = 0;
1321 out:
1322         return ret;
1323 }
1324
1325 static int check_inode_recs(struct btrfs_root *root,
1326                             struct cache_tree *inode_cache)
1327 {
1328         struct cache_extent *cache;
1329         struct ptr_node *node;
1330         struct inode_record *rec;
1331         struct inode_backref *backref;
1332         int ret;
1333         u64 error = 0;
1334         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1335
1336         if (btrfs_root_refs(&root->root_item) == 0) {
1337                 if (!cache_tree_empty(inode_cache))
1338                         fprintf(stderr, "warning line %d\n", __LINE__);
1339                 return 0;
1340         }
1341
1342         rec = get_inode_rec(inode_cache, root_dirid, 0);
1343         if (rec) {
1344                 ret = check_root_dir(rec);
1345                 if (ret) {
1346                         fprintf(stderr, "root %llu root dir %llu error\n",
1347                                 (unsigned long long)root->root_key.objectid,
1348                                 (unsigned long long)root_dirid);
1349                         error++;
1350                 }
1351         } else {
1352                 fprintf(stderr, "root %llu root dir %llu not found\n",
1353                         (unsigned long long)root->root_key.objectid,
1354                         (unsigned long long)root_dirid);
1355         }
1356
1357         while (1) {
1358                 cache = find_first_cache_extent(inode_cache, 0);
1359                 if (!cache)
1360                         break;
1361                 node = container_of(cache, struct ptr_node, cache);
1362                 rec = node->data;
1363                 remove_cache_extent(inode_cache, &node->cache);
1364                 free(node);
1365                 if (rec->ino == root_dirid ||
1366                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1367                         free_inode_rec(rec);
1368                         continue;
1369                 }
1370
1371                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1372                         ret = check_orphan_item(root, rec->ino);
1373                         if (ret == 0)
1374                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1375                         if (can_free_inode_rec(rec)) {
1376                                 free_inode_rec(rec);
1377                                 continue;
1378                         }
1379                 }
1380
1381                 error++;
1382                 if (!rec->found_inode_item)
1383                         rec->errors |= I_ERR_NO_INODE_ITEM;
1384                 if (rec->found_link != rec->nlink)
1385                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1386                 fprintf(stderr, "root %llu inode %llu errors %x\n",
1387                         (unsigned long long) root->root_key.objectid,
1388                         (unsigned long long) rec->ino, rec->errors);
1389                 list_for_each_entry(backref, &rec->backrefs, list) {
1390                         if (!backref->found_dir_item)
1391                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1392                         if (!backref->found_dir_index)
1393                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1394                         if (!backref->found_inode_ref)
1395                                 backref->errors |= REF_ERR_NO_INODE_REF;
1396                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1397                                 " namelen %u name %s filetype %d error %x\n",
1398                                 (unsigned long long)backref->dir,
1399                                 (unsigned long long)backref->index,
1400                                 backref->namelen, backref->name,
1401                                 backref->filetype, backref->errors);
1402                 }
1403                 free_inode_rec(rec);
1404         }
1405         return (error > 0) ? -1 : 0;
1406 }
1407
1408 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1409                                         u64 objectid)
1410 {
1411         struct cache_extent *cache;
1412         struct root_record *rec = NULL;
1413         int ret;
1414
1415         cache = find_cache_extent(root_cache, objectid, 1);
1416         if (cache) {
1417                 rec = container_of(cache, struct root_record, cache);
1418         } else {
1419                 rec = calloc(1, sizeof(*rec));
1420                 rec->objectid = objectid;
1421                 INIT_LIST_HEAD(&rec->backrefs);
1422                 rec->cache.start = objectid;
1423                 rec->cache.size = 1;
1424
1425                 ret = insert_cache_extent(root_cache, &rec->cache);
1426                 BUG_ON(ret);
1427         }
1428         return rec;
1429 }
1430
1431 static struct root_backref *get_root_backref(struct root_record *rec,
1432                                              u64 ref_root, u64 dir, u64 index,
1433                                              const char *name, int namelen)
1434 {
1435         struct root_backref *backref;
1436
1437         list_for_each_entry(backref, &rec->backrefs, list) {
1438                 if (backref->ref_root != ref_root || backref->dir != dir ||
1439                     backref->namelen != namelen)
1440                         continue;
1441                 if (memcmp(name, backref->name, namelen))
1442                         continue;
1443                 return backref;
1444         }
1445
1446         backref = malloc(sizeof(*backref) + namelen + 1);
1447         memset(backref, 0, sizeof(*backref));
1448         backref->ref_root = ref_root;
1449         backref->dir = dir;
1450         backref->index = index;
1451         backref->namelen = namelen;
1452         memcpy(backref->name, name, namelen);
1453         backref->name[namelen] = '\0';
1454         list_add_tail(&backref->list, &rec->backrefs);
1455         return backref;
1456 }
1457
1458 static void free_root_record(struct cache_extent *cache)
1459 {
1460         struct root_record *rec;
1461         struct root_backref *backref;
1462
1463         rec = container_of(cache, struct root_record, cache);
1464         while (!list_empty(&rec->backrefs)) {
1465                 backref = list_entry(rec->backrefs.next,
1466                                      struct root_backref, list);
1467                 list_del(&backref->list);
1468                 free(backref);
1469         }
1470
1471         kfree(rec);
1472 }
1473
1474 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
1475
1476 static int add_root_backref(struct cache_tree *root_cache,
1477                             u64 root_id, u64 ref_root, u64 dir, u64 index,
1478                             const char *name, int namelen,
1479                             int item_type, int errors)
1480 {
1481         struct root_record *rec;
1482         struct root_backref *backref;
1483
1484         rec = get_root_rec(root_cache, root_id);
1485         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
1486
1487         backref->errors |= errors;
1488
1489         if (item_type != BTRFS_DIR_ITEM_KEY) {
1490                 if (backref->found_dir_index || backref->found_back_ref ||
1491                     backref->found_forward_ref) {
1492                         if (backref->index != index)
1493                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
1494                 } else {
1495                         backref->index = index;
1496                 }
1497         }
1498
1499         if (item_type == BTRFS_DIR_ITEM_KEY) {
1500                 if (backref->found_forward_ref)
1501                         rec->found_ref++;
1502                 backref->found_dir_item = 1;
1503         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
1504                 backref->found_dir_index = 1;
1505         } else if (item_type == BTRFS_ROOT_REF_KEY) {
1506                 if (backref->found_forward_ref)
1507                         backref->errors |= REF_ERR_DUP_ROOT_REF;
1508                 else if (backref->found_dir_item)
1509                         rec->found_ref++;
1510                 backref->found_forward_ref = 1;
1511         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
1512                 if (backref->found_back_ref)
1513                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
1514                 backref->found_back_ref = 1;
1515         } else {
1516                 BUG_ON(1);
1517         }
1518
1519         if (backref->found_forward_ref && backref->found_dir_item)
1520                 backref->reachable = 1;
1521         return 0;
1522 }
1523
1524 static int merge_root_recs(struct btrfs_root *root,
1525                            struct cache_tree *src_cache,
1526                            struct cache_tree *dst_cache)
1527 {
1528         struct cache_extent *cache;
1529         struct ptr_node *node;
1530         struct inode_record *rec;
1531         struct inode_backref *backref;
1532
1533         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1534                 free_inode_recs_tree(src_cache);
1535                 return 0;
1536         }
1537
1538         while (1) {
1539                 cache = find_first_cache_extent(src_cache, 0);
1540                 if (!cache)
1541                         break;
1542                 node = container_of(cache, struct ptr_node, cache);
1543                 rec = node->data;
1544                 remove_cache_extent(src_cache, &node->cache);
1545                 free(node);
1546
1547                 if (!is_child_root(root, root->objectid, rec->ino))
1548                         goto skip;
1549
1550                 list_for_each_entry(backref, &rec->backrefs, list) {
1551                         BUG_ON(backref->found_inode_ref);
1552                         if (backref->found_dir_item)
1553                                 add_root_backref(dst_cache, rec->ino,
1554                                         root->root_key.objectid, backref->dir,
1555                                         backref->index, backref->name,
1556                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
1557                                         backref->errors);
1558                         if (backref->found_dir_index)
1559                                 add_root_backref(dst_cache, rec->ino,
1560                                         root->root_key.objectid, backref->dir,
1561                                         backref->index, backref->name,
1562                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
1563                                         backref->errors);
1564                 }
1565 skip:
1566                 free_inode_rec(rec);
1567         }
1568         return 0;
1569 }
1570
1571 static int check_root_refs(struct btrfs_root *root,
1572                            struct cache_tree *root_cache)
1573 {
1574         struct root_record *rec;
1575         struct root_record *ref_root;
1576         struct root_backref *backref;
1577         struct cache_extent *cache;
1578         int loop = 1;
1579         int ret;
1580         int error;
1581         int errors = 0;
1582
1583         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
1584         rec->found_ref = 1;
1585
1586         /* fixme: this can not detect circular references */
1587         while (loop) {
1588                 loop = 0;
1589                 cache = find_first_cache_extent(root_cache, 0);
1590                 while (1) {
1591                         if (!cache)
1592                                 break;
1593                         rec = container_of(cache, struct root_record, cache);
1594                         cache = next_cache_extent(cache);
1595
1596                         if (rec->found_ref == 0)
1597                                 continue;
1598
1599                         list_for_each_entry(backref, &rec->backrefs, list) {
1600                                 if (!backref->reachable)
1601                                         continue;
1602
1603                                 ref_root = get_root_rec(root_cache,
1604                                                         backref->ref_root);
1605                                 if (ref_root->found_ref > 0)
1606                                         continue;
1607
1608                                 backref->reachable = 0;
1609                                 rec->found_ref--;
1610                                 if (rec->found_ref == 0)
1611                                         loop = 1;
1612                         }
1613                 }
1614         }
1615
1616         cache = find_first_cache_extent(root_cache, 0);
1617         while (1) {
1618                 if (!cache)
1619                         break;
1620                 rec = container_of(cache, struct root_record, cache);
1621                 cache = next_cache_extent(cache);
1622
1623                 if (rec->found_ref == 0 &&
1624                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1625                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
1626                         ret = check_orphan_item(root->fs_info->tree_root,
1627                                                 rec->objectid);
1628                         if (ret == 0)
1629                                 continue;
1630
1631                         /*
1632                          * If we don't have a root item then we likely just have
1633                          * a dir item in a snapshot for this root but no actual
1634                          * ref key or anything so it's meaningless.
1635                          */
1636                         if (!rec->found_root_item)
1637                                 continue;
1638                         errors++;
1639                         fprintf(stderr, "fs tree %llu not referenced\n",
1640                                 (unsigned long long)rec->objectid);
1641                 }
1642
1643                 error = 0;
1644                 if (rec->found_ref > 0 && !rec->found_root_item)
1645                         error = 1;
1646                 list_for_each_entry(backref, &rec->backrefs, list) {
1647                         if (!backref->found_dir_item)
1648                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1649                         if (!backref->found_dir_index)
1650                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1651                         if (!backref->found_back_ref)
1652                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
1653                         if (!backref->found_forward_ref)
1654                                 backref->errors |= REF_ERR_NO_ROOT_REF;
1655                         if (backref->reachable && backref->errors)
1656                                 error = 1;
1657                 }
1658                 if (!error)
1659                         continue;
1660
1661                 errors++;
1662                 fprintf(stderr, "fs tree %llu refs %u %s\n",
1663                         (unsigned long long)rec->objectid, rec->found_ref,
1664                          rec->found_root_item ? "" : "not found");
1665
1666                 list_for_each_entry(backref, &rec->backrefs, list) {
1667                         if (!backref->reachable)
1668                                 continue;
1669                         if (!backref->errors && rec->found_root_item)
1670                                 continue;
1671                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
1672                                 " index %llu namelen %u name %s error %x\n",
1673                                 (unsigned long long)backref->ref_root,
1674                                 (unsigned long long)backref->dir,
1675                                 (unsigned long long)backref->index,
1676                                 backref->namelen, backref->name,
1677                                 backref->errors);
1678                 }
1679         }
1680         return errors > 0 ? 1 : 0;
1681 }
1682
1683 static int process_root_ref(struct extent_buffer *eb, int slot,
1684                             struct btrfs_key *key,
1685                             struct cache_tree *root_cache)
1686 {
1687         u64 dirid;
1688         u64 index;
1689         u32 len;
1690         u32 name_len;
1691         struct btrfs_root_ref *ref;
1692         char namebuf[BTRFS_NAME_LEN];
1693         int error;
1694
1695         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
1696
1697         dirid = btrfs_root_ref_dirid(eb, ref);
1698         index = btrfs_root_ref_sequence(eb, ref);
1699         name_len = btrfs_root_ref_name_len(eb, ref);
1700
1701         if (name_len <= BTRFS_NAME_LEN) {
1702                 len = name_len;
1703                 error = 0;
1704         } else {
1705                 len = BTRFS_NAME_LEN;
1706                 error = REF_ERR_NAME_TOO_LONG;
1707         }
1708         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1709
1710         if (key->type == BTRFS_ROOT_REF_KEY) {
1711                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
1712                                  index, namebuf, len, key->type, error);
1713         } else {
1714                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
1715                                  index, namebuf, len, key->type, error);
1716         }
1717         return 0;
1718 }
1719
1720 static int check_fs_root(struct btrfs_root *root,
1721                          struct cache_tree *root_cache,
1722                          struct walk_control *wc)
1723 {
1724         int ret = 0;
1725         int wret;
1726         int level;
1727         struct btrfs_path path;
1728         struct shared_node root_node;
1729         struct root_record *rec;
1730         struct btrfs_root_item *root_item = &root->root_item;
1731
1732         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1733                 rec = get_root_rec(root_cache, root->root_key.objectid);
1734                 if (btrfs_root_refs(root_item) > 0)
1735                         rec->found_root_item = 1;
1736         }
1737
1738         btrfs_init_path(&path);
1739         memset(&root_node, 0, sizeof(root_node));
1740         cache_tree_init(&root_node.root_cache);
1741         cache_tree_init(&root_node.inode_cache);
1742
1743         level = btrfs_header_level(root->node);
1744         memset(wc->nodes, 0, sizeof(wc->nodes));
1745         wc->nodes[level] = &root_node;
1746         wc->active_node = level;
1747         wc->root_level = level;
1748
1749         if (btrfs_root_refs(root_item) > 0 ||
1750             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1751                 path.nodes[level] = root->node;
1752                 extent_buffer_get(root->node);
1753                 path.slots[level] = 0;
1754         } else {
1755                 struct btrfs_key key;
1756                 struct btrfs_disk_key found_key;
1757
1758                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1759                 level = root_item->drop_level;
1760                 path.lowest_level = level;
1761                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1762                 BUG_ON(wret < 0);
1763                 btrfs_node_key(path.nodes[level], &found_key,
1764                                 path.slots[level]);
1765                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1766                                         sizeof(found_key)));
1767         }
1768
1769         while (1) {
1770                 wret = walk_down_tree(root, &path, wc, &level);
1771                 if (wret < 0)
1772                         ret = wret;
1773                 if (wret != 0)
1774                         break;
1775
1776                 wret = walk_up_tree(root, &path, wc, &level);
1777                 if (wret < 0)
1778                         ret = wret;
1779                 if (wret != 0)
1780                         break;
1781         }
1782         btrfs_release_path(root, &path);
1783
1784         merge_root_recs(root, &root_node.root_cache, root_cache);
1785
1786         if (root_node.current) {
1787                 root_node.current->checked = 1;
1788                 maybe_free_inode_rec(&root_node.inode_cache,
1789                                 root_node.current);
1790         }
1791
1792         ret = check_inode_recs(root, &root_node.inode_cache);
1793         return ret;
1794 }
1795
1796 static int fs_root_objectid(u64 objectid)
1797 {
1798         if (objectid == BTRFS_FS_TREE_OBJECTID ||
1799             objectid == BTRFS_TREE_RELOC_OBJECTID ||
1800             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID ||
1801             (objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1802              objectid <= BTRFS_LAST_FREE_OBJECTID))
1803                 return 1;
1804         return 0;
1805 }
1806
1807 static int check_fs_roots(struct btrfs_root *root,
1808                           struct cache_tree *root_cache)
1809 {
1810         struct btrfs_path path;
1811         struct btrfs_key key;
1812         struct walk_control wc;
1813         struct extent_buffer *leaf;
1814         struct btrfs_root *tmp_root;
1815         struct btrfs_root *tree_root = root->fs_info->tree_root;
1816         int ret;
1817         int err = 0;
1818
1819         memset(&wc, 0, sizeof(wc));
1820         cache_tree_init(&wc.shared);
1821         btrfs_init_path(&path);
1822
1823         key.offset = 0;
1824         key.objectid = 0;
1825         key.type = BTRFS_ROOT_ITEM_KEY;
1826         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
1827         BUG_ON(ret < 0);
1828         while (1) {
1829                 leaf = path.nodes[0];
1830                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1831                         ret = btrfs_next_leaf(tree_root, &path);
1832                         if (ret != 0)
1833                                 break;
1834                         leaf = path.nodes[0];
1835                 }
1836                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1837                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
1838                     fs_root_objectid(key.objectid)) {
1839                         tmp_root = btrfs_read_fs_root_no_cache(root->fs_info,
1840                                                                &key);
1841                         if (IS_ERR(tmp_root)) {
1842                                 err = 1;
1843                                 goto next;
1844                         }
1845                         ret = check_fs_root(tmp_root, root_cache, &wc);
1846                         if (ret)
1847                                 err = 1;
1848                         btrfs_free_fs_root(tmp_root);
1849                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
1850                            key.type == BTRFS_ROOT_BACKREF_KEY) {
1851                         process_root_ref(leaf, path.slots[0], &key,
1852                                          root_cache);
1853                 }
1854 next:
1855                 path.slots[0]++;
1856         }
1857         btrfs_release_path(tree_root, &path);
1858
1859         if (!cache_tree_empty(&wc.shared))
1860                 fprintf(stderr, "warning line %d\n", __LINE__);
1861
1862         return err;
1863 }
1864
1865 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
1866 {
1867         struct list_head *cur = rec->backrefs.next;
1868         struct extent_backref *back;
1869         struct tree_backref *tback;
1870         struct data_backref *dback;
1871         u64 found = 0;
1872         int err = 0;
1873
1874         while(cur != &rec->backrefs) {
1875                 back = list_entry(cur, struct extent_backref, list);
1876                 cur = cur->next;
1877                 if (!back->found_extent_tree) {
1878                         err = 1;
1879                         if (!print_errs)
1880                                 goto out;
1881                         if (back->is_data) {
1882                                 dback = (struct data_backref *)back;
1883                                 fprintf(stderr, "Backref %llu %s %llu"
1884                                         " owner %llu offset %llu num_refs %lu"
1885                                         " not found in extent tree\n",
1886                                         (unsigned long long)rec->start,
1887                                         back->full_backref ?
1888                                         "parent" : "root",
1889                                         back->full_backref ?
1890                                         (unsigned long long)dback->parent:
1891                                         (unsigned long long)dback->root,
1892                                         (unsigned long long)dback->owner,
1893                                         (unsigned long long)dback->offset,
1894                                         (unsigned long)dback->num_refs);
1895                         } else {
1896                                 tback = (struct tree_backref *)back;
1897                                 fprintf(stderr, "Backref %llu parent %llu"
1898                                         " root %llu not found in extent tree\n",
1899                                         (unsigned long long)rec->start,
1900                                         (unsigned long long)tback->parent,
1901                                         (unsigned long long)tback->root);
1902                         }
1903                 }
1904                 if (!back->is_data && !back->found_ref) {
1905                         err = 1;
1906                         if (!print_errs)
1907                                 goto out;
1908                         tback = (struct tree_backref *)back;
1909                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
1910                                 (unsigned long long)rec->start,
1911                                 back->full_backref ? "parent" : "root",
1912                                 back->full_backref ?
1913                                 (unsigned long long)tback->parent :
1914                                 (unsigned long long)tback->root, back);
1915                 }
1916                 if (back->is_data) {
1917                         dback = (struct data_backref *)back;
1918                         if (dback->found_ref != dback->num_refs) {
1919                                 err = 1;
1920                                 if (!print_errs)
1921                                         goto out;
1922                                 fprintf(stderr, "Incorrect local backref count"
1923                                         " on %llu %s %llu owner %llu"
1924                                         " offset %llu found %u wanted %u back %p\n",
1925                                         (unsigned long long)rec->start,
1926                                         back->full_backref ?
1927                                         "parent" : "root",
1928                                         back->full_backref ?
1929                                         (unsigned long long)dback->parent:
1930                                         (unsigned long long)dback->root,
1931                                         (unsigned long long)dback->owner,
1932                                         (unsigned long long)dback->offset,
1933                                         dback->found_ref, dback->num_refs, back);
1934                         }
1935                         if (dback->disk_bytenr != rec->start) {
1936                                 err = 1;
1937                                 if (!print_errs)
1938                                         goto out;
1939                                 fprintf(stderr, "Backref disk bytenr does not"
1940                                         " match extent record, bytenr=%llu, "
1941                                         "ref bytenr=%llu\n",
1942                                         (unsigned long long)rec->start,
1943                                         (unsigned long long)dback->disk_bytenr);
1944                         }
1945
1946                         if (dback->bytes != rec->nr) {
1947                                 err = 1;
1948                                 if (!print_errs)
1949                                         goto out;
1950                                 fprintf(stderr, "Backref bytes do not match "
1951                                         "extent backref, bytenr=%llu, ref "
1952                                         "bytes=%llu, backref bytes=%llu\n",
1953                                         (unsigned long long)rec->start,
1954                                         (unsigned long long)rec->nr,
1955                                         (unsigned long long)dback->bytes);
1956                         }
1957                 }
1958                 if (!back->is_data) {
1959                         found += 1;
1960                 } else {
1961                         dback = (struct data_backref *)back;
1962                         found += dback->found_ref;
1963                 }
1964         }
1965         if (found != rec->refs) {
1966                 err = 1;
1967                 if (!print_errs)
1968                         goto out;
1969                 fprintf(stderr, "Incorrect global backref count "
1970                         "on %llu found %llu wanted %llu\n",
1971                         (unsigned long long)rec->start,
1972                         (unsigned long long)found,
1973                         (unsigned long long)rec->refs);
1974         }
1975 out:
1976         return err;
1977 }
1978
1979 static int free_all_extent_backrefs(struct extent_record *rec)
1980 {
1981         struct extent_backref *back;
1982         struct list_head *cur;
1983         while (!list_empty(&rec->backrefs)) {
1984                 cur = rec->backrefs.next;
1985                 back = list_entry(cur, struct extent_backref, list);
1986                 list_del(cur);
1987                 free(back);
1988         }
1989         return 0;
1990 }
1991
1992 static void free_extent_cache(struct btrfs_fs_info *fs_info,
1993                               struct cache_tree *extent_cache)
1994 {
1995         struct cache_extent *cache;
1996         struct extent_record *rec;
1997
1998         while (1) {
1999                 cache = find_first_cache_extent(extent_cache, 0);
2000                 if (!cache)
2001                         break;
2002                 rec = container_of(cache, struct extent_record, cache);
2003                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2004                 remove_cache_extent(extent_cache, cache);
2005                 free_all_extent_backrefs(rec);
2006                 free(rec);
2007         }
2008 }
2009
2010 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2011                                  struct extent_record *rec)
2012 {
2013         if (rec->content_checked && rec->owner_ref_checked &&
2014             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2015             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2016                 remove_cache_extent(extent_cache, &rec->cache);
2017                 free_all_extent_backrefs(rec);
2018                 list_del_init(&rec->list);
2019                 free(rec);
2020         }
2021         return 0;
2022 }
2023
2024 static int check_owner_ref(struct btrfs_root *root,
2025                             struct extent_record *rec,
2026                             struct extent_buffer *buf)
2027 {
2028         struct extent_backref *node;
2029         struct tree_backref *back;
2030         struct btrfs_root *ref_root;
2031         struct btrfs_key key;
2032         struct btrfs_path path;
2033         struct extent_buffer *parent;
2034         int level;
2035         int found = 0;
2036         int ret;
2037
2038         list_for_each_entry(node, &rec->backrefs, list) {
2039                 if (node->is_data)
2040                         continue;
2041                 if (!node->found_ref)
2042                         continue;
2043                 if (node->full_backref)
2044                         continue;
2045                 back = (struct tree_backref *)node;
2046                 if (btrfs_header_owner(buf) == back->root)
2047                         return 0;
2048         }
2049         BUG_ON(rec->is_root);
2050
2051         /* try to find the block by search corresponding fs tree */
2052         key.objectid = btrfs_header_owner(buf);
2053         key.type = BTRFS_ROOT_ITEM_KEY;
2054         key.offset = (u64)-1;
2055
2056         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2057         if (IS_ERR(ref_root))
2058                 return 1;
2059
2060         level = btrfs_header_level(buf);
2061         if (level == 0)
2062                 btrfs_item_key_to_cpu(buf, &key, 0);
2063         else
2064                 btrfs_node_key_to_cpu(buf, &key, 0);
2065
2066         btrfs_init_path(&path);
2067         path.lowest_level = level + 1;
2068         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2069         if (ret < 0)
2070                 return 0;
2071
2072         parent = path.nodes[level + 1];
2073         if (parent && buf->start == btrfs_node_blockptr(parent,
2074                                                         path.slots[level + 1]))
2075                 found = 1;
2076
2077         btrfs_release_path(ref_root, &path);
2078         return found ? 0 : 1;
2079 }
2080
2081 static int is_extent_tree_record(struct extent_record *rec)
2082 {
2083         struct list_head *cur = rec->backrefs.next;
2084         struct extent_backref *node;
2085         struct tree_backref *back;
2086         int is_extent = 0;
2087
2088         while(cur != &rec->backrefs) {
2089                 node = list_entry(cur, struct extent_backref, list);
2090                 cur = cur->next;
2091                 if (node->is_data)
2092                         return 0;
2093                 back = (struct tree_backref *)node;
2094                 if (node->full_backref)
2095                         return 0;
2096                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2097                         is_extent = 1;
2098         }
2099         return is_extent;
2100 }
2101
2102
2103 static int record_bad_block_io(struct btrfs_fs_info *info,
2104                                struct cache_tree *extent_cache,
2105                                u64 start, u64 len)
2106 {
2107         struct extent_record *rec;
2108         struct cache_extent *cache;
2109         struct btrfs_key key;
2110
2111         cache = find_cache_extent(extent_cache, start, len);
2112         if (!cache)
2113                 return 0;
2114
2115         rec = container_of(cache, struct extent_record, cache);
2116         if (!is_extent_tree_record(rec))
2117                 return 0;
2118
2119         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2120         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2121 }
2122
2123 static int check_block(struct btrfs_root *root,
2124                        struct cache_tree *extent_cache,
2125                        struct extent_buffer *buf, u64 flags)
2126 {
2127         struct extent_record *rec;
2128         struct cache_extent *cache;
2129         struct btrfs_key key;
2130         int ret = 1;
2131         int level;
2132
2133         cache = find_cache_extent(extent_cache, buf->start, buf->len);
2134         if (!cache)
2135                 return 1;
2136         rec = container_of(cache, struct extent_record, cache);
2137         rec->generation = btrfs_header_generation(buf);
2138
2139         level = btrfs_header_level(buf);
2140         if (btrfs_header_nritems(buf) > 0) {
2141
2142                 if (level == 0)
2143                         btrfs_item_key_to_cpu(buf, &key, 0);
2144                 else
2145                         btrfs_node_key_to_cpu(buf, &key, 0);
2146
2147                 rec->info_objectid = key.objectid;
2148         }
2149         rec->info_level = level;
2150
2151         if (btrfs_is_leaf(buf))
2152                 ret = btrfs_check_leaf(root, &rec->parent_key, buf);
2153         else
2154                 ret = btrfs_check_node(root, &rec->parent_key, buf);
2155
2156         if (ret) {
2157                 fprintf(stderr, "bad block %llu\n",
2158                         (unsigned long long)buf->start);
2159         } else {
2160                 rec->content_checked = 1;
2161                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2162                         rec->owner_ref_checked = 1;
2163                 else {
2164                         ret = check_owner_ref(root, rec, buf);
2165                         if (!ret)
2166                                 rec->owner_ref_checked = 1;
2167                 }
2168         }
2169         if (!ret)
2170                 maybe_free_extent_rec(extent_cache, rec);
2171         return ret;
2172 }
2173
2174 static struct tree_backref *find_tree_backref(struct extent_record *rec,
2175                                                 u64 parent, u64 root)
2176 {
2177         struct list_head *cur = rec->backrefs.next;
2178         struct extent_backref *node;
2179         struct tree_backref *back;
2180
2181         while(cur != &rec->backrefs) {
2182                 node = list_entry(cur, struct extent_backref, list);
2183                 cur = cur->next;
2184                 if (node->is_data)
2185                         continue;
2186                 back = (struct tree_backref *)node;
2187                 if (parent > 0) {
2188                         if (!node->full_backref)
2189                                 continue;
2190                         if (parent == back->parent)
2191                                 return back;
2192                 } else {
2193                         if (node->full_backref)
2194                                 continue;
2195                         if (back->root == root)
2196                                 return back;
2197                 }
2198         }
2199         return NULL;
2200 }
2201
2202 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
2203                                                 u64 parent, u64 root)
2204 {
2205         struct tree_backref *ref = malloc(sizeof(*ref));
2206         memset(&ref->node, 0, sizeof(ref->node));
2207         if (parent > 0) {
2208                 ref->parent = parent;
2209                 ref->node.full_backref = 1;
2210         } else {
2211                 ref->root = root;
2212                 ref->node.full_backref = 0;
2213         }
2214         list_add_tail(&ref->node.list, &rec->backrefs);
2215
2216         return ref;
2217 }
2218
2219 static struct data_backref *find_data_backref(struct extent_record *rec,
2220                                                 u64 parent, u64 root,
2221                                                 u64 owner, u64 offset,
2222                                                 int found_ref,
2223                                                 u64 disk_bytenr, u64 bytes)
2224 {
2225         struct list_head *cur = rec->backrefs.next;
2226         struct extent_backref *node;
2227         struct data_backref *back;
2228
2229         while(cur != &rec->backrefs) {
2230                 node = list_entry(cur, struct extent_backref, list);
2231                 cur = cur->next;
2232                 if (!node->is_data)
2233                         continue;
2234                 back = (struct data_backref *)node;
2235                 if (parent > 0) {
2236                         if (!node->full_backref)
2237                                 continue;
2238                         if (parent == back->parent)
2239                                 return back;
2240                 } else {
2241                         if (node->full_backref)
2242                                 continue;
2243                         if (back->root == root && back->owner == owner &&
2244                             back->offset == offset) {
2245                                 if (found_ref && node->found_ref &&
2246                                     (back->bytes != bytes ||
2247                                     back->disk_bytenr != disk_bytenr))
2248                                         continue;
2249                                 return back;
2250                         }
2251                 }
2252         }
2253         return NULL;
2254 }
2255
2256 static struct data_backref *alloc_data_backref(struct extent_record *rec,
2257                                                 u64 parent, u64 root,
2258                                                 u64 owner, u64 offset,
2259                                                 u64 max_size)
2260 {
2261         struct data_backref *ref = malloc(sizeof(*ref));
2262         memset(&ref->node, 0, sizeof(ref->node));
2263         ref->node.is_data = 1;
2264
2265         if (parent > 0) {
2266                 ref->parent = parent;
2267                 ref->owner = 0;
2268                 ref->offset = 0;
2269                 ref->node.full_backref = 1;
2270         } else {
2271                 ref->root = root;
2272                 ref->owner = owner;
2273                 ref->offset = offset;
2274                 ref->node.full_backref = 0;
2275         }
2276         ref->bytes = max_size;
2277         ref->found_ref = 0;
2278         ref->num_refs = 0;
2279         list_add_tail(&ref->node.list, &rec->backrefs);
2280         if (max_size > rec->max_size)
2281                 rec->max_size = max_size;
2282         return ref;
2283 }
2284
2285 static int add_extent_rec(struct cache_tree *extent_cache,
2286                           struct btrfs_key *parent_key,
2287                           u64 start, u64 nr, u64 extent_item_refs,
2288                           int is_root, int inc_ref, int set_checked,
2289                           int metadata, int extent_rec, u64 max_size)
2290 {
2291         struct extent_record *rec;
2292         struct cache_extent *cache;
2293         int ret = 0;
2294         int dup = 0;
2295
2296         cache = find_cache_extent(extent_cache, start, nr);
2297         if (cache) {
2298                 rec = container_of(cache, struct extent_record, cache);
2299                 if (inc_ref)
2300                         rec->refs++;
2301                 if (rec->nr == 1)
2302                         rec->nr = max(nr, max_size);
2303
2304                 /*
2305                  * We need to make sure to reset nr to whatever the extent
2306                  * record says was the real size, this way we can compare it to
2307                  * the backrefs.
2308                  */
2309                 if (extent_rec) {
2310                         if (start != rec->start || rec->found_rec) {
2311                                 struct extent_record *tmp;
2312
2313                                 dup = 1;
2314                                 if (list_empty(&rec->list))
2315                                         list_add_tail(&rec->list,
2316                                                       &duplicate_extents);
2317
2318                                 /*
2319                                  * We have to do this song and dance in case we
2320                                  * find an extent record that falls inside of
2321                                  * our current extent record but does not have
2322                                  * the same objectid.
2323                                  */
2324                                 tmp = malloc(sizeof(*tmp));
2325                                 if (!tmp)
2326                                         return -ENOMEM;
2327                                 tmp->start = start;
2328                                 tmp->max_size = max_size;
2329                                 tmp->nr = nr;
2330                                 tmp->found_rec = 1;
2331                                 tmp->metadata = metadata;
2332                                 tmp->extent_item_refs = extent_item_refs;
2333                                 INIT_LIST_HEAD(&tmp->list);
2334                                 list_add_tail(&tmp->list, &rec->dups);
2335                                 rec->num_duplicates++;
2336                         } else {
2337                                 rec->nr = nr;
2338                                 rec->found_rec = 1;
2339                         }
2340                 }
2341
2342                 if (extent_item_refs && !dup) {
2343                         if (rec->extent_item_refs) {
2344                                 fprintf(stderr, "block %llu rec "
2345                                         "extent_item_refs %llu, passed %llu\n",
2346                                         (unsigned long long)start,
2347                                         (unsigned long long)
2348                                                         rec->extent_item_refs,
2349                                         (unsigned long long)extent_item_refs);
2350                         }
2351                         rec->extent_item_refs = extent_item_refs;
2352                 }
2353                 if (is_root)
2354                         rec->is_root = 1;
2355                 if (set_checked) {
2356                         rec->content_checked = 1;
2357                         rec->owner_ref_checked = 1;
2358                 }
2359
2360                 if (parent_key)
2361                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2362
2363                 if (rec->max_size < max_size)
2364                         rec->max_size = max_size;
2365
2366                 maybe_free_extent_rec(extent_cache, rec);
2367                 return ret;
2368         }
2369         rec = malloc(sizeof(*rec));
2370         rec->start = start;
2371         rec->max_size = max_size;
2372         rec->nr = max(nr, max_size);
2373         rec->found_rec = extent_rec;
2374         rec->content_checked = 0;
2375         rec->owner_ref_checked = 0;
2376         rec->num_duplicates = 0;
2377         rec->metadata = metadata;
2378         INIT_LIST_HEAD(&rec->backrefs);
2379         INIT_LIST_HEAD(&rec->dups);
2380         INIT_LIST_HEAD(&rec->list);
2381
2382         if (is_root)
2383                 rec->is_root = 1;
2384         else
2385                 rec->is_root = 0;
2386
2387         if (inc_ref)
2388                 rec->refs = 1;
2389         else
2390                 rec->refs = 0;
2391
2392         if (extent_item_refs)
2393                 rec->extent_item_refs = extent_item_refs;
2394         else
2395                 rec->extent_item_refs = 0;
2396
2397         if (parent_key)
2398                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2399         else
2400                 memset(&rec->parent_key, 0, sizeof(*parent_key));
2401
2402         rec->cache.start = start;
2403         rec->cache.size = nr;
2404         ret = insert_cache_extent(extent_cache, &rec->cache);
2405         BUG_ON(ret);
2406         bytes_used += nr;
2407         if (set_checked) {
2408                 rec->content_checked = 1;
2409                 rec->owner_ref_checked = 1;
2410         }
2411         return ret;
2412 }
2413
2414 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
2415                             u64 parent, u64 root, int found_ref)
2416 {
2417         struct extent_record *rec;
2418         struct tree_backref *back;
2419         struct cache_extent *cache;
2420
2421         cache = find_cache_extent(extent_cache, bytenr, 1);
2422         if (!cache) {
2423                 add_extent_rec(extent_cache, NULL, bytenr,
2424                                1, 0, 0, 0, 0, 1, 0, 0);
2425                 cache = find_cache_extent(extent_cache, bytenr, 1);
2426                 if (!cache)
2427                         abort();
2428         }
2429
2430         rec = container_of(cache, struct extent_record, cache);
2431         if (rec->start != bytenr) {
2432                 abort();
2433         }
2434
2435         back = find_tree_backref(rec, parent, root);
2436         if (!back)
2437                 back = alloc_tree_backref(rec, parent, root);
2438
2439         if (found_ref) {
2440                 if (back->node.found_ref) {
2441                         fprintf(stderr, "Extent back ref already exists "
2442                                 "for %llu parent %llu root %llu \n",
2443                                 (unsigned long long)bytenr,
2444                                 (unsigned long long)parent,
2445                                 (unsigned long long)root);
2446                 }
2447                 back->node.found_ref = 1;
2448         } else {
2449                 if (back->node.found_extent_tree) {
2450                         fprintf(stderr, "Extent back ref already exists "
2451                                 "for %llu parent %llu root %llu \n",
2452                                 (unsigned long long)bytenr,
2453                                 (unsigned long long)parent,
2454                                 (unsigned long long)root);
2455                 }
2456                 back->node.found_extent_tree = 1;
2457         }
2458         return 0;
2459 }
2460
2461 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
2462                             u64 parent, u64 root, u64 owner, u64 offset,
2463                             u32 num_refs, int found_ref, u64 max_size)
2464 {
2465         struct extent_record *rec;
2466         struct data_backref *back;
2467         struct cache_extent *cache;
2468
2469         cache = find_cache_extent(extent_cache, bytenr, 1);
2470         if (!cache) {
2471                 add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0,
2472                                0, 0, max_size);
2473                 cache = find_cache_extent(extent_cache, bytenr, 1);
2474                 if (!cache)
2475                         abort();
2476         }
2477
2478         rec = container_of(cache, struct extent_record, cache);
2479         if (rec->max_size < max_size)
2480                 rec->max_size = max_size;
2481
2482         /*
2483          * If found_ref is set then max_size is the real size and must match the
2484          * existing refs.  So if we have already found a ref then we need to
2485          * make sure that this ref matches the existing one, otherwise we need
2486          * to add a new backref so we can notice that the backrefs don't match
2487          * and we need to figure out who is telling the truth.  This is to
2488          * account for that awful fsync bug I introduced where we'd end up with
2489          * a btrfs_file_extent_item that would have its length include multiple
2490          * prealloc extents or point inside of a prealloc extent.
2491          */
2492         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
2493                                  bytenr, max_size);
2494         if (!back)
2495                 back = alloc_data_backref(rec, parent, root, owner, offset,
2496                                           max_size);
2497
2498         if (found_ref) {
2499                 BUG_ON(num_refs != 1);
2500                 if (back->node.found_ref)
2501                         BUG_ON(back->bytes != max_size);
2502                 back->node.found_ref = 1;
2503                 back->found_ref += 1;
2504                 back->bytes = max_size;
2505                 back->disk_bytenr = bytenr;
2506                 rec->refs += 1;
2507                 rec->content_checked = 1;
2508                 rec->owner_ref_checked = 1;
2509         } else {
2510                 if (back->node.found_extent_tree) {
2511                         fprintf(stderr, "Extent back ref already exists "
2512                                 "for %llu parent %llu root %llu"
2513                                 "owner %llu offset %llu num_refs %lu\n",
2514                                 (unsigned long long)bytenr,
2515                                 (unsigned long long)parent,
2516                                 (unsigned long long)root,
2517                                 (unsigned long long)owner,
2518                                 (unsigned long long)offset,
2519                                 (unsigned long)num_refs);
2520                 }
2521                 back->num_refs = num_refs;
2522                 back->node.found_extent_tree = 1;
2523         }
2524         return 0;
2525 }
2526
2527 static int add_pending(struct cache_tree *pending,
2528                        struct cache_tree *seen, u64 bytenr, u32 size)
2529 {
2530         int ret;
2531         ret = add_cache_extent(seen, bytenr, size);
2532         if (ret)
2533                 return ret;
2534         add_cache_extent(pending, bytenr, size);
2535         return 0;
2536 }
2537
2538 static int pick_next_pending(struct cache_tree *pending,
2539                         struct cache_tree *reada,
2540                         struct cache_tree *nodes,
2541                         u64 last, struct block_info *bits, int bits_nr,
2542                         int *reada_bits)
2543 {
2544         unsigned long node_start = last;
2545         struct cache_extent *cache;
2546         int ret;
2547
2548         cache = find_first_cache_extent(reada, 0);
2549         if (cache) {
2550                 bits[0].start = cache->start;
2551                 bits[1].size = cache->size;
2552                 *reada_bits = 1;
2553                 return 1;
2554         }
2555         *reada_bits = 0;
2556         if (node_start > 32768)
2557                 node_start -= 32768;
2558
2559         cache = find_first_cache_extent(nodes, node_start);
2560         if (!cache)
2561                 cache = find_first_cache_extent(nodes, 0);
2562
2563         if (!cache) {
2564                  cache = find_first_cache_extent(pending, 0);
2565                  if (!cache)
2566                          return 0;
2567                  ret = 0;
2568                  do {
2569                          bits[ret].start = cache->start;
2570                          bits[ret].size = cache->size;
2571                          cache = next_cache_extent(cache);
2572                          ret++;
2573                  } while (cache && ret < bits_nr);
2574                  return ret;
2575         }
2576
2577         ret = 0;
2578         do {
2579                 bits[ret].start = cache->start;
2580                 bits[ret].size = cache->size;
2581                 cache = next_cache_extent(cache);
2582                 ret++;
2583         } while (cache && ret < bits_nr);
2584
2585         if (bits_nr - ret > 8) {
2586                 u64 lookup = bits[0].start + bits[0].size;
2587                 struct cache_extent *next;
2588                 next = find_first_cache_extent(pending, lookup);
2589                 while(next) {
2590                         if (next->start - lookup > 32768)
2591                                 break;
2592                         bits[ret].start = next->start;
2593                         bits[ret].size = next->size;
2594                         lookup = next->start + next->size;
2595                         ret++;
2596                         if (ret == bits_nr)
2597                                 break;
2598                         next = next_cache_extent(next);
2599                         if (!next)
2600                                 break;
2601                 }
2602         }
2603         return ret;
2604 }
2605
2606 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2607 static int process_extent_ref_v0(struct cache_tree *extent_cache,
2608                                  struct extent_buffer *leaf, int slot)
2609 {
2610         struct btrfs_extent_ref_v0 *ref0;
2611         struct btrfs_key key;
2612
2613         btrfs_item_key_to_cpu(leaf, &key, slot);
2614         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
2615         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
2616                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
2617         } else {
2618                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
2619                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
2620         }
2621         return 0;
2622 }
2623 #endif
2624
2625 static int process_extent_item(struct btrfs_root *root,
2626                                struct cache_tree *extent_cache,
2627                                struct extent_buffer *eb, int slot)
2628 {
2629         struct btrfs_extent_item *ei;
2630         struct btrfs_extent_inline_ref *iref;
2631         struct btrfs_extent_data_ref *dref;
2632         struct btrfs_shared_data_ref *sref;
2633         struct btrfs_key key;
2634         unsigned long end;
2635         unsigned long ptr;
2636         int type;
2637         u32 item_size = btrfs_item_size_nr(eb, slot);
2638         u64 refs = 0;
2639         u64 offset;
2640         u64 num_bytes;
2641         int metadata = 0;
2642
2643         btrfs_item_key_to_cpu(eb, &key, slot);
2644
2645         if (key.type == BTRFS_METADATA_ITEM_KEY) {
2646                 metadata = 1;
2647                 num_bytes = root->leafsize;
2648         } else {
2649                 num_bytes = key.offset;
2650         }
2651
2652         if (item_size < sizeof(*ei)) {
2653 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2654                 struct btrfs_extent_item_v0 *ei0;
2655                 BUG_ON(item_size != sizeof(*ei0));
2656                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
2657                 refs = btrfs_extent_refs_v0(eb, ei0);
2658 #else
2659                 BUG();
2660 #endif
2661                 return add_extent_rec(extent_cache, NULL, key.objectid,
2662                                       num_bytes, refs, 0, 0, 0, metadata, 1,
2663                                       num_bytes);
2664         }
2665
2666         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2667         refs = btrfs_extent_refs(eb, ei);
2668
2669         add_extent_rec(extent_cache, NULL, key.objectid, num_bytes,
2670                        refs, 0, 0, 0, metadata, 1, num_bytes);
2671
2672         ptr = (unsigned long)(ei + 1);
2673         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
2674             key.type == BTRFS_EXTENT_ITEM_KEY)
2675                 ptr += sizeof(struct btrfs_tree_block_info);
2676
2677         end = (unsigned long)ei + item_size;
2678         while (ptr < end) {
2679                 iref = (struct btrfs_extent_inline_ref *)ptr;
2680                 type = btrfs_extent_inline_ref_type(eb, iref);
2681                 offset = btrfs_extent_inline_ref_offset(eb, iref);
2682                 switch (type) {
2683                 case BTRFS_TREE_BLOCK_REF_KEY:
2684                         add_tree_backref(extent_cache, key.objectid,
2685                                          0, offset, 0);
2686                         break;
2687                 case BTRFS_SHARED_BLOCK_REF_KEY:
2688                         add_tree_backref(extent_cache, key.objectid,
2689                                          offset, 0, 0);
2690                         break;
2691                 case BTRFS_EXTENT_DATA_REF_KEY:
2692                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
2693                         add_data_backref(extent_cache, key.objectid, 0,
2694                                         btrfs_extent_data_ref_root(eb, dref),
2695                                         btrfs_extent_data_ref_objectid(eb,
2696                                                                        dref),
2697                                         btrfs_extent_data_ref_offset(eb, dref),
2698                                         btrfs_extent_data_ref_count(eb, dref),
2699                                         0, num_bytes);
2700                         break;
2701                 case BTRFS_SHARED_DATA_REF_KEY:
2702                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
2703                         add_data_backref(extent_cache, key.objectid, offset,
2704                                         0, 0, 0,
2705                                         btrfs_shared_data_ref_count(eb, sref),
2706                                         0, num_bytes);
2707                         break;
2708                 default:
2709                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
2710                                 key.objectid, key.type, num_bytes);
2711                         goto out;
2712                 }
2713                 ptr += btrfs_extent_inline_ref_size(type);
2714         }
2715         WARN_ON(ptr > end);
2716 out:
2717         return 0;
2718 }
2719
2720 static int check_cache_range(struct btrfs_root *root,
2721                              struct btrfs_block_group_cache *cache,
2722                              u64 offset, u64 bytes)
2723 {
2724         struct btrfs_free_space *entry;
2725         u64 *logical;
2726         u64 bytenr;
2727         int stripe_len;
2728         int i, nr, ret;
2729
2730         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2731                 bytenr = btrfs_sb_offset(i);
2732                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
2733                                        cache->key.objectid, bytenr, 0,
2734                                        &logical, &nr, &stripe_len);
2735                 if (ret)
2736                         return ret;
2737
2738                 while (nr--) {
2739                         if (logical[nr] + stripe_len <= offset)
2740                                 continue;
2741                         if (offset + bytes <= logical[nr])
2742                                 continue;
2743                         if (logical[nr] == offset) {
2744                                 if (stripe_len >= bytes) {
2745                                         kfree(logical);
2746                                         return 0;
2747                                 }
2748                                 bytes -= stripe_len;
2749                                 offset += stripe_len;
2750                         } else if (logical[nr] < offset) {
2751                                 if (logical[nr] + stripe_len >=
2752                                     offset + bytes) {
2753                                         kfree(logical);
2754                                         return 0;
2755                                 }
2756                                 bytes = (offset + bytes) -
2757                                         (logical[nr] + stripe_len);
2758                                 offset = logical[nr] + stripe_len;
2759                         } else {
2760                                 /*
2761                                  * Could be tricky, the super may land in the
2762                                  * middle of the area we're checking.  First
2763                                  * check the easiest case, it's at the end.
2764                                  */
2765                                 if (logical[nr] + stripe_len >=
2766                                     bytes + offset) {
2767                                         bytes = logical[nr] - offset;
2768                                         continue;
2769                                 }
2770
2771                                 /* Check the left side */
2772                                 ret = check_cache_range(root, cache,
2773                                                         offset,
2774                                                         logical[nr] - offset);
2775                                 if (ret) {
2776                                         kfree(logical);
2777                                         return ret;
2778                                 }
2779
2780                                 /* Now we continue with the right side */
2781                                 bytes = (offset + bytes) -
2782                                         (logical[nr] + stripe_len);
2783                                 offset = logical[nr] + stripe_len;
2784                         }
2785                 }
2786
2787                 kfree(logical);
2788         }
2789
2790         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
2791         if (!entry) {
2792                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
2793                         offset, offset+bytes);
2794                 return -EINVAL;
2795         }
2796
2797         if (entry->offset != offset) {
2798                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
2799                         entry->offset);
2800                 return -EINVAL;
2801         }
2802
2803         if (entry->bytes != bytes) {
2804                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
2805                         bytes, entry->bytes, offset);
2806                 return -EINVAL;
2807         }
2808
2809         unlink_free_space(cache->free_space_ctl, entry);
2810         free(entry);
2811         return 0;
2812 }
2813
2814 static int verify_space_cache(struct btrfs_root *root,
2815                               struct btrfs_block_group_cache *cache)
2816 {
2817         struct btrfs_path *path;
2818         struct extent_buffer *leaf;
2819         struct btrfs_key key;
2820         u64 last;
2821         int ret = 0;
2822
2823         path = btrfs_alloc_path();
2824         if (!path)
2825                 return -ENOMEM;
2826
2827         root = root->fs_info->extent_root;
2828
2829         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
2830
2831         key.objectid = last;
2832         key.offset = 0;
2833         key.type = BTRFS_EXTENT_ITEM_KEY;
2834
2835         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2836         if (ret < 0)
2837                 return ret;
2838         ret = 0;
2839         while (1) {
2840                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2841                         ret = btrfs_next_leaf(root, path);
2842                         if (ret < 0)
2843                                 return ret;
2844                         if (ret > 0) {
2845                                 ret = 0;
2846                                 break;
2847                         }
2848                 }
2849                 leaf = path->nodes[0];
2850                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2851                 if (key.objectid >= cache->key.offset + cache->key.objectid)
2852                         break;
2853                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2854                     key.type != BTRFS_METADATA_ITEM_KEY) {
2855                         path->slots[0]++;
2856                         continue;
2857                 }
2858
2859                 if (last == key.objectid) {
2860                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
2861                                 last = key.objectid + key.offset;
2862                         else
2863                                 last = key.objectid + root->leafsize;
2864                         path->slots[0]++;
2865                         continue;
2866                 }
2867
2868                 ret = check_cache_range(root, cache, last,
2869                                         key.objectid - last);
2870                 if (ret)
2871                         break;
2872                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2873                         last = key.objectid + key.offset;
2874                 else
2875                         last = key.objectid + root->leafsize;
2876                 path->slots[0]++;
2877         }
2878
2879         if (last < cache->key.objectid + cache->key.offset)
2880                 ret = check_cache_range(root, cache, last,
2881                                         cache->key.objectid +
2882                                         cache->key.offset - last);
2883         btrfs_free_path(path);
2884
2885         if (!ret &&
2886             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
2887                 fprintf(stderr, "There are still entries left in the space "
2888                         "cache\n");
2889                 ret = -EINVAL;
2890         }
2891
2892         return ret;
2893 }
2894
2895 static int check_space_cache(struct btrfs_root *root)
2896 {
2897         struct btrfs_block_group_cache *cache;
2898         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
2899         int ret;
2900         int error = 0;
2901
2902         if (btrfs_super_generation(root->fs_info->super_copy) !=
2903             btrfs_super_cache_generation(root->fs_info->super_copy)) {
2904                 printf("cache and super generation don't match, space cache "
2905                        "will be invalidated\n");
2906                 return 0;
2907         }
2908
2909         while (1) {
2910                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
2911                 if (!cache)
2912                         break;
2913
2914                 start = cache->key.objectid + cache->key.offset;
2915                 if (!cache->free_space_ctl) {
2916                         if (btrfs_init_free_space_ctl(cache,
2917                                                       root->sectorsize)) {
2918                                 ret = -ENOMEM;
2919                                 break;
2920                         }
2921                 } else {
2922                         btrfs_remove_free_space_cache(cache);
2923                 }
2924
2925                 ret = load_free_space_cache(root->fs_info, cache);
2926                 if (!ret)
2927                         continue;
2928
2929                 ret = verify_space_cache(root, cache);
2930                 if (ret) {
2931                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
2932                                 cache->key.objectid);
2933                         error++;
2934                 }
2935         }
2936
2937         return error ? -EINVAL : 0;
2938 }
2939
2940 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
2941                                u64 num_bytes)
2942 {
2943         struct btrfs_path *path;
2944         struct extent_buffer *leaf;
2945         struct btrfs_key key;
2946         int ret;
2947
2948         path = btrfs_alloc_path();
2949         if (!path) {
2950                 fprintf(stderr, "Error allocing path\n");
2951                 return -ENOMEM;
2952         }
2953
2954         key.objectid = bytenr;
2955         key.type = BTRFS_EXTENT_ITEM_KEY;
2956         key.offset = 0;
2957
2958
2959 again:
2960         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
2961                                 0, 0);
2962         if (ret < 0) {
2963                 fprintf(stderr, "Error looking up extent record %d\n", ret);
2964                 btrfs_free_path(path);
2965                 return ret;
2966         } else if (ret) {
2967                 if (path->slots[0])
2968                         path->slots[0]--;
2969                 else
2970                         btrfs_prev_leaf(root, path);
2971         }
2972
2973         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2974
2975         /*
2976          * Block group items come before extent items if they have the same
2977          * bytenr, so walk back one more just in case.  Dear future traveler,
2978          * first congrats on mastering time travel.  Now if it's not too much
2979          * trouble could you go back to 2006 and tell Chris to make the
2980          * BLOCK_GROUP_ITEM_KEY lower than the EXTENT_ITEM_KEY please?
2981          */
2982         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
2983                 if (path->slots[0])
2984                         path->slots[0]--;
2985                 else
2986                         btrfs_prev_leaf(root, path);
2987         }
2988
2989         while (num_bytes) {
2990                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2991                         ret = btrfs_next_leaf(root, path);
2992                         if (ret < 0) {
2993                                 fprintf(stderr, "Error going to next leaf "
2994                                         "%d\n", ret);
2995                                 btrfs_free_path(path);
2996                                 return ret;
2997                         } else if (ret) {
2998                                 break;
2999                         }
3000                 }
3001                 leaf = path->nodes[0];
3002                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3003                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
3004                         path->slots[0]++;
3005                         continue;
3006                 }
3007                 if (key.objectid + key.offset < bytenr) {
3008                         path->slots[0]++;
3009                         continue;
3010                 }
3011                 if (key.objectid > bytenr + num_bytes)
3012                         break;
3013
3014                 if (key.objectid == bytenr) {
3015                         if (key.offset >= num_bytes) {
3016                                 num_bytes = 0;
3017                                 break;
3018                         }
3019                         num_bytes -= key.offset;
3020                         bytenr += key.offset;
3021                 } else if (key.objectid < bytenr) {
3022                         if (key.objectid + key.offset >= bytenr + num_bytes) {
3023                                 num_bytes = 0;
3024                                 break;
3025                         }
3026                         num_bytes = (bytenr + num_bytes) -
3027                                 (key.objectid + key.offset);
3028                         bytenr = key.objectid + key.offset;
3029                 } else {
3030                         if (key.objectid + key.offset < bytenr + num_bytes) {
3031                                 u64 new_start = key.objectid + key.offset;
3032                                 u64 new_bytes = bytenr + num_bytes - new_start;
3033
3034                                 /*
3035                                  * Weird case, the extent is in the middle of
3036                                  * our range, we'll have to search one side
3037                                  * and then the other.  Not sure if this happens
3038                                  * in real life, but no harm in coding it up
3039                                  * anyway just in case.
3040                                  */
3041                                 btrfs_release_path(root, path);
3042                                 ret = check_extent_exists(root, new_start,
3043                                                           new_bytes);
3044                                 if (ret) {
3045                                         fprintf(stderr, "Right section didn't "
3046                                                 "have a record\n");
3047                                         break;
3048                                 }
3049                                 num_bytes = key.objectid - bytenr;
3050                                 goto again;
3051                         }
3052                         num_bytes = key.objectid - bytenr;
3053                 }
3054                 path->slots[0]++;
3055         }
3056         ret = 0;
3057
3058         if (num_bytes) {
3059                 fprintf(stderr, "There are no extents for csum range "
3060                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
3061                 ret = 1;
3062         }
3063
3064         btrfs_free_path(path);
3065         return ret;
3066 }
3067
3068 static int check_csums(struct btrfs_root *root)
3069 {
3070         struct btrfs_path *path;
3071         struct extent_buffer *leaf;
3072         struct btrfs_key key;
3073         u64 offset = 0, num_bytes = 0;
3074         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3075         int errors = 0;
3076         int ret;
3077
3078         root = root->fs_info->csum_root;
3079
3080         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3081         key.type = BTRFS_EXTENT_CSUM_KEY;
3082         key.offset = 0;
3083
3084         path = btrfs_alloc_path();
3085         if (!path)
3086                 return -ENOMEM;
3087
3088         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3089         if (ret < 0) {
3090                 fprintf(stderr, "Error searching csum tree %d\n", ret);
3091                 btrfs_free_path(path);
3092                 return ret;
3093         }
3094
3095         if (ret > 0 && path->slots[0])
3096                 path->slots[0]--;
3097         ret = 0;
3098
3099         while (1) {
3100                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3101                         ret = btrfs_next_leaf(root, path);
3102                         if (ret < 0) {
3103                                 fprintf(stderr, "Error going to next leaf "
3104                                         "%d\n", ret);
3105                                 break;
3106                         }
3107                         if (ret)
3108                                 break;
3109                 }
3110                 leaf = path->nodes[0];
3111
3112                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3113                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
3114                         path->slots[0]++;
3115                         continue;
3116                 }
3117
3118                 if (!num_bytes) {
3119                         offset = key.offset;
3120                 } else if (key.offset != offset + num_bytes) {
3121                         ret = check_extent_exists(root, offset, num_bytes);
3122                         if (ret) {
3123                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
3124                                         "there is no extent record\n",
3125                                         offset, offset+num_bytes);
3126                                 errors++;
3127                         }
3128                         offset = key.offset;
3129                         num_bytes = 0;
3130                 }
3131
3132                 num_bytes += (btrfs_item_size_nr(leaf, path->slots[0]) /
3133                               csum_size) * root->sectorsize;
3134                 path->slots[0]++;
3135         }
3136
3137         btrfs_free_path(path);
3138         return errors;
3139 }
3140
3141 static int run_next_block(struct btrfs_root *root,
3142                           struct block_info *bits,
3143                           int bits_nr,
3144                           u64 *last,
3145                           struct cache_tree *pending,
3146                           struct cache_tree *seen,
3147                           struct cache_tree *reada,
3148                           struct cache_tree *nodes,
3149                           struct cache_tree *extent_cache)
3150 {
3151         struct extent_buffer *buf;
3152         u64 bytenr;
3153         u32 size;
3154         u64 parent;
3155         u64 owner;
3156         u64 flags;
3157         int ret;
3158         int i;
3159         int nritems;
3160         struct btrfs_key key;
3161         struct cache_extent *cache;
3162         int reada_bits;
3163
3164         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
3165                                     bits_nr, &reada_bits);
3166         if (nritems == 0)
3167                 return 1;
3168
3169         if (!reada_bits) {
3170                 for(i = 0; i < nritems; i++) {
3171                         ret = add_cache_extent(reada, bits[i].start,
3172                                                bits[i].size);
3173                         if (ret == -EEXIST)
3174                                 continue;
3175
3176                         /* fixme, get the parent transid */
3177                         readahead_tree_block(root, bits[i].start,
3178                                              bits[i].size, 0);
3179                 }
3180         }
3181         *last = bits[0].start;
3182         bytenr = bits[0].start;
3183         size = bits[0].size;
3184
3185         cache = find_cache_extent(pending, bytenr, size);
3186         if (cache) {
3187                 remove_cache_extent(pending, cache);
3188                 free(cache);
3189         }
3190         cache = find_cache_extent(reada, bytenr, size);
3191         if (cache) {
3192                 remove_cache_extent(reada, cache);
3193                 free(cache);
3194         }
3195         cache = find_cache_extent(nodes, bytenr, size);
3196         if (cache) {
3197                 remove_cache_extent(nodes, cache);
3198                 free(cache);
3199         }
3200
3201         /* fixme, get the real parent transid */
3202         buf = read_tree_block(root, bytenr, size, 0);
3203         if (!extent_buffer_uptodate(buf)) {
3204                 record_bad_block_io(root->fs_info,
3205                                     extent_cache, bytenr, size);
3206                 goto out;
3207         }
3208
3209         nritems = btrfs_header_nritems(buf);
3210
3211         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
3212                                        btrfs_header_level(buf), 1, NULL,
3213                                        &flags);
3214         if (ret < 0)
3215                 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
3216
3217         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
3218                 parent = bytenr;
3219                 owner = 0;
3220         } else {
3221                 parent = 0;
3222                 owner = btrfs_header_owner(buf);
3223         }
3224
3225         ret = check_block(root, extent_cache, buf, flags);
3226         if (ret)
3227                 goto out;
3228
3229         if (btrfs_is_leaf(buf)) {
3230                 btree_space_waste += btrfs_leaf_free_space(root, buf);
3231                 for (i = 0; i < nritems; i++) {
3232                         struct btrfs_file_extent_item *fi;
3233                         btrfs_item_key_to_cpu(buf, &key, i);
3234                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
3235                                 process_extent_item(root, extent_cache, buf,
3236                                                     i);
3237                                 continue;
3238                         }
3239                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3240                                 process_extent_item(root, extent_cache, buf,
3241                                                     i);
3242                                 continue;
3243                         }
3244                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
3245                                 total_csum_bytes +=
3246                                         btrfs_item_size_nr(buf, i);
3247                                 continue;
3248                         }
3249                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3250                                 continue;
3251                         }
3252                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
3253 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3254                                 process_extent_ref_v0(extent_cache, buf, i);
3255 #else
3256                                 BUG();
3257 #endif
3258                                 continue;
3259                         }
3260
3261                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
3262                                 add_tree_backref(extent_cache, key.objectid, 0,
3263                                                  key.offset, 0);
3264                                 continue;
3265                         }
3266                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
3267                                 add_tree_backref(extent_cache, key.objectid,
3268                                                  key.offset, 0, 0);
3269                                 continue;
3270                         }
3271                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3272                                 struct btrfs_extent_data_ref *ref;
3273                                 ref = btrfs_item_ptr(buf, i,
3274                                                 struct btrfs_extent_data_ref);
3275                                 add_data_backref(extent_cache,
3276                                         key.objectid, 0,
3277                                         btrfs_extent_data_ref_root(buf, ref),
3278                                         btrfs_extent_data_ref_objectid(buf,
3279                                                                        ref),
3280                                         btrfs_extent_data_ref_offset(buf, ref),
3281                                         btrfs_extent_data_ref_count(buf, ref),
3282                                         0, root->sectorsize);
3283                                 continue;
3284                         }
3285                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3286                                 struct btrfs_shared_data_ref *ref;
3287                                 ref = btrfs_item_ptr(buf, i,
3288                                                 struct btrfs_shared_data_ref);
3289                                 add_data_backref(extent_cache,
3290                                         key.objectid, key.offset, 0, 0, 0,
3291                                         btrfs_shared_data_ref_count(buf, ref),
3292                                         0, root->sectorsize);
3293                                 continue;
3294                         }
3295                         if (key.type != BTRFS_EXTENT_DATA_KEY)
3296                                 continue;
3297                         fi = btrfs_item_ptr(buf, i,
3298                                             struct btrfs_file_extent_item);
3299                         if (btrfs_file_extent_type(buf, fi) ==
3300                             BTRFS_FILE_EXTENT_INLINE)
3301                                 continue;
3302                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
3303                                 continue;
3304
3305                         data_bytes_allocated +=
3306                                 btrfs_file_extent_disk_num_bytes(buf, fi);
3307                         if (data_bytes_allocated < root->sectorsize) {
3308                                 abort();
3309                         }
3310                         data_bytes_referenced +=
3311                                 btrfs_file_extent_num_bytes(buf, fi);
3312                         add_data_backref(extent_cache,
3313                                 btrfs_file_extent_disk_bytenr(buf, fi),
3314                                 parent, owner, key.objectid, key.offset -
3315                                 btrfs_file_extent_offset(buf, fi), 1, 1,
3316                                 btrfs_file_extent_disk_num_bytes(buf, fi));
3317                         BUG_ON(ret);
3318                 }
3319         } else {
3320                 int level;
3321                 struct btrfs_key first_key;
3322
3323                 first_key.objectid = 0;
3324
3325                 if (nritems > 0)
3326                         btrfs_item_key_to_cpu(buf, &first_key, 0);
3327                 level = btrfs_header_level(buf);
3328                 for (i = 0; i < nritems; i++) {
3329                         u64 ptr = btrfs_node_blockptr(buf, i);
3330                         u32 size = btrfs_level_size(root, level - 1);
3331                         btrfs_node_key_to_cpu(buf, &key, i);
3332                         ret = add_extent_rec(extent_cache, &key,
3333                                              ptr, size, 0, 0, 1, 0, 1, 0,
3334                                              size);
3335                         BUG_ON(ret);
3336
3337                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
3338
3339                         if (level > 1) {
3340                                 add_pending(nodes, seen, ptr, size);
3341                         } else {
3342                                 add_pending(pending, seen, ptr, size);
3343                         }
3344                 }
3345                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
3346                                       nritems) * sizeof(struct btrfs_key_ptr);
3347         }
3348         total_btree_bytes += buf->len;
3349         if (fs_root_objectid(btrfs_header_owner(buf)))
3350                 total_fs_tree_bytes += buf->len;
3351         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
3352                 total_extent_tree_bytes += buf->len;
3353         if (!found_old_backref &&
3354             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
3355             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
3356             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
3357                 found_old_backref = 1;
3358 out:
3359         free_extent_buffer(buf);
3360         return 0;
3361 }
3362
3363 static int add_root_to_pending(struct extent_buffer *buf,
3364                                struct cache_tree *extent_cache,
3365                                struct cache_tree *pending,
3366                                struct cache_tree *seen,
3367                                struct cache_tree *nodes,
3368                                struct btrfs_key *root_key)
3369 {
3370         if (btrfs_header_level(buf) > 0)
3371                 add_pending(nodes, seen, buf->start, buf->len);
3372         else
3373                 add_pending(pending, seen, buf->start, buf->len);
3374         add_extent_rec(extent_cache, NULL, buf->start, buf->len,
3375                        0, 1, 1, 0, 1, 0, buf->len);
3376
3377         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
3378             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
3379                 add_tree_backref(extent_cache, buf->start, buf->start,
3380                                  0, 1);
3381         else
3382                 add_tree_backref(extent_cache, buf->start, 0,
3383                                  root_key->objectid, 1);
3384         return 0;
3385 }
3386
3387 /* as we fix the tree, we might be deleting blocks that
3388  * we're tracking for repair.  This hook makes sure we
3389  * remove any backrefs for blocks as we are fixing them.
3390  */
3391 static int free_extent_hook(struct btrfs_trans_handle *trans,
3392                             struct btrfs_root *root,
3393                             u64 bytenr, u64 num_bytes, u64 parent,
3394                             u64 root_objectid, u64 owner, u64 offset,
3395                             int refs_to_drop)
3396 {
3397         struct extent_record *rec;
3398         struct cache_extent *cache;
3399         int is_data;
3400         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
3401
3402         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
3403         cache = find_cache_extent(extent_cache, bytenr, num_bytes);
3404         if (!cache)
3405                 return 0;
3406
3407         rec = container_of(cache, struct extent_record, cache);
3408         if (is_data) {
3409                 struct data_backref *back;
3410                 back = find_data_backref(rec, parent, root_objectid, owner,
3411                                          offset, 1, bytenr, num_bytes);
3412                 if (!back)
3413                         goto out;
3414                 if (back->node.found_ref) {
3415                         back->found_ref -= refs_to_drop;
3416                         if (rec->refs)
3417                                 rec->refs -= refs_to_drop;
3418                 }
3419                 if (back->node.found_extent_tree) {
3420                         back->num_refs -= refs_to_drop;
3421                         if (rec->extent_item_refs)
3422                                 rec->extent_item_refs -= refs_to_drop;
3423                 }
3424                 if (back->found_ref == 0)
3425                         back->node.found_ref = 0;
3426                 if (back->num_refs == 0)
3427                         back->node.found_extent_tree = 0;
3428
3429                 if (!back->node.found_extent_tree && back->node.found_ref) {
3430                         list_del(&back->node.list);
3431                         free(back);
3432                 }
3433         } else {
3434                 struct tree_backref *back;
3435                 back = find_tree_backref(rec, parent, root_objectid);
3436                 if (!back)
3437                         goto out;
3438                 if (back->node.found_ref) {
3439                         if (rec->refs)
3440                                 rec->refs--;
3441                         back->node.found_ref = 0;
3442                 }
3443                 if (back->node.found_extent_tree) {
3444                         if (rec->extent_item_refs)
3445                                 rec->extent_item_refs--;
3446                         back->node.found_extent_tree = 0;
3447                 }
3448                 if (!back->node.found_extent_tree && back->node.found_ref) {
3449                         list_del(&back->node.list);
3450                         free(back);
3451                 }
3452         }
3453         maybe_free_extent_rec(extent_cache, rec);
3454 out:
3455         return 0;
3456 }
3457
3458 static int delete_extent_records(struct btrfs_trans_handle *trans,
3459                                  struct btrfs_root *root,
3460                                  struct btrfs_path *path,
3461                                  u64 bytenr, u64 new_len)
3462 {
3463         struct btrfs_key key;
3464         struct btrfs_key found_key;
3465         struct extent_buffer *leaf;
3466         int ret;
3467         int slot;
3468
3469
3470         key.objectid = bytenr;
3471         key.type = (u8)-1;
3472         key.offset = (u64)-1;
3473
3474         while(1) {
3475                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
3476                                         &key, path, 0, 1);
3477                 if (ret < 0)
3478                         break;
3479
3480                 if (ret > 0) {
3481                         ret = 0;
3482                         if (path->slots[0] == 0)
3483                                 break;
3484                         path->slots[0]--;
3485                 }
3486                 ret = 0;
3487
3488                 leaf = path->nodes[0];
3489                 slot = path->slots[0];
3490
3491                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3492                 if (found_key.objectid != bytenr)
3493                         break;
3494
3495                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
3496                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
3497                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3498                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
3499                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
3500                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
3501                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
3502                         btrfs_release_path(NULL, path);
3503                         if (found_key.type == 0) {
3504                                 if (found_key.offset == 0)
3505                                         break;
3506                                 key.offset = found_key.offset - 1;
3507                                 key.type = found_key.type;
3508                         }
3509                         key.type = found_key.type - 1;
3510                         key.offset = (u64)-1;
3511                         continue;
3512                 }
3513
3514                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
3515                         found_key.objectid, found_key.type, found_key.offset);
3516
3517                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
3518                 if (ret)
3519                         break;
3520                 btrfs_release_path(NULL, path);
3521
3522                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
3523                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
3524                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
3525                                 found_key.offset : root->leafsize;
3526
3527                         ret = btrfs_update_block_group(trans, root, bytenr,
3528                                                        bytes, 0, 0);
3529                         if (ret)
3530                                 break;
3531                 }
3532         }
3533
3534         btrfs_release_path(NULL, path);
3535         return ret;
3536 }
3537
3538 /*
3539  * for a single backref, this will allocate a new extent
3540  * and add the backref to it.
3541  */
3542 static int record_extent(struct btrfs_trans_handle *trans,
3543                          struct btrfs_fs_info *info,
3544                          struct btrfs_path *path,
3545                          struct extent_record *rec,
3546                          struct extent_backref *back,
3547                          int allocated, u64 flags)
3548 {
3549         int ret;
3550         struct btrfs_root *extent_root = info->extent_root;
3551         struct extent_buffer *leaf;
3552         struct btrfs_key ins_key;
3553         struct btrfs_extent_item *ei;
3554         struct tree_backref *tback;
3555         struct data_backref *dback;
3556         struct btrfs_tree_block_info *bi;
3557
3558         if (!back->is_data)
3559                 rec->max_size = max_t(u64, rec->max_size,
3560                                     info->extent_root->leafsize);
3561
3562         if (!allocated) {
3563                 u32 item_size = sizeof(*ei);
3564
3565                 if (!back->is_data)
3566                         item_size += sizeof(*bi);
3567
3568                 ins_key.objectid = rec->start;
3569                 ins_key.offset = rec->max_size;
3570                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
3571
3572                 ret = btrfs_insert_empty_item(trans, extent_root, path,
3573                                         &ins_key, item_size);
3574                 if (ret)
3575                         goto fail;
3576
3577                 leaf = path->nodes[0];
3578                 ei = btrfs_item_ptr(leaf, path->slots[0],
3579                                     struct btrfs_extent_item);
3580
3581                 btrfs_set_extent_refs(leaf, ei, 0);
3582                 btrfs_set_extent_generation(leaf, ei, rec->generation);
3583
3584                 if (back->is_data) {
3585                         btrfs_set_extent_flags(leaf, ei,
3586                                                BTRFS_EXTENT_FLAG_DATA);
3587                 } else {
3588                         struct btrfs_disk_key copy_key;;
3589
3590                         tback = (struct tree_backref *)back;
3591                         bi = (struct btrfs_tree_block_info *)(ei + 1);
3592                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
3593                                              sizeof(*bi));
3594                         memset(&copy_key, 0, sizeof(copy_key));
3595
3596                         copy_key.objectid = le64_to_cpu(rec->info_objectid);
3597                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
3598                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
3599
3600                         btrfs_set_extent_flags(leaf, ei,
3601                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
3602                 }
3603
3604                 btrfs_mark_buffer_dirty(leaf);
3605                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
3606                                                rec->max_size, 1, 0);
3607                 if (ret)
3608                         goto fail;
3609                 btrfs_release_path(NULL, path);
3610         }
3611
3612         if (back->is_data) {
3613                 u64 parent;
3614                 int i;
3615
3616                 dback = (struct data_backref *)back;
3617                 if (back->full_backref)
3618                         parent = dback->parent;
3619                 else
3620                         parent = 0;
3621
3622                 for (i = 0; i < dback->found_ref; i++) {
3623                         /* if parent != 0, we're doing a full backref
3624                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
3625                          * just makes the backref allocator create a data
3626                          * backref
3627                          */
3628                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
3629                                                    rec->start, rec->max_size,
3630                                                    parent,
3631                                                    dback->root,
3632                                                    parent ?
3633                                                    BTRFS_FIRST_FREE_OBJECTID :
3634                                                    dback->owner,
3635                                                    dback->offset);
3636                         if (ret)
3637                                 break;
3638                 }
3639                 fprintf(stderr, "adding new data backref"
3640                                 " on %llu %s %llu owner %llu"
3641                                 " offset %llu found %d\n",
3642                                 (unsigned long long)rec->start,
3643                                 back->full_backref ?
3644                                 "parent" : "root",
3645                                 back->full_backref ?
3646                                 (unsigned long long)parent :
3647                                 (unsigned long long)dback->root,
3648                                 (unsigned long long)dback->owner,
3649                                 (unsigned long long)dback->offset,
3650                                 dback->found_ref);
3651         } else {
3652                 u64 parent;
3653
3654                 tback = (struct tree_backref *)back;
3655                 if (back->full_backref)
3656                         parent = tback->parent;
3657                 else
3658                         parent = 0;
3659
3660                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
3661                                            rec->start, rec->max_size,
3662                                            parent, tback->root, 0, 0);
3663                 fprintf(stderr, "adding new tree backref on "
3664                         "start %llu len %llu parent %llu root %llu\n",
3665                         rec->start, rec->max_size, tback->parent, tback->root);
3666         }
3667         if (ret)
3668                 goto fail;
3669 fail:
3670         btrfs_release_path(NULL, path);
3671         return ret;
3672 }
3673
3674 struct extent_entry {
3675         u64 bytenr;
3676         u64 bytes;
3677         int count;
3678         struct list_head list;
3679 };
3680
3681 static struct extent_entry *find_entry(struct list_head *entries,
3682                                        u64 bytenr, u64 bytes)
3683 {
3684         struct extent_entry *entry = NULL;
3685
3686         list_for_each_entry(entry, entries, list) {
3687                 if (entry->bytenr == bytenr && entry->bytes == bytes)
3688                         return entry;
3689         }
3690
3691         return NULL;
3692 }
3693
3694 static struct extent_entry *find_most_right_entry(struct list_head *entries)
3695 {
3696         struct extent_entry *entry, *best = NULL, *prev = NULL;
3697
3698         list_for_each_entry(entry, entries, list) {
3699                 if (!prev) {
3700                         prev = entry;
3701                         continue;
3702                 }
3703
3704                 /*
3705                  * If our current entry == best then we can't be sure our best
3706                  * is really the best, so we need to keep searching.
3707                  */
3708                 if (best && best->count == entry->count) {
3709                         prev = entry;
3710                         best = NULL;
3711                         continue;
3712                 }
3713
3714                 /* Prev == entry, not good enough, have to keep searching */
3715                 if (prev->count == entry->count)
3716                         continue;
3717
3718                 if (!best)
3719                         best = (prev->count > entry->count) ? prev : entry;
3720                 else if (best->count < entry->count)
3721                         best = entry;
3722                 prev = entry;
3723         }
3724
3725         return best;
3726 }
3727
3728 static int repair_ref(struct btrfs_trans_handle *trans,
3729                       struct btrfs_fs_info *info, struct btrfs_path *path,
3730                       struct data_backref *dback, struct extent_entry *entry)
3731 {
3732         struct btrfs_root *root;
3733         struct btrfs_file_extent_item *fi;
3734         struct extent_buffer *leaf;
3735         struct btrfs_key key;
3736         u64 bytenr, bytes;
3737         int ret;
3738
3739         key.objectid = dback->root;
3740         key.type = BTRFS_ROOT_ITEM_KEY;
3741         key.offset = (u64)-1;
3742         root = btrfs_read_fs_root(info, &key);
3743         if (IS_ERR(root)) {
3744                 fprintf(stderr, "Couldn't find root for our ref\n");
3745                 return -EINVAL;
3746         }
3747
3748         /*
3749          * The backref points to the original offset of the extent if it was
3750          * split, so we need to search down to the offset we have and then walk
3751          * forward until we find the backref we're looking for.
3752          */
3753         key.objectid = dback->owner;
3754         key.type = BTRFS_EXTENT_DATA_KEY;
3755         key.offset = dback->offset;
3756         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3757         if (ret < 0) {
3758                 fprintf(stderr, "Error looking up ref %d\n", ret);
3759                 return ret;
3760         }
3761
3762         while (1) {
3763                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3764                         ret = btrfs_next_leaf(root, path);
3765                         if (ret) {
3766                                 fprintf(stderr, "Couldn't find our ref, next\n");
3767                                 return -EINVAL;
3768                         }
3769                 }
3770                 leaf = path->nodes[0];
3771                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3772                 if (key.objectid != dback->owner ||
3773                     key.type != BTRFS_EXTENT_DATA_KEY) {
3774                         fprintf(stderr, "Couldn't find our ref, search\n");
3775                         return -EINVAL;
3776                 }
3777                 fi = btrfs_item_ptr(leaf, path->slots[0],
3778                                     struct btrfs_file_extent_item);
3779                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
3780                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
3781
3782                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
3783                         break;
3784                 path->slots[0]++;
3785         }
3786
3787         btrfs_release_path(root, path);
3788
3789         /*
3790          * Have to make sure that this root gets updated when we commit the
3791          * transaction
3792          */
3793         root->track_dirty = 1;
3794         if (root->last_trans != trans->transid) {
3795                 root->last_trans = trans->transid;
3796                 root->commit_root = root->node;
3797                 extent_buffer_get(root->node);
3798         }
3799
3800         /*
3801          * Ok we have the key of the file extent we want to fix, now we can cow
3802          * down to the thing and fix it.
3803          */
3804         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3805         if (ret < 0) {
3806                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
3807                         key.objectid, key.type, key.offset, ret);
3808                 return ret;
3809         }
3810         if (ret > 0) {
3811                 fprintf(stderr, "Well that's odd, we just found this key "
3812                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
3813                         key.offset);
3814                 return -EINVAL;
3815         }
3816         leaf = path->nodes[0];
3817         fi = btrfs_item_ptr(leaf, path->slots[0],
3818                             struct btrfs_file_extent_item);
3819
3820         if (btrfs_file_extent_compression(leaf, fi) &&
3821             dback->disk_bytenr != entry->bytenr) {
3822                 fprintf(stderr, "Ref doesn't match the record start and is "
3823                         "compressed, please take a btrfs-image of this file "
3824                         "system and send it to a btrfs developer so they can "
3825                         "complete this functionality for bytenr %Lu\n",
3826                         dback->disk_bytenr);
3827                 return -EINVAL;
3828         }
3829
3830         if (dback->disk_bytenr > entry->bytenr) {
3831                 u64 off_diff, offset;
3832
3833                 off_diff = dback->disk_bytenr - entry->bytenr;
3834                 offset = btrfs_file_extent_offset(leaf, fi);
3835                 if (dback->disk_bytenr + offset +
3836                     btrfs_file_extent_num_bytes(leaf, fi) >
3837                     entry->bytenr + entry->bytes) {
3838                         fprintf(stderr, "Ref is past the entry end, please "
3839                                 "take a btrfs-image of this file system and "
3840                                 "send it to a btrfs developer, ref %Lu\n",
3841                                 dback->disk_bytenr);
3842                         return -EINVAL;
3843                 }
3844                 offset += off_diff;
3845                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
3846                 btrfs_set_file_extent_offset(leaf, fi, offset);
3847         } else if (dback->disk_bytenr < entry->bytenr) {
3848                 u64 offset;
3849
3850                 offset = btrfs_file_extent_offset(leaf, fi);
3851                 if (dback->disk_bytenr + offset < entry->bytenr) {
3852                         fprintf(stderr, "Ref is before the entry start, please"
3853                                 " take a btrfs-image of this file system and "
3854                                 "send it to a btrfs developer, ref %Lu\n",
3855                                 dback->disk_bytenr);
3856                         return -EINVAL;
3857                 }
3858
3859                 offset += dback->disk_bytenr;
3860                 offset -= entry->bytenr;
3861                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
3862                 btrfs_set_file_extent_offset(leaf, fi, offset);
3863         }
3864
3865         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
3866
3867         /*
3868          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
3869          * only do this if we aren't using compression, otherwise it's a
3870          * trickier case.
3871          */
3872         if (!btrfs_file_extent_compression(leaf, fi))
3873                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
3874         else
3875                 printf("ram bytes may be wrong?\n");
3876         btrfs_mark_buffer_dirty(leaf);
3877         btrfs_release_path(root, path);
3878         return 0;
3879 }
3880
3881 static int verify_backrefs(struct btrfs_trans_handle *trans,
3882                            struct btrfs_fs_info *info, struct btrfs_path *path,
3883                            struct extent_record *rec)
3884 {
3885         struct extent_backref *back;
3886         struct data_backref *dback;
3887         struct extent_entry *entry, *best = NULL;
3888         LIST_HEAD(entries);
3889         int nr_entries = 0;
3890         int ret = 0;
3891
3892         /*
3893          * Metadata is easy and the backrefs should always agree on bytenr and
3894          * size, if not we've got bigger issues.
3895          */
3896         if (rec->metadata)
3897                 return 0;
3898
3899         list_for_each_entry(back, &rec->backrefs, list) {
3900                 dback = (struct data_backref *)back;
3901                 /*
3902                  * We only pay attention to backrefs that we found a real
3903                  * backref for.
3904                  */
3905                 if (dback->found_ref == 0)
3906                         continue;
3907                 if (back->full_backref)
3908                         continue;
3909
3910                 /*
3911                  * For now we only catch when the bytes don't match, not the
3912                  * bytenr.  We can easily do this at the same time, but I want
3913                  * to have a fs image to test on before we just add repair
3914                  * functionality willy-nilly so we know we won't screw up the
3915                  * repair.
3916                  */
3917
3918                 entry = find_entry(&entries, dback->disk_bytenr,
3919                                    dback->bytes);
3920                 if (!entry) {
3921                         entry = malloc(sizeof(struct extent_entry));
3922                         if (!entry) {
3923                                 ret = -ENOMEM;
3924                                 goto out;
3925                         }
3926                         memset(entry, 0, sizeof(*entry));
3927                         entry->bytenr = dback->disk_bytenr;
3928                         entry->bytes = dback->bytes;
3929                         list_add_tail(&entry->list, &entries);
3930                         nr_entries++;
3931                 }
3932                 entry->count++;
3933         }
3934
3935         /* Yay all the backrefs agree, carry on good sir */
3936         if (nr_entries <= 1)
3937                 goto out;
3938
3939         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
3940                 "%Lu\n", rec->start);
3941
3942         /*
3943          * First we want to see if the backrefs can agree amongst themselves who
3944          * is right, so figure out which one of the entries has the highest
3945          * count.
3946          */
3947         best = find_most_right_entry(&entries);
3948
3949         /*
3950          * Ok so we may have an even split between what the backrefs think, so
3951          * this is where we use the extent ref to see what it thinks.
3952          */
3953         if (!best) {
3954                 entry = find_entry(&entries, rec->start, rec->nr);
3955                 if (!entry) {
3956                         fprintf(stderr, "Backrefs don't agree with eachother "
3957                                 "and extent record doesn't agree with anybody,"
3958                                 " so we can't fix bytenr %Lu bytes %Lu\n",
3959                                 rec->start, rec->nr);
3960                         ret = -EINVAL;
3961                         goto out;
3962                 }
3963                 entry->count++;
3964                 best = find_most_right_entry(&entries);
3965                 if (!best) {
3966                         fprintf(stderr, "Backrefs and extent record evenly "
3967                                 "split on who is right, this is going to "
3968                                 "require user input to fix bytenr %Lu bytes "
3969                                 "%Lu\n", rec->start, rec->nr);
3970                         ret = -EINVAL;
3971                         goto out;
3972                 }
3973         }
3974
3975         /*
3976          * I don't think this can happen currently as we'll abort() if we catch
3977          * this case higher up, but in case somebody removes that we still can't
3978          * deal with it properly here yet, so just bail out of that's the case.
3979          */
3980         if (best->bytenr != rec->start) {
3981                 fprintf(stderr, "Extent start and backref starts don't match, "
3982                         "please use btrfs-image on this file system and send "
3983                         "it to a btrfs developer so they can make fsck fix "
3984                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
3985                         rec->start, rec->nr);
3986                 ret = -EINVAL;
3987                 goto out;
3988         }
3989
3990         /*
3991          * Ok great we all agreed on an extent record, let's go find the real
3992          * references and fix up the ones that don't match.
3993          */
3994         list_for_each_entry(back, &rec->backrefs, list) {
3995                 dback = (struct data_backref *)back;
3996
3997                 /*
3998                  * Still ignoring backrefs that don't have a real ref attached
3999                  * to them.
4000                  */
4001                 if (dback->found_ref == 0)
4002                         continue;
4003                 if (back->full_backref)
4004                         continue;
4005
4006                 if (dback->bytes == best->bytes &&
4007                     dback->disk_bytenr == best->bytenr)
4008                         continue;
4009
4010                 ret = repair_ref(trans, info, path, dback, best);
4011                 if (ret)
4012                         goto out;
4013         }
4014
4015         /*
4016          * Ok we messed with the actual refs, which means we need to drop our
4017          * entire cache and go back and rescan.  I know this is a huge pain and
4018          * adds a lot of extra work, but it's the only way to be safe.  Once all
4019          * the backrefs agree we may not need to do anything to the extent
4020          * record itself.
4021          */
4022         ret = -EAGAIN;
4023 out:
4024         while (!list_empty(&entries)) {
4025                 entry = list_entry(entries.next, struct extent_entry, list);
4026                 list_del_init(&entry->list);
4027                 free(entry);
4028         }
4029         return ret;
4030 }
4031
4032 static int process_duplicates(struct btrfs_root *root,
4033                               struct cache_tree *extent_cache,
4034                               struct extent_record *rec)
4035 {
4036         struct extent_record *good, *tmp;
4037         struct cache_extent *cache;
4038         int ret;
4039
4040         /*
4041          * If we found a extent record for this extent then return, or if we
4042          * have more than one duplicate we are likely going to need to delete
4043          * something.
4044          */
4045         if (rec->found_rec || rec->num_duplicates > 1)
4046                 return 0;
4047
4048         /* Shouldn't happen but just in case */
4049         BUG_ON(!rec->num_duplicates);
4050
4051         /*
4052          * So this happens if we end up with a backref that doesn't match the
4053          * actual extent entry.  So either the backref is bad or the extent
4054          * entry is bad.  Either way we want to have the extent_record actually
4055          * reflect what we found in the extent_tree, so we need to take the
4056          * duplicate out and use that as the extent_record since the only way we
4057          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
4058          */
4059         remove_cache_extent(extent_cache, &rec->cache);
4060
4061         good = list_entry(rec->dups.next, struct extent_record, list);
4062         list_del_init(&good->list);
4063         INIT_LIST_HEAD(&good->backrefs);
4064         INIT_LIST_HEAD(&good->dups);
4065         good->cache.start = good->start;
4066         good->cache.size = good->nr;
4067         good->content_checked = 0;
4068         good->owner_ref_checked = 0;
4069         good->num_duplicates = 0;
4070         good->refs = rec->refs;
4071         list_splice_init(&rec->backrefs, &good->backrefs);
4072         while (1) {
4073                 cache = find_cache_extent(extent_cache, good->start,
4074                                           good->nr);
4075                 if (!cache)
4076                         break;
4077                 tmp = container_of(cache, struct extent_record, cache);
4078
4079                 /*
4080                  * If we find another overlapping extent and it's found_rec is
4081                  * set then it's a duplicate and we need to try and delete
4082                  * something.
4083                  */
4084                 if (tmp->found_rec || tmp->num_duplicates > 0) {
4085                         if (list_empty(&good->list))
4086                                 list_add_tail(&good->list,
4087                                               &duplicate_extents);
4088                         good->num_duplicates += tmp->num_duplicates + 1;
4089                         list_splice_init(&tmp->dups, &good->dups);
4090                         list_del_init(&tmp->list);
4091                         list_add_tail(&tmp->list, &good->dups);
4092                         remove_cache_extent(extent_cache, &tmp->cache);
4093                         continue;
4094                 }
4095
4096                 /*
4097                  * Ok we have another non extent item backed extent rec, so lets
4098                  * just add it to this extent and carry on like we did above.
4099                  */
4100                 good->refs += tmp->refs;
4101                 list_splice_init(&tmp->backrefs, &good->backrefs);
4102                 remove_cache_extent(extent_cache, &tmp->cache);
4103                 free(tmp);
4104         }
4105         ret = insert_cache_extent(extent_cache, &good->cache);
4106         BUG_ON(ret);
4107         free(rec);
4108         return good->num_duplicates ? 0 : 1;
4109 }
4110
4111 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
4112                                     struct btrfs_root *root,
4113                                     struct extent_record *rec)
4114 {
4115         LIST_HEAD(delete_list);
4116         struct btrfs_path *path;
4117         struct extent_record *tmp, *good, *n;
4118         int nr_del = 0;
4119         int ret = 0;
4120         struct btrfs_key key;
4121
4122         path = btrfs_alloc_path();
4123         if (!path) {
4124                 ret = -ENOMEM;
4125                 goto out;
4126         }
4127
4128         good = rec;
4129         /* Find the record that covers all of the duplicates. */
4130         list_for_each_entry(tmp, &rec->dups, list) {
4131                 if (good->start < tmp->start)
4132                         continue;
4133                 if (good->nr > tmp->nr)
4134                         continue;
4135
4136                 if (tmp->start + tmp->nr < good->start + good->nr) {
4137                         fprintf(stderr, "Ok we have overlapping extents that "
4138                                 "aren't completely covered by eachother, this "
4139                                 "is going to require more careful thought.  "
4140                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
4141                                 tmp->start, tmp->nr, good->start, good->nr);
4142                         abort();
4143                 }
4144                 good = tmp;
4145         }
4146
4147         if (good != rec)
4148                 list_add_tail(&rec->list, &delete_list);
4149
4150         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
4151                 if (tmp == good)
4152                         continue;
4153                 list_move_tail(&tmp->list, &delete_list);
4154         }
4155
4156         root = root->fs_info->extent_root;
4157         list_for_each_entry(tmp, &delete_list, list) {
4158                 if (tmp->found_rec == 0)
4159                         continue;
4160                 key.objectid = tmp->start;
4161                 key.type = BTRFS_EXTENT_ITEM_KEY;
4162                 key.offset = tmp->nr;
4163
4164                 /* Shouldn't happen but just in case */
4165                 if (tmp->metadata) {
4166                         fprintf(stderr, "Well this shouldn't happen, extent "
4167                                 "record overlaps but is metadata? "
4168                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
4169                         abort();
4170                 }
4171
4172                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4173                 if (ret) {
4174                         if (ret > 0)
4175                                 ret = -EINVAL;
4176                         goto out;
4177                 }
4178                 ret = btrfs_del_item(trans, root, path);
4179                 if (ret)
4180                         goto out;
4181                 btrfs_release_path(root, path);
4182                 nr_del++;
4183         }
4184
4185 out:
4186         while (!list_empty(&delete_list)) {
4187                 tmp = list_entry(delete_list.next, struct extent_record, list);
4188                 list_del_init(&tmp->list);
4189                 if (tmp == rec)
4190                         continue;
4191                 free(tmp);
4192         }
4193
4194         while (!list_empty(&rec->dups)) {
4195                 tmp = list_entry(rec->dups.next, struct extent_record, list);
4196                 list_del_init(&tmp->list);
4197                 free(tmp);
4198         }
4199
4200         btrfs_free_path(path);
4201
4202         if (!ret && !nr_del)
4203                 rec->num_duplicates = 0;
4204
4205         return ret ? ret : nr_del;
4206 }
4207
4208 /*
4209  * when an incorrect extent item is found, this will delete
4210  * all of the existing entries for it and recreate them
4211  * based on what the tree scan found.
4212  */
4213 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
4214                              struct btrfs_fs_info *info,
4215                              struct extent_record *rec)
4216 {
4217         int ret;
4218         struct btrfs_path *path;
4219         struct list_head *cur = rec->backrefs.next;
4220         struct cache_extent *cache;
4221         struct extent_backref *back;
4222         int allocated = 0;
4223         u64 flags = 0;
4224
4225         /* remember our flags for recreating the extent */
4226         ret = btrfs_lookup_extent_info(NULL, info->extent_root, rec->start,
4227                                        rec->max_size, rec->metadata, NULL,
4228                                        &flags);
4229         if (ret < 0)
4230                 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4231
4232         path = btrfs_alloc_path();
4233
4234         /* step one, make sure all of the backrefs agree */
4235         ret = verify_backrefs(trans, info, path, rec);
4236         if (ret < 0)
4237                 goto out;
4238
4239         /* step two, delete all the existing records */
4240         ret = delete_extent_records(trans, info->extent_root, path,
4241                                     rec->start, rec->max_size);
4242
4243         if (ret < 0)
4244                 goto out;
4245
4246         /* was this block corrupt?  If so, don't add references to it */
4247         cache = find_cache_extent(info->corrupt_blocks, rec->start, rec->max_size);
4248         if (cache) {
4249                 ret = 0;
4250                 goto out;
4251         }
4252
4253         /* step three, recreate all the refs we did find */
4254         while(cur != &rec->backrefs) {
4255                 back = list_entry(cur, struct extent_backref, list);
4256                 cur = cur->next;
4257
4258                 /*
4259                  * if we didn't find any references, don't create a
4260                  * new extent record
4261                  */
4262                 if (!back->found_ref)
4263                         continue;
4264
4265                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
4266                 allocated = 1;
4267
4268                 if (ret)
4269                         goto out;
4270         }
4271 out:
4272         btrfs_free_path(path);
4273         return ret;
4274 }
4275
4276 /* right now we only prune from the extent allocation tree */
4277 static int prune_one_block(struct btrfs_trans_handle *trans,
4278                            struct btrfs_fs_info *info,
4279                            struct btrfs_corrupt_block *corrupt)
4280 {
4281         int ret;
4282         struct btrfs_path path;
4283         struct extent_buffer *eb;
4284         u64 found;
4285         int slot;
4286         int nritems;
4287         int level = corrupt->level + 1;
4288
4289         btrfs_init_path(&path);
4290 again:
4291         /* we want to stop at the parent to our busted block */
4292         path.lowest_level = level;
4293
4294         ret = btrfs_search_slot(trans, info->extent_root,
4295                                 &corrupt->key, &path, -1, 1);
4296
4297         if (ret < 0)
4298                 goto out;
4299
4300         eb = path.nodes[level];
4301         if (!eb) {
4302                 ret = -ENOENT;
4303                 goto out;
4304         }
4305
4306         /*
4307          * hopefully the search gave us the block we want to prune,
4308          * lets try that first
4309          */
4310         slot = path.slots[level];
4311         found =  btrfs_node_blockptr(eb, slot);
4312         if (found == corrupt->cache.start)
4313                 goto del_ptr;
4314
4315         nritems = btrfs_header_nritems(eb);
4316
4317         /* the search failed, lets scan this node and hope we find it */
4318         for (slot = 0; slot < nritems; slot++) {
4319                 found =  btrfs_node_blockptr(eb, slot);
4320                 if (found == corrupt->cache.start)
4321                         goto del_ptr;
4322         }
4323         /*
4324          * we couldn't find the bad block.  TODO, search all the nodes for pointers
4325          * to this block
4326          */
4327         if (eb == info->extent_root->node) {
4328                 ret = -ENOENT;
4329                 goto out;
4330         } else {
4331                 level++;
4332                 btrfs_release_path(NULL, &path);
4333                 goto again;
4334         }
4335
4336 del_ptr:
4337         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
4338         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
4339
4340 out:
4341         btrfs_release_path(NULL, &path);
4342         return ret;
4343 }
4344
4345 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
4346                                 struct btrfs_fs_info *info)
4347 {
4348         struct cache_extent *cache;
4349         struct btrfs_corrupt_block *corrupt;
4350
4351         cache = find_first_cache_extent(info->corrupt_blocks, 0);
4352         while (1) {
4353                 if (!cache)
4354                         break;
4355                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4356                 prune_one_block(trans, info, corrupt);
4357                 cache = next_cache_extent(cache);
4358         }
4359         return 0;
4360 }
4361
4362 static void free_corrupt_block(struct cache_extent *cache)
4363 {
4364         struct btrfs_corrupt_block *corrupt;
4365
4366         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4367         free(corrupt);
4368 }
4369
4370 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4371
4372 static int check_block_group(struct btrfs_trans_handle *trans,
4373                               struct btrfs_fs_info *info,
4374                               struct map_lookup *map,
4375                               int *reinit)
4376 {
4377         struct btrfs_key key;
4378         struct btrfs_path path;
4379         int ret;
4380
4381         key.objectid = map->ce.start;
4382         key.offset = map->ce.size;
4383         key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
4384
4385         btrfs_init_path(&path);
4386         ret = btrfs_search_slot(NULL, info->extent_root,
4387                                 &key, &path, 0, 0);
4388         btrfs_release_path(NULL, &path);
4389         if (ret <= 0)
4390                 goto out;
4391
4392         ret = btrfs_make_block_group(trans, info->extent_root, 0, map->type,
4393                                BTRFS_FIRST_CHUNK_TREE_OBJECTID,
4394                                key.objectid, key.offset);
4395         *reinit = 1;
4396 out:
4397         return ret;
4398 }
4399
4400 static int check_block_groups(struct btrfs_trans_handle *trans,
4401                               struct btrfs_fs_info *info, int *reinit)
4402 {
4403         struct cache_extent *ce;
4404         struct map_lookup *map;
4405         struct btrfs_mapping_tree *map_tree = &info->mapping_tree;
4406
4407         /* this isn't quite working */
4408         return 0;
4409
4410         ce = find_first_cache_extent(&map_tree->cache_tree, 0);
4411         while (1) {
4412                 if (!ce)
4413                         break;
4414                 map = container_of(ce, struct map_lookup, ce);
4415                 check_block_group(trans, info, map, reinit);
4416                 ce = next_cache_extent(ce);
4417         }
4418         return 0;
4419 }
4420
4421 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
4422 {
4423         struct btrfs_block_group_cache *cache;
4424         u64 start, end;
4425         int ret;
4426
4427         while (1) {
4428                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
4429                                             &start, &end, EXTENT_DIRTY);
4430                 if (ret)
4431                         break;
4432                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
4433                                    GFP_NOFS);
4434         }
4435
4436         start = 0;
4437         while (1) {
4438                 cache = btrfs_lookup_first_block_group(fs_info, start);
4439                 if (!cache)
4440                         break;
4441                 if (cache->cached)
4442                         cache->cached = 0;
4443                 start = cache->key.objectid + cache->key.offset;
4444         }
4445 }
4446
4447 static int check_extent_refs(struct btrfs_trans_handle *trans,
4448                              struct btrfs_root *root,
4449                              struct cache_tree *extent_cache, int repair)
4450 {
4451         struct extent_record *rec;
4452         struct cache_extent *cache;
4453         int err = 0;
4454         int ret = 0;
4455         int fixed = 0;
4456         int reinit = 0;
4457         int had_dups = 0;
4458
4459         if (repair) {
4460                 /*
4461                  * if we're doing a repair, we have to make sure
4462                  * we don't allocate from the problem extents.
4463                  * In the worst case, this will be all the
4464                  * extents in the FS
4465                  */
4466                 cache = find_first_cache_extent(extent_cache, 0);
4467                 while(cache) {
4468                         rec = container_of(cache, struct extent_record, cache);
4469                         btrfs_pin_extent(root->fs_info,
4470                                          rec->start, rec->max_size);
4471                         cache = next_cache_extent(cache);
4472                 }
4473
4474                 /* pin down all the corrupted blocks too */
4475                 cache = find_first_cache_extent(root->fs_info->corrupt_blocks, 0);
4476                 while(cache) {
4477                         rec = container_of(cache, struct extent_record, cache);
4478                         btrfs_pin_extent(root->fs_info,
4479                                          rec->start, rec->max_size);
4480                         cache = next_cache_extent(cache);
4481                 }
4482                 prune_corrupt_blocks(trans, root->fs_info);
4483                 check_block_groups(trans, root->fs_info, &reinit);
4484                 if (reinit)
4485                         btrfs_read_block_groups(root->fs_info->extent_root);
4486                 reset_cached_block_groups(root->fs_info);
4487         }
4488
4489         /*
4490          * We need to delete any duplicate entries we find first otherwise we
4491          * could mess up the extent tree when we have backrefs that actually
4492          * belong to a different extent item and not the weird duplicate one.
4493          */
4494         while (repair && !list_empty(&duplicate_extents)) {
4495                 rec = list_entry(duplicate_extents.next, struct extent_record,
4496                                  list);
4497                 list_del_init(&rec->list);
4498
4499                 /* Sometimes we can find a backref before we find an actual
4500                  * extent, so we need to process it a little bit to see if there
4501                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
4502                  * if this is a backref screwup.  If we need to delete stuff
4503                  * process_duplicates() will return 0, otherwise it will return
4504                  * 1 and we
4505                  */
4506                 if (process_duplicates(root, extent_cache, rec))
4507                         continue;
4508                 ret = delete_duplicate_records(trans, root, rec);
4509                 if (ret < 0)
4510                         return ret;
4511                 /*
4512                  * delete_duplicate_records will return the number of entries
4513                  * deleted, so if it's greater than 0 then we know we actually
4514                  * did something and we need to remove.
4515                  */
4516                 if (ret)
4517                         had_dups = 1;
4518         }
4519
4520         if (had_dups)
4521                 return -EAGAIN;
4522
4523         while(1) {
4524                 fixed = 0;
4525                 cache = find_first_cache_extent(extent_cache, 0);
4526                 if (!cache)
4527                         break;
4528                 rec = container_of(cache, struct extent_record, cache);
4529                 if (rec->num_duplicates) {
4530                         fprintf(stderr, "extent item %llu has multiple extent "
4531                                 "items\n", (unsigned long long)rec->start);
4532                         err = 1;
4533                 }
4534
4535                 if (rec->refs != rec->extent_item_refs) {
4536                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
4537                                 (unsigned long long)rec->start,
4538                                 (unsigned long long)rec->nr);
4539                         fprintf(stderr, "extent item %llu, found %llu\n",
4540                                 (unsigned long long)rec->extent_item_refs,
4541                                 (unsigned long long)rec->refs);
4542                         if (!fixed && repair) {
4543                                 ret = fixup_extent_refs(trans, root->fs_info, rec);
4544                                 if (ret)
4545                                         goto repair_abort;
4546                                 fixed = 1;
4547                         }
4548                         err = 1;
4549
4550                 }
4551                 if (all_backpointers_checked(rec, 1)) {
4552                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
4553                                 (unsigned long long)rec->start,
4554                                 (unsigned long long)rec->nr);
4555
4556                         if (!fixed && repair) {
4557                                 ret = fixup_extent_refs(trans, root->fs_info, rec);
4558                                 if (ret)
4559                                         goto repair_abort;
4560                                 fixed = 1;
4561                         }
4562
4563                         err = 1;
4564                 }
4565                 if (!rec->owner_ref_checked) {
4566                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
4567                                 (unsigned long long)rec->start,
4568                                 (unsigned long long)rec->nr);
4569                         if (!fixed && repair) {
4570                                 ret = fixup_extent_refs(trans, root->fs_info, rec);
4571                                 if (ret)
4572                                         goto repair_abort;
4573                                 fixed = 1;
4574                         }
4575                         err = 1;
4576                 }
4577
4578                 remove_cache_extent(extent_cache, cache);
4579                 free_all_extent_backrefs(rec);
4580                 free(rec);
4581         }
4582 repair_abort:
4583         if (repair) {
4584                 if (ret && ret != -EAGAIN) {
4585                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
4586                         exit(1);
4587                 } else if (!ret) {
4588                         btrfs_fix_block_accounting(trans, root);
4589                 }
4590                 if (err)
4591                         fprintf(stderr, "repaired damaged extent references\n");
4592                 return ret;
4593         }
4594         return err;
4595 }
4596
4597 static void free_cache_tree(struct cache_tree *tree)
4598 {
4599         struct cache_extent *cache;
4600
4601         while (1) {
4602                 cache = find_first_cache_extent(tree, 0);
4603                 if (!cache)
4604                         break;
4605                 remove_cache_extent(tree, cache);
4606                 free(cache);
4607         }
4608 }
4609
4610 static int check_extents(struct btrfs_root *root, int repair)
4611 {
4612         struct cache_tree extent_cache;
4613         struct cache_tree seen;
4614         struct cache_tree pending;
4615         struct cache_tree reada;
4616         struct cache_tree nodes;
4617         struct cache_tree corrupt_blocks;
4618         struct btrfs_path path;
4619         struct btrfs_key key;
4620         struct btrfs_key found_key;
4621         int ret;
4622         u64 last = 0;
4623         struct block_info *bits;
4624         int bits_nr;
4625         struct extent_buffer *leaf;
4626         struct btrfs_trans_handle *trans = NULL;
4627         int slot;
4628         struct btrfs_root_item ri;
4629
4630         cache_tree_init(&extent_cache);
4631         cache_tree_init(&seen);
4632         cache_tree_init(&pending);
4633         cache_tree_init(&nodes);
4634         cache_tree_init(&reada);
4635         cache_tree_init(&corrupt_blocks);
4636
4637         if (repair) {
4638                 trans = btrfs_start_transaction(root, 1);
4639                 if (IS_ERR(trans)) {
4640                         fprintf(stderr, "Error starting transaction\n");
4641                         return PTR_ERR(trans);
4642                 }
4643                 root->fs_info->fsck_extent_cache = &extent_cache;
4644                 root->fs_info->free_extent_hook = free_extent_hook;
4645                 root->fs_info->corrupt_blocks = &corrupt_blocks;
4646         }
4647
4648         bits_nr = 1024;
4649         bits = malloc(bits_nr * sizeof(struct block_info));
4650         if (!bits) {
4651                 perror("malloc");
4652                 exit(1);
4653         }
4654
4655 again:
4656         add_root_to_pending(root->fs_info->tree_root->node,
4657                             &extent_cache, &pending, &seen, &nodes,
4658                             &root->fs_info->tree_root->root_key);
4659
4660         add_root_to_pending(root->fs_info->chunk_root->node,
4661                             &extent_cache, &pending, &seen, &nodes,
4662                             &root->fs_info->chunk_root->root_key);
4663
4664         btrfs_init_path(&path);
4665         key.offset = 0;
4666         key.objectid = 0;
4667         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
4668         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
4669                                         &key, &path, 0, 0);
4670         BUG_ON(ret < 0);
4671         while(1) {
4672                 leaf = path.nodes[0];
4673                 slot = path.slots[0];
4674                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
4675                         ret = btrfs_next_leaf(root, &path);
4676                         if (ret != 0)
4677                                 break;
4678                         leaf = path.nodes[0];
4679                         slot = path.slots[0];
4680                 }
4681                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
4682                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
4683                         unsigned long offset;
4684                         struct extent_buffer *buf;
4685
4686                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
4687                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
4688                         buf = read_tree_block(root->fs_info->tree_root,
4689                                               btrfs_root_bytenr(&ri),
4690                                               btrfs_level_size(root,
4691                                                btrfs_root_level(&ri)), 0);
4692                         add_root_to_pending(buf, &extent_cache, &pending,
4693                                             &seen, &nodes, &found_key);
4694                         free_extent_buffer(buf);
4695                 }
4696                 path.slots[0]++;
4697         }
4698         btrfs_release_path(root, &path);
4699         while(1) {
4700                 ret = run_next_block(root, bits, bits_nr, &last, &pending,
4701                                      &seen, &reada, &nodes, &extent_cache);
4702                 if (ret != 0)
4703                         break;
4704         }
4705         ret = check_extent_refs(trans, root, &extent_cache, repair);
4706
4707         if (ret == -EAGAIN) {
4708                 ret = btrfs_commit_transaction(trans, root);
4709                 if (ret)
4710                         goto out;
4711
4712                 trans = btrfs_start_transaction(root, 1);
4713                 if (IS_ERR(trans)) {
4714                         ret = PTR_ERR(trans);
4715                         goto out;
4716                 }
4717
4718                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
4719                 free_cache_tree(&seen);
4720                 free_cache_tree(&pending);
4721                 free_cache_tree(&reada);
4722                 free_cache_tree(&nodes);
4723                 free_extent_cache(root->fs_info, &extent_cache);
4724                 goto again;
4725         }
4726
4727         if (trans) {
4728                 int err;
4729
4730                 err = btrfs_commit_transaction(trans, root);
4731                 if (!ret)
4732                         ret = err;
4733         }
4734 out:
4735         if (repair) {
4736                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
4737                 root->fs_info->fsck_extent_cache = NULL;
4738                 root->fs_info->free_extent_hook = NULL;
4739                 root->fs_info->corrupt_blocks = NULL;
4740         }
4741         free(bits);
4742         return ret;
4743 }
4744
4745 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
4746                                 struct extent_buffer *eb, int tree_root)
4747 {
4748         struct extent_buffer *tmp;
4749         struct btrfs_root_item *ri;
4750         struct btrfs_key key;
4751         u64 bytenr;
4752         u32 leafsize;
4753         int level = btrfs_header_level(eb);
4754         int nritems;
4755         int ret;
4756         int i;
4757
4758         btrfs_pin_extent(fs_info, eb->start, eb->len);
4759
4760         leafsize = btrfs_super_leafsize(fs_info->super_copy);
4761         nritems = btrfs_header_nritems(eb);
4762         for (i = 0; i < nritems; i++) {
4763                 if (level == 0) {
4764                         btrfs_item_key_to_cpu(eb, &key, i);
4765                         if (key.type != BTRFS_ROOT_ITEM_KEY)
4766                                 continue;
4767                         /* Skip the extent root and reloc roots */
4768                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
4769                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
4770                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4771                                 continue;
4772                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
4773                         bytenr = btrfs_disk_root_bytenr(eb, ri);
4774
4775                         /*
4776                          * If at any point we start needing the real root we
4777                          * will have to build a stump root for the root we are
4778                          * in, but for now this doesn't actually use the root so
4779                          * just pass in extent_root.
4780                          */
4781                         tmp = read_tree_block(fs_info->extent_root, bytenr,
4782                                               leafsize, 0);
4783                         if (!tmp) {
4784                                 fprintf(stderr, "Error reading root block\n");
4785                                 return -EIO;
4786                         }
4787                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
4788                         free_extent_buffer(tmp);
4789                         if (ret)
4790                                 return ret;
4791                 } else {
4792                         bytenr = btrfs_node_blockptr(eb, i);
4793
4794                         /* If we aren't the tree root don't read the block */
4795                         if (level == 1 && !tree_root) {
4796                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
4797                                 continue;
4798                         }
4799
4800                         tmp = read_tree_block(fs_info->extent_root, bytenr,
4801                                               leafsize, 0);
4802                         if (!tmp) {
4803                                 fprintf(stderr, "Error reading tree block\n");
4804                                 return -EIO;
4805                         }
4806                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
4807                         free_extent_buffer(tmp);
4808                         if (ret)
4809                                 return ret;
4810                 }
4811         }
4812
4813         return 0;
4814 }
4815
4816 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
4817 {
4818         int ret;
4819
4820         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
4821         if (ret)
4822                 return ret;
4823
4824         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
4825 }
4826
4827 static int reset_block_groups(struct btrfs_fs_info *fs_info)
4828 {
4829         struct btrfs_path *path;
4830         struct extent_buffer *leaf;
4831         struct btrfs_chunk *chunk;
4832         struct btrfs_key key;
4833         int ret;
4834
4835         path = btrfs_alloc_path();
4836         if (!path)
4837                 return -ENOMEM;
4838
4839         key.objectid = 0;
4840         key.type = BTRFS_CHUNK_ITEM_KEY;
4841         key.offset = 0;
4842
4843         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
4844         if (ret < 0) {
4845                 btrfs_free_path(path);
4846                 return ret;
4847         }
4848
4849         /*
4850          * We do this in case the block groups were screwed up and had alloc
4851          * bits that aren't actually set on the chunks.  This happens with
4852          * restored images every time and could happen in real life I guess.
4853          */
4854         fs_info->avail_data_alloc_bits = 0;
4855         fs_info->avail_metadata_alloc_bits = 0;
4856         fs_info->avail_system_alloc_bits = 0;
4857
4858         /* First we need to create the in-memory block groups */
4859         while (1) {
4860                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4861                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
4862                         if (ret < 0) {
4863                                 btrfs_free_path(path);
4864                                 return ret;
4865                         }
4866                         if (ret) {
4867                                 ret = 0;
4868                                 break;
4869                         }
4870                 }
4871                 leaf = path->nodes[0];
4872                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4873                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
4874                         path->slots[0]++;
4875                         continue;
4876                 }
4877
4878                 chunk = btrfs_item_ptr(leaf, path->slots[0],
4879                                        struct btrfs_chunk);
4880                 btrfs_add_block_group(fs_info, 0,
4881                                       btrfs_chunk_type(leaf, chunk),
4882                                       key.objectid, key.offset,
4883                                       btrfs_chunk_length(leaf, chunk));
4884                 path->slots[0]++;
4885         }
4886
4887         btrfs_free_path(path);
4888         return 0;
4889 }
4890
4891 static int reset_balance(struct btrfs_trans_handle *trans,
4892                          struct btrfs_fs_info *fs_info)
4893 {
4894         struct btrfs_root *root = fs_info->tree_root;
4895         struct btrfs_path *path;
4896         struct extent_buffer *leaf;
4897         struct btrfs_key key;
4898         int del_slot, del_nr = 0;
4899         int ret;
4900         int found = 0;
4901
4902         path = btrfs_alloc_path();
4903         if (!path)
4904                 return -ENOMEM;
4905
4906         key.objectid = BTRFS_BALANCE_OBJECTID;
4907         key.type = BTRFS_BALANCE_ITEM_KEY;
4908         key.offset = 0;
4909
4910         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4911         if (ret) {
4912                 if (ret > 0)
4913                         ret = 0;
4914                 goto out;
4915         }
4916
4917         ret = btrfs_del_item(trans, root, path);
4918         if (ret)
4919                 goto out;
4920         btrfs_release_path(root, path);
4921
4922         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4923         key.type = BTRFS_ROOT_ITEM_KEY;
4924         key.offset = 0;
4925
4926         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4927         if (ret < 0)
4928                 goto out;
4929         while (1) {
4930                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4931                         if (!found)
4932                                 break;
4933
4934                         if (del_nr) {
4935                                 ret = btrfs_del_items(trans, root, path,
4936                                                       del_slot, del_nr);
4937                                 del_nr = 0;
4938                                 if (ret)
4939                                         goto out;
4940                         }
4941                         key.offset++;
4942                         btrfs_release_path(root, path);
4943
4944                         found = 0;
4945                         ret = btrfs_search_slot(trans, root, &key, path,
4946                                                 -1, 1);
4947                         if (ret < 0)
4948                                 goto out;
4949                         continue;
4950                 }
4951                 found = 1;
4952                 leaf = path->nodes[0];
4953                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4954                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
4955                         break;
4956                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4957                         path->slots[0]++;
4958                         continue;
4959                 }
4960                 if (!del_nr) {
4961                         del_slot = path->slots[0];
4962                         del_nr = 1;
4963                 } else {
4964                         del_nr++;
4965                 }
4966                 path->slots[0]++;
4967         }
4968
4969         if (del_nr) {
4970                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
4971                 if (ret)
4972                         goto out;
4973         }
4974         btrfs_release_path(root, path);
4975
4976         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
4977         key.type = BTRFS_ROOT_ITEM_KEY;
4978         key.offset = (u64)-1;
4979         root = btrfs_read_fs_root(fs_info, &key);
4980         if (IS_ERR(root)) {
4981                 fprintf(stderr, "Error reading data reloc tree\n");
4982                 return PTR_ERR(root);
4983         }
4984         root->track_dirty = 1;
4985         if (root->last_trans != trans->transid) {
4986                 root->last_trans = trans->transid;
4987                 root->commit_root = root->node;
4988                 extent_buffer_get(root->node);
4989         }
4990         ret = btrfs_fsck_reinit_root(trans, root, 0);
4991 out:
4992         btrfs_free_path(path);
4993         return ret;
4994 }
4995
4996 static int reinit_extent_tree(struct btrfs_fs_info *fs_info)
4997 {
4998         struct btrfs_trans_handle *trans;
4999         u64 start = 0;
5000         int ret;
5001
5002         /*
5003          * The only reason we don't do this is because right now we're just
5004          * walking the trees we find and pinning down their bytes, we don't look
5005          * at any of the leaves.  In order to do mixed groups we'd have to check
5006          * the leaves of any fs roots and pin down the bytes for any file
5007          * extents we find.  Not hard but why do it if we don't have to?
5008          */
5009         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
5010                 fprintf(stderr, "We don't support re-initing the extent tree "
5011                         "for mixed block groups yet, please notify a btrfs "
5012                         "developer you want to do this so they can add this "
5013                         "functionality.\n");
5014                 return -EINVAL;
5015         }
5016
5017         trans = btrfs_start_transaction(fs_info->extent_root, 1);
5018         if (IS_ERR(trans)) {
5019                 fprintf(stderr, "Error starting transaction\n");
5020                 return PTR_ERR(trans);
5021         }
5022
5023         /*
5024          * first we need to walk all of the trees except the extent tree and pin
5025          * down the bytes that are in use so we don't overwrite any existing
5026          * metadata.
5027          */
5028         ret = pin_metadata_blocks(fs_info);
5029         if (ret) {
5030                 fprintf(stderr, "error pinning down used bytes\n");
5031                 return ret;
5032         }
5033
5034         /*
5035          * Need to drop all the block groups since we're going to recreate all
5036          * of them again.
5037          */
5038         btrfs_free_block_groups(fs_info);
5039         ret = reset_block_groups(fs_info);
5040         if (ret) {
5041                 fprintf(stderr, "error resetting the block groups\n");
5042                 return ret;
5043         }
5044
5045         /* Ok we can allocate now, reinit the extent root */
5046         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 1);
5047         if (ret) {
5048                 fprintf(stderr, "extent root initialization failed\n");
5049                 /*
5050                  * When the transaction code is updated we should end the
5051                  * transaction, but for now progs only knows about commit so
5052                  * just return an error.
5053                  */
5054                 return ret;
5055         }
5056
5057         ret = reset_balance(trans, fs_info);
5058         if (ret) {
5059                 fprintf(stderr, "error reseting the pending balance\n");
5060                 return ret;
5061         }
5062
5063         /*
5064          * Now we have all the in-memory block groups setup so we can make
5065          * allocations properly, and the metadata we care about is safe since we
5066          * pinned all of it above.
5067          */
5068         while (1) {
5069                 struct btrfs_block_group_cache *cache;
5070
5071                 cache = btrfs_lookup_first_block_group(fs_info, start);
5072                 if (!cache)
5073                         break;
5074                 start = cache->key.objectid + cache->key.offset;
5075                 ret = btrfs_insert_item(trans, fs_info->extent_root,
5076                                         &cache->key, &cache->item,
5077                                         sizeof(cache->item));
5078                 if (ret) {
5079                         fprintf(stderr, "Error adding block group\n");
5080                         return ret;
5081                 }
5082                 btrfs_extent_post_op(trans, fs_info->extent_root);
5083         }
5084
5085         /*
5086          * Ok now we commit and run the normal fsck, which will add extent
5087          * entries for all of the items it finds.
5088          */
5089         return btrfs_commit_transaction(trans, fs_info->extent_root);
5090 }
5091
5092 static struct option long_options[] = {
5093         { "super", 1, NULL, 's' },
5094         { "repair", 0, NULL, 0 },
5095         { "init-csum-tree", 0, NULL, 0 },
5096         { "init-extent-tree", 0, NULL, 0 },
5097         { 0, 0, 0, 0}
5098 };
5099
5100 const char * const cmd_check_usage[] = {
5101         "btrfs check [options] <device>",
5102         "Check an unmounted btrfs filesystem.",
5103         "",
5104         "-s|--super <superblock>     use this superblock copy",
5105         "--repair                    try to repair the filesystem",
5106         "--init-csum-tree            create a new CRC tree",
5107         "--init-extent-tree          create a new extent tree",
5108         NULL
5109 };
5110
5111 int cmd_check(int argc, char **argv)
5112 {
5113         struct cache_tree root_cache;
5114         struct btrfs_root *root;
5115         struct btrfs_fs_info *info;
5116         u64 bytenr = 0;
5117         char uuidbuf[37];
5118         int ret;
5119         int num;
5120         int repair = 0;
5121         int option_index = 0;
5122         int init_csum_tree = 0;
5123         int init_extent_tree = 0;
5124         int rw = 0;
5125
5126         while(1) {
5127                 int c;
5128                 c = getopt_long(argc, argv, "as:", long_options,
5129                                 &option_index);
5130                 if (c < 0)
5131                         break;
5132                 switch(c) {
5133                         case 'a': /* ignored */ break;
5134                         case 's':
5135                                 num = atol(optarg);
5136                                 bytenr = btrfs_sb_offset(num);
5137                                 printf("using SB copy %d, bytenr %llu\n", num,
5138                                        (unsigned long long)bytenr);
5139                                 break;
5140                         case '?':
5141                         case 'h':
5142                                 usage(cmd_check_usage);
5143                 }
5144                 if (option_index == 1) {
5145                         printf("enabling repair mode\n");
5146                         repair = 1;
5147                         rw = 1;
5148                 } else if (option_index == 2) {
5149                         printf("Creating a new CRC tree\n");
5150                         init_csum_tree = 1;
5151                         rw = 1;
5152                 } else if (option_index == 3) {
5153                         init_extent_tree = 1;
5154                         rw = 1;
5155                         repair = 1;
5156                 }
5157
5158         }
5159         argc = argc - optind;
5160
5161         if (argc != 1)
5162                 usage(cmd_check_usage);
5163
5164         radix_tree_init();
5165         cache_tree_init(&root_cache);
5166
5167         if((ret = check_mounted(argv[optind])) < 0) {
5168                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
5169                 return ret;
5170         } else if(ret) {
5171                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
5172                 return -EBUSY;
5173         }
5174
5175         info = open_ctree_fs_info(argv[optind], bytenr, 0, rw, 1);
5176         if (!info) {
5177                 fprintf(stderr, "Couldn't open file system\n");
5178                 return -EIO;
5179         }
5180
5181         uuid_unparse(info->super_copy->fsid, uuidbuf);
5182         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
5183
5184         if (!extent_buffer_uptodate(info->tree_root->node) ||
5185             !extent_buffer_uptodate(info->dev_root->node) ||
5186             !extent_buffer_uptodate(info->extent_root->node) ||
5187             !extent_buffer_uptodate(info->chunk_root->node)) {
5188                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
5189                 return -EIO;
5190         }
5191
5192         root = info->fs_root;
5193
5194         if (init_extent_tree) {
5195                 printf("Creating a new extent tree\n");
5196                 ret = reinit_extent_tree(info);
5197                 if (ret)
5198                         return ret;
5199         }
5200         fprintf(stderr, "checking extents\n");
5201         if (init_csum_tree) {
5202                 struct btrfs_trans_handle *trans;
5203
5204                 fprintf(stderr, "Reinit crc root\n");
5205                 trans = btrfs_start_transaction(info->csum_root, 1);
5206                 if (IS_ERR(trans)) {
5207                         fprintf(stderr, "Error starting transaction\n");
5208                         return PTR_ERR(trans);
5209                 }
5210
5211                 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
5212                 if (ret) {
5213                         fprintf(stderr, "crc root initialization failed\n");
5214                         return -EIO;
5215                 }
5216
5217                 ret = btrfs_commit_transaction(trans, root);
5218                 if (ret)
5219                         exit(1);
5220                 goto out;
5221         }
5222         ret = check_extents(root, repair);
5223         if (ret)
5224                 fprintf(stderr, "Errors found in extent allocation tree\n");
5225
5226         fprintf(stderr, "checking free space cache\n");
5227         ret = check_space_cache(root);
5228         if (ret)
5229                 goto out;
5230
5231         fprintf(stderr, "checking fs roots\n");
5232         ret = check_fs_roots(root, &root_cache);
5233         if (ret)
5234                 goto out;
5235
5236         fprintf(stderr, "checking csums\n");
5237         ret = check_csums(root);
5238         if (ret)
5239                 goto out;
5240
5241         fprintf(stderr, "checking root refs\n");
5242         ret = check_root_refs(root, &root_cache);
5243 out:
5244         free_root_recs_tree(&root_cache);
5245         close_ctree(root);
5246
5247         if (found_old_backref) { /*
5248                  * there was a disk format change when mixed
5249                  * backref was in testing tree. The old format
5250                  * existed about one week.
5251                  */
5252                 printf("\n * Found old mixed backref format. "
5253                        "The old format is not supported! *"
5254                        "\n * Please mount the FS in readonly mode, "
5255                        "backup data and re-format the FS. *\n\n");
5256                 ret = 1;
5257         }
5258         printf("found %llu bytes used err is %d\n",
5259                (unsigned long long)bytes_used, ret);
5260         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
5261         printf("total tree bytes: %llu\n",
5262                (unsigned long long)total_btree_bytes);
5263         printf("total fs tree bytes: %llu\n",
5264                (unsigned long long)total_fs_tree_bytes);
5265         printf("total extent tree bytes: %llu\n",
5266                (unsigned long long)total_extent_tree_bytes);
5267         printf("btree space waste bytes: %llu\n",
5268                (unsigned long long)btree_space_waste);
5269         printf("file data blocks allocated: %llu\n referenced %llu\n",
5270                 (unsigned long long)data_bytes_allocated,
5271                 (unsigned long long)data_bytes_referenced);
5272         printf("%s\n", BTRFS_BUILD_VERSION);
5273         return ret;
5274 }