Btrfs-progs: deal with invalid key orderings and bad orphan items V2
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "ctree.h"
31 #include "volumes.h"
32 #include "repair.h"
33 #include "disk-io.h"
34 #include "print-tree.h"
35 #include "transaction.h"
36 #include "version.h"
37 #include "utils.h"
38 #include "commands.h"
39 #include "free-space-cache.h"
40 #include "btrfsck.h"
41
42 static u64 bytes_used = 0;
43 static u64 total_csum_bytes = 0;
44 static u64 total_btree_bytes = 0;
45 static u64 total_fs_tree_bytes = 0;
46 static u64 total_extent_tree_bytes = 0;
47 static u64 btree_space_waste = 0;
48 static u64 data_bytes_allocated = 0;
49 static u64 data_bytes_referenced = 0;
50 static int found_old_backref = 0;
51 static LIST_HEAD(duplicate_extents);
52 static LIST_HEAD(delete_items);
53 static int repair = 0;
54
55 struct extent_backref {
56         struct list_head list;
57         unsigned int is_data:1;
58         unsigned int found_extent_tree:1;
59         unsigned int full_backref:1;
60         unsigned int found_ref:1;
61         unsigned int broken:1;
62 };
63
64 struct data_backref {
65         struct extent_backref node;
66         union {
67                 u64 parent;
68                 u64 root;
69         };
70         u64 owner;
71         u64 offset;
72         u64 disk_bytenr;
73         u64 bytes;
74         u64 ram_bytes;
75         u32 num_refs;
76         u32 found_ref;
77 };
78
79 struct tree_backref {
80         struct extent_backref node;
81         union {
82                 u64 parent;
83                 u64 root;
84         };
85 };
86
87 struct extent_record {
88         struct list_head backrefs;
89         struct list_head dups;
90         struct list_head list;
91         struct cache_extent cache;
92         struct btrfs_disk_key parent_key;
93         unsigned int found_rec;
94         u64 start;
95         u64 max_size;
96         u64 nr;
97         u64 refs;
98         u64 extent_item_refs;
99         u64 generation;
100         u64 info_objectid;
101         u64 num_duplicates;
102         u8 info_level;
103         unsigned int content_checked:1;
104         unsigned int owner_ref_checked:1;
105         unsigned int is_root:1;
106         unsigned int metadata:1;
107 };
108
109 struct inode_backref {
110         struct list_head list;
111         unsigned int found_dir_item:1;
112         unsigned int found_dir_index:1;
113         unsigned int found_inode_ref:1;
114         unsigned int filetype:8;
115         int errors;
116         unsigned int ref_type;
117         u64 dir;
118         u64 index;
119         u16 namelen;
120         char name[0];
121 };
122
123 struct dropping_root_item_record {
124         struct list_head list;
125         struct btrfs_root_item ri;
126         struct btrfs_key found_key;
127 };
128
129 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
130 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
131 #define REF_ERR_NO_INODE_REF            (1 << 2)
132 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
133 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
134 #define REF_ERR_DUP_INODE_REF           (1 << 5)
135 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
136 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
137 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
138 #define REF_ERR_NO_ROOT_REF             (1 << 9)
139 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
140 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
141 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
142
143 struct inode_record {
144         struct list_head backrefs;
145         unsigned int checked:1;
146         unsigned int merging:1;
147         unsigned int found_inode_item:1;
148         unsigned int found_dir_item:1;
149         unsigned int found_file_extent:1;
150         unsigned int found_csum_item:1;
151         unsigned int some_csum_missing:1;
152         unsigned int nodatasum:1;
153         int errors;
154
155         u64 ino;
156         u32 nlink;
157         u32 imode;
158         u64 isize;
159         u64 nbytes;
160
161         u32 found_link;
162         u64 found_size;
163         u64 extent_start;
164         u64 extent_end;
165         u64 first_extent_gap;
166
167         u32 refs;
168 };
169
170 #define I_ERR_NO_INODE_ITEM             (1 << 0)
171 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
172 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
173 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
174 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
175 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
176 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
177 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
178 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
179 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
180 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
181 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
182 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
183 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
184
185 struct root_backref {
186         struct list_head list;
187         unsigned int found_dir_item:1;
188         unsigned int found_dir_index:1;
189         unsigned int found_back_ref:1;
190         unsigned int found_forward_ref:1;
191         unsigned int reachable:1;
192         int errors;
193         u64 ref_root;
194         u64 dir;
195         u64 index;
196         u16 namelen;
197         char name[0];
198 };
199
200 struct root_record {
201         struct list_head backrefs;
202         struct cache_extent cache;
203         unsigned int found_root_item:1;
204         u64 objectid;
205         u32 found_ref;
206 };
207
208 struct ptr_node {
209         struct cache_extent cache;
210         void *data;
211 };
212
213 struct shared_node {
214         struct cache_extent cache;
215         struct cache_tree root_cache;
216         struct cache_tree inode_cache;
217         struct inode_record *current;
218         u32 refs;
219 };
220
221 struct block_info {
222         u64 start;
223         u32 size;
224 };
225
226 struct walk_control {
227         struct cache_tree shared;
228         struct shared_node *nodes[BTRFS_MAX_LEVEL];
229         int active_node;
230         int root_level;
231 };
232
233 struct bad_item {
234         struct btrfs_key key;
235         u64 root_id;
236         struct list_head list;
237 };
238
239 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
240
241 static u8 imode_to_type(u32 imode)
242 {
243 #define S_SHIFT 12
244         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
245                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
246                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
247                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
248                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
249                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
250                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
251                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
252         };
253
254         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
255 #undef S_SHIFT
256 }
257
258 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
259 {
260         struct device_record *rec1;
261         struct device_record *rec2;
262
263         rec1 = rb_entry(node1, struct device_record, node);
264         rec2 = rb_entry(node2, struct device_record, node);
265         if (rec1->devid > rec2->devid)
266                 return -1;
267         else if (rec1->devid < rec2->devid)
268                 return 1;
269         else
270                 return 0;
271 }
272
273 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
274 {
275         struct inode_record *rec;
276         struct inode_backref *backref;
277         struct inode_backref *orig;
278         size_t size;
279
280         rec = malloc(sizeof(*rec));
281         memcpy(rec, orig_rec, sizeof(*rec));
282         rec->refs = 1;
283         INIT_LIST_HEAD(&rec->backrefs);
284
285         list_for_each_entry(orig, &orig_rec->backrefs, list) {
286                 size = sizeof(*orig) + orig->namelen + 1;
287                 backref = malloc(size);
288                 memcpy(backref, orig, size);
289                 list_add_tail(&backref->list, &rec->backrefs);
290         }
291         return rec;
292 }
293
294 static void print_inode_error(int errors)
295 {
296         if (errors & I_ERR_NO_INODE_ITEM)
297                 fprintf(stderr, ", no inode item");
298         if (errors & I_ERR_NO_ORPHAN_ITEM)
299                 fprintf(stderr, ", no orphan item");
300         if (errors & I_ERR_DUP_INODE_ITEM)
301                 fprintf(stderr, ", dup inode item");
302         if (errors & I_ERR_DUP_DIR_INDEX)
303                 fprintf(stderr, ", dup dir index");
304         if (errors & I_ERR_ODD_DIR_ITEM)
305                 fprintf(stderr, ", odd dir item");
306         if (errors & I_ERR_ODD_FILE_EXTENT)
307                 fprintf(stderr, ", odd file extent");
308         if (errors & I_ERR_BAD_FILE_EXTENT)
309                 fprintf(stderr, ", bad file extent");
310         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
311                 fprintf(stderr, ", file extent overlap");
312         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
313                 fprintf(stderr, ", file extent discount");
314         if (errors & I_ERR_DIR_ISIZE_WRONG)
315                 fprintf(stderr, ", dir isize wrong");
316         if (errors & I_ERR_FILE_NBYTES_WRONG)
317                 fprintf(stderr, ", nbytes wrong");
318         if (errors & I_ERR_ODD_CSUM_ITEM)
319                 fprintf(stderr, ", odd csum item");
320         if (errors & I_ERR_SOME_CSUM_MISSING)
321                 fprintf(stderr, ", some csum missing");
322         if (errors & I_ERR_LINK_COUNT_WRONG)
323                 fprintf(stderr, ", link count wrong");
324         fprintf(stderr, "\n");
325 }
326
327 static void print_ref_error(int errors)
328 {
329         if (errors & REF_ERR_NO_DIR_ITEM)
330                 fprintf(stderr, ", no dir item");
331         if (errors & REF_ERR_NO_DIR_INDEX)
332                 fprintf(stderr, ", no dir index");
333         if (errors & REF_ERR_NO_INODE_REF)
334                 fprintf(stderr, ", no inode ref");
335         if (errors & REF_ERR_DUP_DIR_ITEM)
336                 fprintf(stderr, ", dup dir item");
337         if (errors & REF_ERR_DUP_DIR_INDEX)
338                 fprintf(stderr, ", dup dir index");
339         if (errors & REF_ERR_DUP_INODE_REF)
340                 fprintf(stderr, ", dup inode ref");
341         if (errors & REF_ERR_INDEX_UNMATCH)
342                 fprintf(stderr, ", index unmatch");
343         if (errors & REF_ERR_FILETYPE_UNMATCH)
344                 fprintf(stderr, ", filetype unmatch");
345         if (errors & REF_ERR_NAME_TOO_LONG)
346                 fprintf(stderr, ", name too long");
347         if (errors & REF_ERR_NO_ROOT_REF)
348                 fprintf(stderr, ", no root ref");
349         if (errors & REF_ERR_NO_ROOT_BACKREF)
350                 fprintf(stderr, ", no root backref");
351         if (errors & REF_ERR_DUP_ROOT_REF)
352                 fprintf(stderr, ", dup root ref");
353         if (errors & REF_ERR_DUP_ROOT_BACKREF)
354                 fprintf(stderr, ", dup root backref");
355         fprintf(stderr, "\n");
356 }
357
358 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
359                                           u64 ino, int mod)
360 {
361         struct ptr_node *node;
362         struct cache_extent *cache;
363         struct inode_record *rec = NULL;
364         int ret;
365
366         cache = lookup_cache_extent(inode_cache, ino, 1);
367         if (cache) {
368                 node = container_of(cache, struct ptr_node, cache);
369                 rec = node->data;
370                 if (mod && rec->refs > 1) {
371                         node->data = clone_inode_rec(rec);
372                         rec->refs--;
373                         rec = node->data;
374                 }
375         } else if (mod) {
376                 rec = calloc(1, sizeof(*rec));
377                 rec->ino = ino;
378                 rec->extent_start = (u64)-1;
379                 rec->first_extent_gap = (u64)-1;
380                 rec->refs = 1;
381                 INIT_LIST_HEAD(&rec->backrefs);
382
383                 node = malloc(sizeof(*node));
384                 node->cache.start = ino;
385                 node->cache.size = 1;
386                 node->data = rec;
387
388                 if (ino == BTRFS_FREE_INO_OBJECTID)
389                         rec->found_link = 1;
390
391                 ret = insert_cache_extent(inode_cache, &node->cache);
392                 BUG_ON(ret);
393         }
394         return rec;
395 }
396
397 static void free_inode_rec(struct inode_record *rec)
398 {
399         struct inode_backref *backref;
400
401         if (--rec->refs > 0)
402                 return;
403
404         while (!list_empty(&rec->backrefs)) {
405                 backref = list_entry(rec->backrefs.next,
406                                      struct inode_backref, list);
407                 list_del(&backref->list);
408                 free(backref);
409         }
410         free(rec);
411 }
412
413 static int can_free_inode_rec(struct inode_record *rec)
414 {
415         if (!rec->errors && rec->checked && rec->found_inode_item &&
416             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
417                 return 1;
418         return 0;
419 }
420
421 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
422                                  struct inode_record *rec)
423 {
424         struct cache_extent *cache;
425         struct inode_backref *tmp, *backref;
426         struct ptr_node *node;
427         unsigned char filetype;
428
429         if (!rec->found_inode_item)
430                 return;
431
432         filetype = imode_to_type(rec->imode);
433         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
434                 if (backref->found_dir_item && backref->found_dir_index) {
435                         if (backref->filetype != filetype)
436                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
437                         if (!backref->errors && backref->found_inode_ref) {
438                                 list_del(&backref->list);
439                                 free(backref);
440                         }
441                 }
442         }
443
444         if (!rec->checked || rec->merging)
445                 return;
446
447         if (S_ISDIR(rec->imode)) {
448                 if (rec->found_size != rec->isize)
449                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
450                 if (rec->found_file_extent)
451                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
452         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
453                 if (rec->found_dir_item)
454                         rec->errors |= I_ERR_ODD_DIR_ITEM;
455                 if (rec->found_size != rec->nbytes)
456                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
457                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
458                         rec->first_extent_gap = 0;
459                 if (rec->nlink > 0 && (rec->extent_end < rec->isize ||
460                     rec->first_extent_gap < rec->isize))
461                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
462         }
463
464         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
465                 if (rec->found_csum_item && rec->nodatasum)
466                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
467                 if (rec->some_csum_missing && !rec->nodatasum)
468                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
469         }
470
471         BUG_ON(rec->refs != 1);
472         if (can_free_inode_rec(rec)) {
473                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
474                 node = container_of(cache, struct ptr_node, cache);
475                 BUG_ON(node->data != rec);
476                 remove_cache_extent(inode_cache, &node->cache);
477                 free(node);
478                 free_inode_rec(rec);
479         }
480 }
481
482 static int check_orphan_item(struct btrfs_root *root, u64 ino)
483 {
484         struct btrfs_path path;
485         struct btrfs_key key;
486         int ret;
487
488         key.objectid = BTRFS_ORPHAN_OBJECTID;
489         key.type = BTRFS_ORPHAN_ITEM_KEY;
490         key.offset = ino;
491
492         btrfs_init_path(&path);
493         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
494         btrfs_release_path(&path);
495         if (ret > 0)
496                 ret = -ENOENT;
497         return ret;
498 }
499
500 static int process_inode_item(struct extent_buffer *eb,
501                               int slot, struct btrfs_key *key,
502                               struct shared_node *active_node)
503 {
504         struct inode_record *rec;
505         struct btrfs_inode_item *item;
506
507         rec = active_node->current;
508         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
509         if (rec->found_inode_item) {
510                 rec->errors |= I_ERR_DUP_INODE_ITEM;
511                 return 1;
512         }
513         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
514         rec->nlink = btrfs_inode_nlink(eb, item);
515         rec->isize = btrfs_inode_size(eb, item);
516         rec->nbytes = btrfs_inode_nbytes(eb, item);
517         rec->imode = btrfs_inode_mode(eb, item);
518         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
519                 rec->nodatasum = 1;
520         rec->found_inode_item = 1;
521         if (rec->nlink == 0)
522                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
523         maybe_free_inode_rec(&active_node->inode_cache, rec);
524         return 0;
525 }
526
527 static struct inode_backref *get_inode_backref(struct inode_record *rec,
528                                                 const char *name,
529                                                 int namelen, u64 dir)
530 {
531         struct inode_backref *backref;
532
533         list_for_each_entry(backref, &rec->backrefs, list) {
534                 if (backref->dir != dir || backref->namelen != namelen)
535                         continue;
536                 if (memcmp(name, backref->name, namelen))
537                         continue;
538                 return backref;
539         }
540
541         backref = malloc(sizeof(*backref) + namelen + 1);
542         memset(backref, 0, sizeof(*backref));
543         backref->dir = dir;
544         backref->namelen = namelen;
545         memcpy(backref->name, name, namelen);
546         backref->name[namelen] = '\0';
547         list_add_tail(&backref->list, &rec->backrefs);
548         return backref;
549 }
550
551 static int add_inode_backref(struct cache_tree *inode_cache,
552                              u64 ino, u64 dir, u64 index,
553                              const char *name, int namelen,
554                              int filetype, int itemtype, int errors)
555 {
556         struct inode_record *rec;
557         struct inode_backref *backref;
558
559         rec = get_inode_rec(inode_cache, ino, 1);
560         backref = get_inode_backref(rec, name, namelen, dir);
561         if (errors)
562                 backref->errors |= errors;
563         if (itemtype == BTRFS_DIR_INDEX_KEY) {
564                 if (backref->found_dir_index)
565                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
566                 if (backref->found_inode_ref && backref->index != index)
567                         backref->errors |= REF_ERR_INDEX_UNMATCH;
568                 if (backref->found_dir_item && backref->filetype != filetype)
569                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
570
571                 backref->index = index;
572                 backref->filetype = filetype;
573                 backref->found_dir_index = 1;
574         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
575                 rec->found_link++;
576                 if (backref->found_dir_item)
577                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
578                 if (backref->found_dir_index && backref->filetype != filetype)
579                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
580
581                 backref->filetype = filetype;
582                 backref->found_dir_item = 1;
583         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
584                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
585                 if (backref->found_inode_ref)
586                         backref->errors |= REF_ERR_DUP_INODE_REF;
587                 if (backref->found_dir_index && backref->index != index)
588                         backref->errors |= REF_ERR_INDEX_UNMATCH;
589
590                 backref->ref_type = itemtype;
591                 backref->index = index;
592                 backref->found_inode_ref = 1;
593         } else {
594                 BUG_ON(1);
595         }
596
597         maybe_free_inode_rec(inode_cache, rec);
598         return 0;
599 }
600
601 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
602                             struct cache_tree *dst_cache)
603 {
604         struct inode_backref *backref;
605         u32 dir_count = 0;
606
607         dst->merging = 1;
608         list_for_each_entry(backref, &src->backrefs, list) {
609                 if (backref->found_dir_index) {
610                         add_inode_backref(dst_cache, dst->ino, backref->dir,
611                                         backref->index, backref->name,
612                                         backref->namelen, backref->filetype,
613                                         BTRFS_DIR_INDEX_KEY, backref->errors);
614                 }
615                 if (backref->found_dir_item) {
616                         dir_count++;
617                         add_inode_backref(dst_cache, dst->ino,
618                                         backref->dir, 0, backref->name,
619                                         backref->namelen, backref->filetype,
620                                         BTRFS_DIR_ITEM_KEY, backref->errors);
621                 }
622                 if (backref->found_inode_ref) {
623                         add_inode_backref(dst_cache, dst->ino,
624                                         backref->dir, backref->index,
625                                         backref->name, backref->namelen, 0,
626                                         backref->ref_type, backref->errors);
627                 }
628         }
629
630         if (src->found_dir_item)
631                 dst->found_dir_item = 1;
632         if (src->found_file_extent)
633                 dst->found_file_extent = 1;
634         if (src->found_csum_item)
635                 dst->found_csum_item = 1;
636         if (src->some_csum_missing)
637                 dst->some_csum_missing = 1;
638         if (dst->first_extent_gap > src->first_extent_gap)
639                 dst->first_extent_gap = src->first_extent_gap;
640
641         BUG_ON(src->found_link < dir_count);
642         dst->found_link += src->found_link - dir_count;
643         dst->found_size += src->found_size;
644         if (src->extent_start != (u64)-1) {
645                 if (dst->extent_start == (u64)-1) {
646                         dst->extent_start = src->extent_start;
647                         dst->extent_end = src->extent_end;
648                 } else {
649                         if (dst->extent_end > src->extent_start)
650                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
651                         else if (dst->extent_end < src->extent_start &&
652                                  dst->extent_end < dst->first_extent_gap)
653                                 dst->first_extent_gap = dst->extent_end;
654                         if (dst->extent_end < src->extent_end)
655                                 dst->extent_end = src->extent_end;
656                 }
657         }
658
659         dst->errors |= src->errors;
660         if (src->found_inode_item) {
661                 if (!dst->found_inode_item) {
662                         dst->nlink = src->nlink;
663                         dst->isize = src->isize;
664                         dst->nbytes = src->nbytes;
665                         dst->imode = src->imode;
666                         dst->nodatasum = src->nodatasum;
667                         dst->found_inode_item = 1;
668                 } else {
669                         dst->errors |= I_ERR_DUP_INODE_ITEM;
670                 }
671         }
672         dst->merging = 0;
673
674         return 0;
675 }
676
677 static int splice_shared_node(struct shared_node *src_node,
678                               struct shared_node *dst_node)
679 {
680         struct cache_extent *cache;
681         struct ptr_node *node, *ins;
682         struct cache_tree *src, *dst;
683         struct inode_record *rec, *conflict;
684         u64 current_ino = 0;
685         int splice = 0;
686         int ret;
687
688         if (--src_node->refs == 0)
689                 splice = 1;
690         if (src_node->current)
691                 current_ino = src_node->current->ino;
692
693         src = &src_node->root_cache;
694         dst = &dst_node->root_cache;
695 again:
696         cache = search_cache_extent(src, 0);
697         while (cache) {
698                 node = container_of(cache, struct ptr_node, cache);
699                 rec = node->data;
700                 cache = next_cache_extent(cache);
701
702                 if (splice) {
703                         remove_cache_extent(src, &node->cache);
704                         ins = node;
705                 } else {
706                         ins = malloc(sizeof(*ins));
707                         ins->cache.start = node->cache.start;
708                         ins->cache.size = node->cache.size;
709                         ins->data = rec;
710                         rec->refs++;
711                 }
712                 ret = insert_cache_extent(dst, &ins->cache);
713                 if (ret == -EEXIST) {
714                         conflict = get_inode_rec(dst, rec->ino, 1);
715                         merge_inode_recs(rec, conflict, dst);
716                         if (rec->checked) {
717                                 conflict->checked = 1;
718                                 if (dst_node->current == conflict)
719                                         dst_node->current = NULL;
720                         }
721                         maybe_free_inode_rec(dst, conflict);
722                         free_inode_rec(rec);
723                         free(ins);
724                 } else {
725                         BUG_ON(ret);
726                 }
727         }
728
729         if (src == &src_node->root_cache) {
730                 src = &src_node->inode_cache;
731                 dst = &dst_node->inode_cache;
732                 goto again;
733         }
734
735         if (current_ino > 0 && (!dst_node->current ||
736             current_ino > dst_node->current->ino)) {
737                 if (dst_node->current) {
738                         dst_node->current->checked = 1;
739                         maybe_free_inode_rec(dst, dst_node->current);
740                 }
741                 dst_node->current = get_inode_rec(dst, current_ino, 1);
742         }
743         return 0;
744 }
745
746 static void free_inode_ptr(struct cache_extent *cache)
747 {
748         struct ptr_node *node;
749         struct inode_record *rec;
750
751         node = container_of(cache, struct ptr_node, cache);
752         rec = node->data;
753         free_inode_rec(rec);
754         free(node);
755 }
756
757 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
758
759 static struct shared_node *find_shared_node(struct cache_tree *shared,
760                                             u64 bytenr)
761 {
762         struct cache_extent *cache;
763         struct shared_node *node;
764
765         cache = lookup_cache_extent(shared, bytenr, 1);
766         if (cache) {
767                 node = container_of(cache, struct shared_node, cache);
768                 return node;
769         }
770         return NULL;
771 }
772
773 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
774 {
775         int ret;
776         struct shared_node *node;
777
778         node = calloc(1, sizeof(*node));
779         node->cache.start = bytenr;
780         node->cache.size = 1;
781         cache_tree_init(&node->root_cache);
782         cache_tree_init(&node->inode_cache);
783         node->refs = refs;
784
785         ret = insert_cache_extent(shared, &node->cache);
786         BUG_ON(ret);
787         return 0;
788 }
789
790 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
791                              struct walk_control *wc, int level)
792 {
793         struct shared_node *node;
794         struct shared_node *dest;
795
796         if (level == wc->active_node)
797                 return 0;
798
799         BUG_ON(wc->active_node <= level);
800         node = find_shared_node(&wc->shared, bytenr);
801         if (!node) {
802                 add_shared_node(&wc->shared, bytenr, refs);
803                 node = find_shared_node(&wc->shared, bytenr);
804                 wc->nodes[level] = node;
805                 wc->active_node = level;
806                 return 0;
807         }
808
809         if (wc->root_level == wc->active_node &&
810             btrfs_root_refs(&root->root_item) == 0) {
811                 if (--node->refs == 0) {
812                         free_inode_recs_tree(&node->root_cache);
813                         free_inode_recs_tree(&node->inode_cache);
814                         remove_cache_extent(&wc->shared, &node->cache);
815                         free(node);
816                 }
817                 return 1;
818         }
819
820         dest = wc->nodes[wc->active_node];
821         splice_shared_node(node, dest);
822         if (node->refs == 0) {
823                 remove_cache_extent(&wc->shared, &node->cache);
824                 free(node);
825         }
826         return 1;
827 }
828
829 static int leave_shared_node(struct btrfs_root *root,
830                              struct walk_control *wc, int level)
831 {
832         struct shared_node *node;
833         struct shared_node *dest;
834         int i;
835
836         if (level == wc->root_level)
837                 return 0;
838
839         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
840                 if (wc->nodes[i])
841                         break;
842         }
843         BUG_ON(i >= BTRFS_MAX_LEVEL);
844
845         node = wc->nodes[wc->active_node];
846         wc->nodes[wc->active_node] = NULL;
847         wc->active_node = i;
848
849         dest = wc->nodes[wc->active_node];
850         if (wc->active_node < wc->root_level ||
851             btrfs_root_refs(&root->root_item) > 0) {
852                 BUG_ON(node->refs <= 1);
853                 splice_shared_node(node, dest);
854         } else {
855                 BUG_ON(node->refs < 2);
856                 node->refs--;
857         }
858         return 0;
859 }
860
861 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
862                          u64 child_root_id)
863 {
864         struct btrfs_path path;
865         struct btrfs_key key;
866         struct extent_buffer *leaf;
867         int has_parent = 0;
868         int ret;
869
870         btrfs_init_path(&path);
871
872         key.objectid = parent_root_id;
873         key.type = BTRFS_ROOT_REF_KEY;
874         key.offset = child_root_id;
875         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
876                                 0, 0);
877         BUG_ON(ret < 0);
878         btrfs_release_path(&path);
879         if (!ret)
880                 return 1;
881
882         key.objectid = child_root_id;
883         key.type = BTRFS_ROOT_BACKREF_KEY;
884         key.offset = 0;
885         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
886                                 0, 0);
887         BUG_ON(ret <= 0);
888
889         while (1) {
890                 leaf = path.nodes[0];
891                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
892                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
893                         BUG_ON(ret < 0);
894
895                         if (ret > 0)
896                                 break;
897                         leaf = path.nodes[0];
898                 }
899
900                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
901                 if (key.objectid != child_root_id ||
902                     key.type != BTRFS_ROOT_BACKREF_KEY)
903                         break;
904
905                 has_parent = 1;
906
907                 if (key.offset == parent_root_id) {
908                         btrfs_release_path(&path);
909                         return 1;
910                 }
911
912                 path.slots[0]++;
913         }
914
915         btrfs_release_path(&path);
916         return has_parent? 0 : -1;
917 }
918
919 static int process_dir_item(struct btrfs_root *root,
920                             struct extent_buffer *eb,
921                             int slot, struct btrfs_key *key,
922                             struct shared_node *active_node)
923 {
924         u32 total;
925         u32 cur = 0;
926         u32 len;
927         u32 name_len;
928         u32 data_len;
929         int error;
930         int nritems = 0;
931         int filetype;
932         struct btrfs_dir_item *di;
933         struct inode_record *rec;
934         struct cache_tree *root_cache;
935         struct cache_tree *inode_cache;
936         struct btrfs_key location;
937         char namebuf[BTRFS_NAME_LEN];
938
939         root_cache = &active_node->root_cache;
940         inode_cache = &active_node->inode_cache;
941         rec = active_node->current;
942         rec->found_dir_item = 1;
943
944         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
945         total = btrfs_item_size_nr(eb, slot);
946         while (cur < total) {
947                 nritems++;
948                 btrfs_dir_item_key_to_cpu(eb, di, &location);
949                 name_len = btrfs_dir_name_len(eb, di);
950                 data_len = btrfs_dir_data_len(eb, di);
951                 filetype = btrfs_dir_type(eb, di);
952
953                 rec->found_size += name_len;
954                 if (name_len <= BTRFS_NAME_LEN) {
955                         len = name_len;
956                         error = 0;
957                 } else {
958                         len = BTRFS_NAME_LEN;
959                         error = REF_ERR_NAME_TOO_LONG;
960                 }
961                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
962
963                 if (location.type == BTRFS_INODE_ITEM_KEY) {
964                         add_inode_backref(inode_cache, location.objectid,
965                                           key->objectid, key->offset, namebuf,
966                                           len, filetype, key->type, error);
967                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
968                         add_inode_backref(root_cache, location.objectid,
969                                           key->objectid, key->offset,
970                                           namebuf, len, filetype,
971                                           key->type, error);
972                 } else {
973                         fprintf(stderr, "warning line %d\n", __LINE__);
974                 }
975
976                 len = sizeof(*di) + name_len + data_len;
977                 di = (struct btrfs_dir_item *)((char *)di + len);
978                 cur += len;
979         }
980         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
981                 rec->errors |= I_ERR_DUP_DIR_INDEX;
982
983         return 0;
984 }
985
986 static int process_inode_ref(struct extent_buffer *eb,
987                              int slot, struct btrfs_key *key,
988                              struct shared_node *active_node)
989 {
990         u32 total;
991         u32 cur = 0;
992         u32 len;
993         u32 name_len;
994         u64 index;
995         int error;
996         struct cache_tree *inode_cache;
997         struct btrfs_inode_ref *ref;
998         char namebuf[BTRFS_NAME_LEN];
999
1000         inode_cache = &active_node->inode_cache;
1001
1002         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1003         total = btrfs_item_size_nr(eb, slot);
1004         while (cur < total) {
1005                 name_len = btrfs_inode_ref_name_len(eb, ref);
1006                 index = btrfs_inode_ref_index(eb, ref);
1007                 if (name_len <= BTRFS_NAME_LEN) {
1008                         len = name_len;
1009                         error = 0;
1010                 } else {
1011                         len = BTRFS_NAME_LEN;
1012                         error = REF_ERR_NAME_TOO_LONG;
1013                 }
1014                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1015                 add_inode_backref(inode_cache, key->objectid, key->offset,
1016                                   index, namebuf, len, 0, key->type, error);
1017
1018                 len = sizeof(*ref) + name_len;
1019                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1020                 cur += len;
1021         }
1022         return 0;
1023 }
1024
1025 static int process_inode_extref(struct extent_buffer *eb,
1026                                 int slot, struct btrfs_key *key,
1027                                 struct shared_node *active_node)
1028 {
1029         u32 total;
1030         u32 cur = 0;
1031         u32 len;
1032         u32 name_len;
1033         u64 index;
1034         u64 parent;
1035         int error;
1036         struct cache_tree *inode_cache;
1037         struct btrfs_inode_extref *extref;
1038         char namebuf[BTRFS_NAME_LEN];
1039
1040         inode_cache = &active_node->inode_cache;
1041
1042         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1043         total = btrfs_item_size_nr(eb, slot);
1044         while (cur < total) {
1045                 name_len = btrfs_inode_extref_name_len(eb, extref);
1046                 index = btrfs_inode_extref_index(eb, extref);
1047                 parent = btrfs_inode_extref_parent(eb, extref);
1048                 if (name_len <= BTRFS_NAME_LEN) {
1049                         len = name_len;
1050                         error = 0;
1051                 } else {
1052                         len = BTRFS_NAME_LEN;
1053                         error = REF_ERR_NAME_TOO_LONG;
1054                 }
1055                 read_extent_buffer(eb, namebuf,
1056                                    (unsigned long)(extref + 1), len);
1057                 add_inode_backref(inode_cache, key->objectid, parent,
1058                                   index, namebuf, len, 0, key->type, error);
1059
1060                 len = sizeof(*extref) + name_len;
1061                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1062                 cur += len;
1063         }
1064         return 0;
1065
1066 }
1067
1068 static u64 count_csum_range(struct btrfs_root *root, u64 start, u64 len)
1069 {
1070         struct btrfs_key key;
1071         struct btrfs_path path;
1072         struct extent_buffer *leaf;
1073         int ret ;
1074         size_t size;
1075         u64 found = 0;
1076         u64 csum_end;
1077         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1078
1079         btrfs_init_path(&path);
1080
1081         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1082         key.offset = start;
1083         key.type = BTRFS_EXTENT_CSUM_KEY;
1084
1085         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1086                                 &key, &path, 0, 0);
1087         BUG_ON(ret < 0);
1088         if (ret > 0 && path.slots[0] > 0) {
1089                 leaf = path.nodes[0];
1090                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1091                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1092                     key.type == BTRFS_EXTENT_CSUM_KEY)
1093                         path.slots[0]--;
1094         }
1095
1096         while (len > 0) {
1097                 leaf = path.nodes[0];
1098                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1099                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1100                         BUG_ON(ret < 0);
1101                         if (ret > 0)
1102                                 break;
1103                         leaf = path.nodes[0];
1104                 }
1105
1106                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1107                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1108                     key.type != BTRFS_EXTENT_CSUM_KEY)
1109                         break;
1110
1111                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1112                 if (key.offset >= start + len)
1113                         break;
1114
1115                 if (key.offset > start)
1116                         start = key.offset;
1117
1118                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1119                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1120                 if (csum_end > start) {
1121                         size = min(csum_end - start, len);
1122                         len -= size;
1123                         start += size;
1124                         found += size;
1125                 }
1126
1127                 path.slots[0]++;
1128         }
1129         btrfs_release_path(&path);
1130         return found;
1131 }
1132
1133 static int process_file_extent(struct btrfs_root *root,
1134                                 struct extent_buffer *eb,
1135                                 int slot, struct btrfs_key *key,
1136                                 struct shared_node *active_node)
1137 {
1138         struct inode_record *rec;
1139         struct btrfs_file_extent_item *fi;
1140         u64 num_bytes = 0;
1141         u64 disk_bytenr = 0;
1142         u64 extent_offset = 0;
1143         u64 mask = root->sectorsize - 1;
1144         int extent_type;
1145
1146         rec = active_node->current;
1147         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1148         rec->found_file_extent = 1;
1149
1150         if (rec->extent_start == (u64)-1) {
1151                 rec->extent_start = key->offset;
1152                 rec->extent_end = key->offset;
1153         }
1154
1155         if (rec->extent_end > key->offset)
1156                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1157         else if (rec->extent_end < key->offset &&
1158                  rec->extent_end < rec->first_extent_gap)
1159                 rec->first_extent_gap = rec->extent_end;
1160
1161         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1162         extent_type = btrfs_file_extent_type(eb, fi);
1163
1164         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1165                 num_bytes = btrfs_file_extent_inline_len(eb, fi);
1166                 if (num_bytes == 0)
1167                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1168                 rec->found_size += num_bytes;
1169                 num_bytes = (num_bytes + mask) & ~mask;
1170         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1171                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1172                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1173                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1174                 extent_offset = btrfs_file_extent_offset(eb, fi);
1175                 if (num_bytes == 0 || (num_bytes & mask))
1176                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1177                 if (num_bytes + extent_offset >
1178                     btrfs_file_extent_ram_bytes(eb, fi))
1179                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1180                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1181                     (btrfs_file_extent_compression(eb, fi) ||
1182                      btrfs_file_extent_encryption(eb, fi) ||
1183                      btrfs_file_extent_other_encoding(eb, fi)))
1184                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1185                 if (disk_bytenr > 0)
1186                         rec->found_size += num_bytes;
1187         } else {
1188                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1189         }
1190         rec->extent_end = key->offset + num_bytes;
1191
1192         if (disk_bytenr > 0) {
1193                 u64 found;
1194                 if (btrfs_file_extent_compression(eb, fi))
1195                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1196                 else
1197                         disk_bytenr += extent_offset;
1198
1199                 found = count_csum_range(root, disk_bytenr, num_bytes);
1200                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1201                         if (found > 0)
1202                                 rec->found_csum_item = 1;
1203                         if (found < num_bytes)
1204                                 rec->some_csum_missing = 1;
1205                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1206                         if (found > 0)
1207                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1208                 }
1209         }
1210         return 0;
1211 }
1212
1213 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1214                             struct walk_control *wc)
1215 {
1216         struct btrfs_key key;
1217         u32 nritems;
1218         int i;
1219         int ret = 0;
1220         struct cache_tree *inode_cache;
1221         struct shared_node *active_node;
1222
1223         if (wc->root_level == wc->active_node &&
1224             btrfs_root_refs(&root->root_item) == 0)
1225                 return 0;
1226
1227         active_node = wc->nodes[wc->active_node];
1228         inode_cache = &active_node->inode_cache;
1229         nritems = btrfs_header_nritems(eb);
1230         for (i = 0; i < nritems; i++) {
1231                 btrfs_item_key_to_cpu(eb, &key, i);
1232
1233                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1234                         continue;
1235                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1236                         continue;
1237
1238                 if (active_node->current == NULL ||
1239                     active_node->current->ino < key.objectid) {
1240                         if (active_node->current) {
1241                                 active_node->current->checked = 1;
1242                                 maybe_free_inode_rec(inode_cache,
1243                                                      active_node->current);
1244                         }
1245                         active_node->current = get_inode_rec(inode_cache,
1246                                                              key.objectid, 1);
1247                 }
1248                 switch (key.type) {
1249                 case BTRFS_DIR_ITEM_KEY:
1250                 case BTRFS_DIR_INDEX_KEY:
1251                         ret = process_dir_item(root, eb, i, &key, active_node);
1252                         break;
1253                 case BTRFS_INODE_REF_KEY:
1254                         ret = process_inode_ref(eb, i, &key, active_node);
1255                         break;
1256                 case BTRFS_INODE_EXTREF_KEY:
1257                         ret = process_inode_extref(eb, i, &key, active_node);
1258                         break;
1259                 case BTRFS_INODE_ITEM_KEY:
1260                         ret = process_inode_item(eb, i, &key, active_node);
1261                         break;
1262                 case BTRFS_EXTENT_DATA_KEY:
1263                         ret = process_file_extent(root, eb, i, &key,
1264                                                   active_node);
1265                         break;
1266                 default:
1267                         break;
1268                 };
1269         }
1270         return ret;
1271 }
1272
1273 static void reada_walk_down(struct btrfs_root *root,
1274                             struct extent_buffer *node, int slot)
1275 {
1276         u64 bytenr;
1277         u64 ptr_gen;
1278         u32 nritems;
1279         u32 blocksize;
1280         int i;
1281         int ret;
1282         int level;
1283
1284         level = btrfs_header_level(node);
1285         if (level != 1)
1286                 return;
1287
1288         nritems = btrfs_header_nritems(node);
1289         blocksize = btrfs_level_size(root, level - 1);
1290         for (i = slot; i < nritems; i++) {
1291                 bytenr = btrfs_node_blockptr(node, i);
1292                 ptr_gen = btrfs_node_ptr_generation(node, i);
1293                 ret = readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1294                 if (ret)
1295                         break;
1296         }
1297 }
1298
1299 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1300                           struct walk_control *wc, int *level)
1301 {
1302         u64 bytenr;
1303         u64 ptr_gen;
1304         struct extent_buffer *next;
1305         struct extent_buffer *cur;
1306         u32 blocksize;
1307         int ret, err = 0;
1308         u64 refs;
1309
1310         WARN_ON(*level < 0);
1311         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1312         ret = btrfs_lookup_extent_info(NULL, root,
1313                                        path->nodes[*level]->start,
1314                                        *level, 1, &refs, NULL);
1315         if (ret < 0) {
1316                 err = ret;
1317                 goto out;
1318         }
1319
1320         if (refs > 1) {
1321                 ret = enter_shared_node(root, path->nodes[*level]->start,
1322                                         refs, wc, *level);
1323                 if (ret > 0) {
1324                         err = ret;
1325                         goto out;
1326                 }
1327         }
1328
1329         while (*level >= 0) {
1330                 WARN_ON(*level < 0);
1331                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1332                 cur = path->nodes[*level];
1333
1334                 if (btrfs_header_level(cur) != *level)
1335                         WARN_ON(1);
1336
1337                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1338                         break;
1339                 if (*level == 0) {
1340                         ret = process_one_leaf(root, cur, wc);
1341                         break;
1342                 }
1343                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1344                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1345                 blocksize = btrfs_level_size(root, *level - 1);
1346                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1347                                                1, &refs, NULL);
1348                 if (ret < 0)
1349                         refs = 0;
1350
1351                 if (refs > 1) {
1352                         ret = enter_shared_node(root, bytenr, refs,
1353                                                 wc, *level - 1);
1354                         if (ret > 0) {
1355                                 path->slots[*level]++;
1356                                 continue;
1357                         }
1358                 }
1359
1360                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1361                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1362                         free_extent_buffer(next);
1363                         reada_walk_down(root, cur, path->slots[*level]);
1364                         next = read_tree_block(root, bytenr, blocksize,
1365                                                ptr_gen);
1366                         if (!next) {
1367                                 err = -EIO;
1368                                 goto out;
1369                         }
1370                 }
1371
1372                 *level = *level - 1;
1373                 free_extent_buffer(path->nodes[*level]);
1374                 path->nodes[*level] = next;
1375                 path->slots[*level] = 0;
1376         }
1377 out:
1378         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1379         return err;
1380 }
1381
1382 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1383                         struct walk_control *wc, int *level)
1384 {
1385         int i;
1386         struct extent_buffer *leaf;
1387
1388         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1389                 leaf = path->nodes[i];
1390                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1391                         path->slots[i]++;
1392                         *level = i;
1393                         return 0;
1394                 } else {
1395                         free_extent_buffer(path->nodes[*level]);
1396                         path->nodes[*level] = NULL;
1397                         BUG_ON(*level > wc->active_node);
1398                         if (*level == wc->active_node)
1399                                 leave_shared_node(root, wc, *level);
1400                         *level = i + 1;
1401                 }
1402         }
1403         return 1;
1404 }
1405
1406 static int check_root_dir(struct inode_record *rec)
1407 {
1408         struct inode_backref *backref;
1409         int ret = -1;
1410
1411         if (!rec->found_inode_item || rec->errors)
1412                 goto out;
1413         if (rec->nlink != 1 || rec->found_link != 0)
1414                 goto out;
1415         if (list_empty(&rec->backrefs))
1416                 goto out;
1417         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1418         if (!backref->found_inode_ref)
1419                 goto out;
1420         if (backref->index != 0 || backref->namelen != 2 ||
1421             memcmp(backref->name, "..", 2))
1422                 goto out;
1423         if (backref->found_dir_index || backref->found_dir_item)
1424                 goto out;
1425         ret = 0;
1426 out:
1427         return ret;
1428 }
1429
1430 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1431                               struct btrfs_root *root, struct btrfs_path *path,
1432                               struct inode_record *rec)
1433 {
1434         struct btrfs_inode_item *ei;
1435         struct btrfs_key key;
1436         int ret;
1437
1438         key.objectid = rec->ino;
1439         key.type = BTRFS_INODE_ITEM_KEY;
1440         key.offset = (u64)-1;
1441
1442         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1443         if (ret < 0)
1444                 goto out;
1445         if (ret) {
1446                 if (!path->slots[0]) {
1447                         ret = -ENOENT;
1448                         goto out;
1449                 }
1450                 path->slots[0]--;
1451                 ret = 0;
1452         }
1453         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1454         if (key.objectid != rec->ino) {
1455                 ret = -ENOENT;
1456                 goto out;
1457         }
1458
1459         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1460                             struct btrfs_inode_item);
1461         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1462         btrfs_mark_buffer_dirty(path->nodes[0]);
1463         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1464         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1465                root->root_key.objectid);
1466 out:
1467         btrfs_release_path(path);
1468         return ret;
1469 }
1470
1471 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1472                                     struct btrfs_root *root,
1473                                     struct btrfs_path *path,
1474                                     struct inode_record *rec)
1475 {
1476         struct btrfs_key key;
1477         int ret;
1478
1479         key.objectid = BTRFS_ORPHAN_OBJECTID;
1480         key.type = BTRFS_ORPHAN_ITEM_KEY;
1481         key.offset = rec->ino;
1482
1483         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1484         btrfs_release_path(path);
1485         if (!ret)
1486                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1487         return ret;
1488 }
1489
1490 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
1491 {
1492         struct btrfs_trans_handle *trans;
1493         struct btrfs_path *path;
1494         int ret = 0;
1495
1496         /* So far we just fix dir isize wrong */
1497         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM)))
1498                 return 1;
1499
1500         path = btrfs_alloc_path();
1501         if (!path)
1502                 return -ENOMEM;
1503
1504         trans = btrfs_start_transaction(root, 1);
1505         if (IS_ERR(trans)) {
1506                 btrfs_free_path(path);
1507                 return PTR_ERR(trans);
1508         }
1509
1510         if (rec->errors & I_ERR_DIR_ISIZE_WRONG)
1511                 ret = repair_inode_isize(trans, root, path, rec);
1512         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
1513                 ret = repair_inode_orphan_item(trans, root, path, rec);
1514         btrfs_commit_transaction(trans, root);
1515         btrfs_free_path(path);
1516         return ret;
1517 }
1518
1519 static int check_inode_recs(struct btrfs_root *root,
1520                             struct cache_tree *inode_cache)
1521 {
1522         struct cache_extent *cache;
1523         struct ptr_node *node;
1524         struct inode_record *rec;
1525         struct inode_backref *backref;
1526         int ret;
1527         u64 error = 0;
1528         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1529
1530         if (btrfs_root_refs(&root->root_item) == 0) {
1531                 if (!cache_tree_empty(inode_cache))
1532                         fprintf(stderr, "warning line %d\n", __LINE__);
1533                 return 0;
1534         }
1535
1536         rec = get_inode_rec(inode_cache, root_dirid, 0);
1537         if (rec) {
1538                 ret = check_root_dir(rec);
1539                 if (ret) {
1540                         fprintf(stderr, "root %llu root dir %llu error\n",
1541                                 (unsigned long long)root->root_key.objectid,
1542                                 (unsigned long long)root_dirid);
1543                         error++;
1544                 }
1545         } else {
1546                 fprintf(stderr, "root %llu root dir %llu not found\n",
1547                         (unsigned long long)root->root_key.objectid,
1548                         (unsigned long long)root_dirid);
1549         }
1550
1551         while (1) {
1552                 cache = search_cache_extent(inode_cache, 0);
1553                 if (!cache)
1554                         break;
1555                 node = container_of(cache, struct ptr_node, cache);
1556                 rec = node->data;
1557                 remove_cache_extent(inode_cache, &node->cache);
1558                 free(node);
1559                 if (rec->ino == root_dirid ||
1560                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1561                         free_inode_rec(rec);
1562                         continue;
1563                 }
1564
1565                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1566                         ret = check_orphan_item(root, rec->ino);
1567                         if (ret == 0)
1568                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1569                         if (can_free_inode_rec(rec)) {
1570                                 free_inode_rec(rec);
1571                                 continue;
1572                         }
1573                 }
1574
1575                 if (repair) {
1576                         ret = try_repair_inode(root, rec);
1577                         if (ret == 0 && can_free_inode_rec(rec)) {
1578                                 free_inode_rec(rec);
1579                                 continue;
1580                         }
1581                         ret = 0;
1582                 }
1583
1584                 error++;
1585                 if (!rec->found_inode_item)
1586                         rec->errors |= I_ERR_NO_INODE_ITEM;
1587                 if (rec->found_link != rec->nlink)
1588                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1589                 fprintf(stderr, "root %llu inode %llu errors %x",
1590                         (unsigned long long) root->root_key.objectid,
1591                         (unsigned long long) rec->ino, rec->errors);
1592                 print_inode_error(rec->errors);
1593                 list_for_each_entry(backref, &rec->backrefs, list) {
1594                         if (!backref->found_dir_item)
1595                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1596                         if (!backref->found_dir_index)
1597                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1598                         if (!backref->found_inode_ref)
1599                                 backref->errors |= REF_ERR_NO_INODE_REF;
1600                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1601                                 " namelen %u name %s filetype %d error %x",
1602                                 (unsigned long long)backref->dir,
1603                                 (unsigned long long)backref->index,
1604                                 backref->namelen, backref->name,
1605                                 backref->filetype, backref->errors);
1606                         print_ref_error(backref->errors);
1607                 }
1608                 free_inode_rec(rec);
1609         }
1610         return (error > 0) ? -1 : 0;
1611 }
1612
1613 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1614                                         u64 objectid)
1615 {
1616         struct cache_extent *cache;
1617         struct root_record *rec = NULL;
1618         int ret;
1619
1620         cache = lookup_cache_extent(root_cache, objectid, 1);
1621         if (cache) {
1622                 rec = container_of(cache, struct root_record, cache);
1623         } else {
1624                 rec = calloc(1, sizeof(*rec));
1625                 rec->objectid = objectid;
1626                 INIT_LIST_HEAD(&rec->backrefs);
1627                 rec->cache.start = objectid;
1628                 rec->cache.size = 1;
1629
1630                 ret = insert_cache_extent(root_cache, &rec->cache);
1631                 BUG_ON(ret);
1632         }
1633         return rec;
1634 }
1635
1636 static struct root_backref *get_root_backref(struct root_record *rec,
1637                                              u64 ref_root, u64 dir, u64 index,
1638                                              const char *name, int namelen)
1639 {
1640         struct root_backref *backref;
1641
1642         list_for_each_entry(backref, &rec->backrefs, list) {
1643                 if (backref->ref_root != ref_root || backref->dir != dir ||
1644                     backref->namelen != namelen)
1645                         continue;
1646                 if (memcmp(name, backref->name, namelen))
1647                         continue;
1648                 return backref;
1649         }
1650
1651         backref = malloc(sizeof(*backref) + namelen + 1);
1652         memset(backref, 0, sizeof(*backref));
1653         backref->ref_root = ref_root;
1654         backref->dir = dir;
1655         backref->index = index;
1656         backref->namelen = namelen;
1657         memcpy(backref->name, name, namelen);
1658         backref->name[namelen] = '\0';
1659         list_add_tail(&backref->list, &rec->backrefs);
1660         return backref;
1661 }
1662
1663 static void free_root_record(struct cache_extent *cache)
1664 {
1665         struct root_record *rec;
1666         struct root_backref *backref;
1667
1668         rec = container_of(cache, struct root_record, cache);
1669         while (!list_empty(&rec->backrefs)) {
1670                 backref = list_entry(rec->backrefs.next,
1671                                      struct root_backref, list);
1672                 list_del(&backref->list);
1673                 free(backref);
1674         }
1675
1676         kfree(rec);
1677 }
1678
1679 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
1680
1681 static int add_root_backref(struct cache_tree *root_cache,
1682                             u64 root_id, u64 ref_root, u64 dir, u64 index,
1683                             const char *name, int namelen,
1684                             int item_type, int errors)
1685 {
1686         struct root_record *rec;
1687         struct root_backref *backref;
1688
1689         rec = get_root_rec(root_cache, root_id);
1690         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
1691
1692         backref->errors |= errors;
1693
1694         if (item_type != BTRFS_DIR_ITEM_KEY) {
1695                 if (backref->found_dir_index || backref->found_back_ref ||
1696                     backref->found_forward_ref) {
1697                         if (backref->index != index)
1698                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
1699                 } else {
1700                         backref->index = index;
1701                 }
1702         }
1703
1704         if (item_type == BTRFS_DIR_ITEM_KEY) {
1705                 if (backref->found_forward_ref)
1706                         rec->found_ref++;
1707                 backref->found_dir_item = 1;
1708         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
1709                 backref->found_dir_index = 1;
1710         } else if (item_type == BTRFS_ROOT_REF_KEY) {
1711                 if (backref->found_forward_ref)
1712                         backref->errors |= REF_ERR_DUP_ROOT_REF;
1713                 else if (backref->found_dir_item)
1714                         rec->found_ref++;
1715                 backref->found_forward_ref = 1;
1716         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
1717                 if (backref->found_back_ref)
1718                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
1719                 backref->found_back_ref = 1;
1720         } else {
1721                 BUG_ON(1);
1722         }
1723
1724         if (backref->found_forward_ref && backref->found_dir_item)
1725                 backref->reachable = 1;
1726         return 0;
1727 }
1728
1729 static int merge_root_recs(struct btrfs_root *root,
1730                            struct cache_tree *src_cache,
1731                            struct cache_tree *dst_cache)
1732 {
1733         struct cache_extent *cache;
1734         struct ptr_node *node;
1735         struct inode_record *rec;
1736         struct inode_backref *backref;
1737
1738         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1739                 free_inode_recs_tree(src_cache);
1740                 return 0;
1741         }
1742
1743         while (1) {
1744                 cache = search_cache_extent(src_cache, 0);
1745                 if (!cache)
1746                         break;
1747                 node = container_of(cache, struct ptr_node, cache);
1748                 rec = node->data;
1749                 remove_cache_extent(src_cache, &node->cache);
1750                 free(node);
1751
1752                 if (!is_child_root(root, root->objectid, rec->ino))
1753                         goto skip;
1754
1755                 list_for_each_entry(backref, &rec->backrefs, list) {
1756                         BUG_ON(backref->found_inode_ref);
1757                         if (backref->found_dir_item)
1758                                 add_root_backref(dst_cache, rec->ino,
1759                                         root->root_key.objectid, backref->dir,
1760                                         backref->index, backref->name,
1761                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
1762                                         backref->errors);
1763                         if (backref->found_dir_index)
1764                                 add_root_backref(dst_cache, rec->ino,
1765                                         root->root_key.objectid, backref->dir,
1766                                         backref->index, backref->name,
1767                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
1768                                         backref->errors);
1769                 }
1770 skip:
1771                 free_inode_rec(rec);
1772         }
1773         return 0;
1774 }
1775
1776 static int check_root_refs(struct btrfs_root *root,
1777                            struct cache_tree *root_cache)
1778 {
1779         struct root_record *rec;
1780         struct root_record *ref_root;
1781         struct root_backref *backref;
1782         struct cache_extent *cache;
1783         int loop = 1;
1784         int ret;
1785         int error;
1786         int errors = 0;
1787
1788         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
1789         rec->found_ref = 1;
1790
1791         /* fixme: this can not detect circular references */
1792         while (loop) {
1793                 loop = 0;
1794                 cache = search_cache_extent(root_cache, 0);
1795                 while (1) {
1796                         if (!cache)
1797                                 break;
1798                         rec = container_of(cache, struct root_record, cache);
1799                         cache = next_cache_extent(cache);
1800
1801                         if (rec->found_ref == 0)
1802                                 continue;
1803
1804                         list_for_each_entry(backref, &rec->backrefs, list) {
1805                                 if (!backref->reachable)
1806                                         continue;
1807
1808                                 ref_root = get_root_rec(root_cache,
1809                                                         backref->ref_root);
1810                                 if (ref_root->found_ref > 0)
1811                                         continue;
1812
1813                                 backref->reachable = 0;
1814                                 rec->found_ref--;
1815                                 if (rec->found_ref == 0)
1816                                         loop = 1;
1817                         }
1818                 }
1819         }
1820
1821         cache = search_cache_extent(root_cache, 0);
1822         while (1) {
1823                 if (!cache)
1824                         break;
1825                 rec = container_of(cache, struct root_record, cache);
1826                 cache = next_cache_extent(cache);
1827
1828                 if (rec->found_ref == 0 &&
1829                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1830                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
1831                         ret = check_orphan_item(root->fs_info->tree_root,
1832                                                 rec->objectid);
1833                         if (ret == 0)
1834                                 continue;
1835
1836                         /*
1837                          * If we don't have a root item then we likely just have
1838                          * a dir item in a snapshot for this root but no actual
1839                          * ref key or anything so it's meaningless.
1840                          */
1841                         if (!rec->found_root_item)
1842                                 continue;
1843                         errors++;
1844                         fprintf(stderr, "fs tree %llu not referenced\n",
1845                                 (unsigned long long)rec->objectid);
1846                 }
1847
1848                 error = 0;
1849                 if (rec->found_ref > 0 && !rec->found_root_item)
1850                         error = 1;
1851                 list_for_each_entry(backref, &rec->backrefs, list) {
1852                         if (!backref->found_dir_item)
1853                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1854                         if (!backref->found_dir_index)
1855                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1856                         if (!backref->found_back_ref)
1857                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
1858                         if (!backref->found_forward_ref)
1859                                 backref->errors |= REF_ERR_NO_ROOT_REF;
1860                         if (backref->reachable && backref->errors)
1861                                 error = 1;
1862                 }
1863                 if (!error)
1864                         continue;
1865
1866                 errors++;
1867                 fprintf(stderr, "fs tree %llu refs %u %s\n",
1868                         (unsigned long long)rec->objectid, rec->found_ref,
1869                          rec->found_root_item ? "" : "not found");
1870
1871                 list_for_each_entry(backref, &rec->backrefs, list) {
1872                         if (!backref->reachable)
1873                                 continue;
1874                         if (!backref->errors && rec->found_root_item)
1875                                 continue;
1876                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
1877                                 " index %llu namelen %u name %s error %x\n",
1878                                 (unsigned long long)backref->ref_root,
1879                                 (unsigned long long)backref->dir,
1880                                 (unsigned long long)backref->index,
1881                                 backref->namelen, backref->name,
1882                                 backref->errors);
1883                 }
1884         }
1885         return errors > 0 ? 1 : 0;
1886 }
1887
1888 static int process_root_ref(struct extent_buffer *eb, int slot,
1889                             struct btrfs_key *key,
1890                             struct cache_tree *root_cache)
1891 {
1892         u64 dirid;
1893         u64 index;
1894         u32 len;
1895         u32 name_len;
1896         struct btrfs_root_ref *ref;
1897         char namebuf[BTRFS_NAME_LEN];
1898         int error;
1899
1900         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
1901
1902         dirid = btrfs_root_ref_dirid(eb, ref);
1903         index = btrfs_root_ref_sequence(eb, ref);
1904         name_len = btrfs_root_ref_name_len(eb, ref);
1905
1906         if (name_len <= BTRFS_NAME_LEN) {
1907                 len = name_len;
1908                 error = 0;
1909         } else {
1910                 len = BTRFS_NAME_LEN;
1911                 error = REF_ERR_NAME_TOO_LONG;
1912         }
1913         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1914
1915         if (key->type == BTRFS_ROOT_REF_KEY) {
1916                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
1917                                  index, namebuf, len, key->type, error);
1918         } else {
1919                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
1920                                  index, namebuf, len, key->type, error);
1921         }
1922         return 0;
1923 }
1924
1925 static int check_fs_root(struct btrfs_root *root,
1926                          struct cache_tree *root_cache,
1927                          struct walk_control *wc)
1928 {
1929         int ret = 0;
1930         int wret;
1931         int level;
1932         struct btrfs_path path;
1933         struct shared_node root_node;
1934         struct root_record *rec;
1935         struct btrfs_root_item *root_item = &root->root_item;
1936
1937         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1938                 rec = get_root_rec(root_cache, root->root_key.objectid);
1939                 if (btrfs_root_refs(root_item) > 0)
1940                         rec->found_root_item = 1;
1941         }
1942
1943         btrfs_init_path(&path);
1944         memset(&root_node, 0, sizeof(root_node));
1945         cache_tree_init(&root_node.root_cache);
1946         cache_tree_init(&root_node.inode_cache);
1947
1948         level = btrfs_header_level(root->node);
1949         memset(wc->nodes, 0, sizeof(wc->nodes));
1950         wc->nodes[level] = &root_node;
1951         wc->active_node = level;
1952         wc->root_level = level;
1953
1954         if (btrfs_root_refs(root_item) > 0 ||
1955             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1956                 path.nodes[level] = root->node;
1957                 extent_buffer_get(root->node);
1958                 path.slots[level] = 0;
1959         } else {
1960                 struct btrfs_key key;
1961                 struct btrfs_disk_key found_key;
1962
1963                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1964                 level = root_item->drop_level;
1965                 path.lowest_level = level;
1966                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1967                 BUG_ON(wret < 0);
1968                 btrfs_node_key(path.nodes[level], &found_key,
1969                                 path.slots[level]);
1970                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1971                                         sizeof(found_key)));
1972         }
1973
1974         while (1) {
1975                 wret = walk_down_tree(root, &path, wc, &level);
1976                 if (wret < 0)
1977                         ret = wret;
1978                 if (wret != 0)
1979                         break;
1980
1981                 wret = walk_up_tree(root, &path, wc, &level);
1982                 if (wret < 0)
1983                         ret = wret;
1984                 if (wret != 0)
1985                         break;
1986         }
1987         btrfs_release_path(&path);
1988
1989         merge_root_recs(root, &root_node.root_cache, root_cache);
1990
1991         if (root_node.current) {
1992                 root_node.current->checked = 1;
1993                 maybe_free_inode_rec(&root_node.inode_cache,
1994                                 root_node.current);
1995         }
1996
1997         ret = check_inode_recs(root, &root_node.inode_cache);
1998         return ret;
1999 }
2000
2001 static int fs_root_objectid(u64 objectid)
2002 {
2003         if (objectid == BTRFS_FS_TREE_OBJECTID ||
2004             objectid == BTRFS_TREE_RELOC_OBJECTID ||
2005             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID ||
2006             (objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2007              objectid <= BTRFS_LAST_FREE_OBJECTID))
2008                 return 1;
2009         return 0;
2010 }
2011
2012 static int check_fs_roots(struct btrfs_root *root,
2013                           struct cache_tree *root_cache)
2014 {
2015         struct btrfs_path path;
2016         struct btrfs_key key;
2017         struct walk_control wc;
2018         struct extent_buffer *leaf;
2019         struct btrfs_root *tmp_root;
2020         struct btrfs_root *tree_root = root->fs_info->tree_root;
2021         int ret;
2022         int err = 0;
2023
2024         /*
2025          * Just in case we made any changes to the extent tree that weren't
2026          * reflected into the free space cache yet.
2027          */
2028         if (repair)
2029                 reset_cached_block_groups(root->fs_info);
2030         memset(&wc, 0, sizeof(wc));
2031         cache_tree_init(&wc.shared);
2032         btrfs_init_path(&path);
2033
2034         key.offset = 0;
2035         key.objectid = 0;
2036         key.type = BTRFS_ROOT_ITEM_KEY;
2037         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
2038         BUG_ON(ret < 0);
2039         while (1) {
2040                 leaf = path.nodes[0];
2041                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2042                         ret = btrfs_next_leaf(tree_root, &path);
2043                         if (ret != 0)
2044                                 break;
2045                         leaf = path.nodes[0];
2046                 }
2047                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2048                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
2049                     fs_root_objectid(key.objectid)) {
2050                         key.offset = (u64)-1;
2051                         tmp_root = btrfs_read_fs_root(root->fs_info, &key);
2052                         if (IS_ERR(tmp_root)) {
2053                                 err = 1;
2054                                 goto next;
2055                         }
2056                         ret = check_fs_root(tmp_root, root_cache, &wc);
2057                         if (ret)
2058                                 err = 1;
2059                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
2060                            key.type == BTRFS_ROOT_BACKREF_KEY) {
2061                         process_root_ref(leaf, path.slots[0], &key,
2062                                          root_cache);
2063                 }
2064 next:
2065                 path.slots[0]++;
2066         }
2067         btrfs_release_path(&path);
2068
2069         if (!cache_tree_empty(&wc.shared))
2070                 fprintf(stderr, "warning line %d\n", __LINE__);
2071
2072         return err;
2073 }
2074
2075 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
2076 {
2077         struct list_head *cur = rec->backrefs.next;
2078         struct extent_backref *back;
2079         struct tree_backref *tback;
2080         struct data_backref *dback;
2081         u64 found = 0;
2082         int err = 0;
2083
2084         while(cur != &rec->backrefs) {
2085                 back = list_entry(cur, struct extent_backref, list);
2086                 cur = cur->next;
2087                 if (!back->found_extent_tree) {
2088                         err = 1;
2089                         if (!print_errs)
2090                                 goto out;
2091                         if (back->is_data) {
2092                                 dback = (struct data_backref *)back;
2093                                 fprintf(stderr, "Backref %llu %s %llu"
2094                                         " owner %llu offset %llu num_refs %lu"
2095                                         " not found in extent tree\n",
2096                                         (unsigned long long)rec->start,
2097                                         back->full_backref ?
2098                                         "parent" : "root",
2099                                         back->full_backref ?
2100                                         (unsigned long long)dback->parent:
2101                                         (unsigned long long)dback->root,
2102                                         (unsigned long long)dback->owner,
2103                                         (unsigned long long)dback->offset,
2104                                         (unsigned long)dback->num_refs);
2105                         } else {
2106                                 tback = (struct tree_backref *)back;
2107                                 fprintf(stderr, "Backref %llu parent %llu"
2108                                         " root %llu not found in extent tree\n",
2109                                         (unsigned long long)rec->start,
2110                                         (unsigned long long)tback->parent,
2111                                         (unsigned long long)tback->root);
2112                         }
2113                 }
2114                 if (!back->is_data && !back->found_ref) {
2115                         err = 1;
2116                         if (!print_errs)
2117                                 goto out;
2118                         tback = (struct tree_backref *)back;
2119                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
2120                                 (unsigned long long)rec->start,
2121                                 back->full_backref ? "parent" : "root",
2122                                 back->full_backref ?
2123                                 (unsigned long long)tback->parent :
2124                                 (unsigned long long)tback->root, back);
2125                 }
2126                 if (back->is_data) {
2127                         dback = (struct data_backref *)back;
2128                         if (dback->found_ref != dback->num_refs) {
2129                                 err = 1;
2130                                 if (!print_errs)
2131                                         goto out;
2132                                 fprintf(stderr, "Incorrect local backref count"
2133                                         " on %llu %s %llu owner %llu"
2134                                         " offset %llu found %u wanted %u back %p\n",
2135                                         (unsigned long long)rec->start,
2136                                         back->full_backref ?
2137                                         "parent" : "root",
2138                                         back->full_backref ?
2139                                         (unsigned long long)dback->parent:
2140                                         (unsigned long long)dback->root,
2141                                         (unsigned long long)dback->owner,
2142                                         (unsigned long long)dback->offset,
2143                                         dback->found_ref, dback->num_refs, back);
2144                         }
2145                         if (dback->disk_bytenr != rec->start) {
2146                                 err = 1;
2147                                 if (!print_errs)
2148                                         goto out;
2149                                 fprintf(stderr, "Backref disk bytenr does not"
2150                                         " match extent record, bytenr=%llu, "
2151                                         "ref bytenr=%llu\n",
2152                                         (unsigned long long)rec->start,
2153                                         (unsigned long long)dback->disk_bytenr);
2154                         }
2155
2156                         if (dback->bytes != rec->nr) {
2157                                 err = 1;
2158                                 if (!print_errs)
2159                                         goto out;
2160                                 fprintf(stderr, "Backref bytes do not match "
2161                                         "extent backref, bytenr=%llu, ref "
2162                                         "bytes=%llu, backref bytes=%llu\n",
2163                                         (unsigned long long)rec->start,
2164                                         (unsigned long long)rec->nr,
2165                                         (unsigned long long)dback->bytes);
2166                         }
2167                 }
2168                 if (!back->is_data) {
2169                         found += 1;
2170                 } else {
2171                         dback = (struct data_backref *)back;
2172                         found += dback->found_ref;
2173                 }
2174         }
2175         if (found != rec->refs) {
2176                 err = 1;
2177                 if (!print_errs)
2178                         goto out;
2179                 fprintf(stderr, "Incorrect global backref count "
2180                         "on %llu found %llu wanted %llu\n",
2181                         (unsigned long long)rec->start,
2182                         (unsigned long long)found,
2183                         (unsigned long long)rec->refs);
2184         }
2185 out:
2186         return err;
2187 }
2188
2189 static int free_all_extent_backrefs(struct extent_record *rec)
2190 {
2191         struct extent_backref *back;
2192         struct list_head *cur;
2193         while (!list_empty(&rec->backrefs)) {
2194                 cur = rec->backrefs.next;
2195                 back = list_entry(cur, struct extent_backref, list);
2196                 list_del(cur);
2197                 free(back);
2198         }
2199         return 0;
2200 }
2201
2202 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
2203                                      struct cache_tree *extent_cache)
2204 {
2205         struct cache_extent *cache;
2206         struct extent_record *rec;
2207
2208         while (1) {
2209                 cache = first_cache_extent(extent_cache);
2210                 if (!cache)
2211                         break;
2212                 rec = container_of(cache, struct extent_record, cache);
2213                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2214                 remove_cache_extent(extent_cache, cache);
2215                 free_all_extent_backrefs(rec);
2216                 free(rec);
2217         }
2218 }
2219
2220 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2221                                  struct extent_record *rec)
2222 {
2223         if (rec->content_checked && rec->owner_ref_checked &&
2224             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2225             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2226                 remove_cache_extent(extent_cache, &rec->cache);
2227                 free_all_extent_backrefs(rec);
2228                 list_del_init(&rec->list);
2229                 free(rec);
2230         }
2231         return 0;
2232 }
2233
2234 static int check_owner_ref(struct btrfs_root *root,
2235                             struct extent_record *rec,
2236                             struct extent_buffer *buf)
2237 {
2238         struct extent_backref *node;
2239         struct tree_backref *back;
2240         struct btrfs_root *ref_root;
2241         struct btrfs_key key;
2242         struct btrfs_path path;
2243         struct extent_buffer *parent;
2244         int level;
2245         int found = 0;
2246         int ret;
2247
2248         list_for_each_entry(node, &rec->backrefs, list) {
2249                 if (node->is_data)
2250                         continue;
2251                 if (!node->found_ref)
2252                         continue;
2253                 if (node->full_backref)
2254                         continue;
2255                 back = (struct tree_backref *)node;
2256                 if (btrfs_header_owner(buf) == back->root)
2257                         return 0;
2258         }
2259         BUG_ON(rec->is_root);
2260
2261         /* try to find the block by search corresponding fs tree */
2262         key.objectid = btrfs_header_owner(buf);
2263         key.type = BTRFS_ROOT_ITEM_KEY;
2264         key.offset = (u64)-1;
2265
2266         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2267         if (IS_ERR(ref_root))
2268                 return 1;
2269
2270         level = btrfs_header_level(buf);
2271         if (level == 0)
2272                 btrfs_item_key_to_cpu(buf, &key, 0);
2273         else
2274                 btrfs_node_key_to_cpu(buf, &key, 0);
2275
2276         btrfs_init_path(&path);
2277         path.lowest_level = level + 1;
2278         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2279         if (ret < 0)
2280                 return 0;
2281
2282         parent = path.nodes[level + 1];
2283         if (parent && buf->start == btrfs_node_blockptr(parent,
2284                                                         path.slots[level + 1]))
2285                 found = 1;
2286
2287         btrfs_release_path(&path);
2288         return found ? 0 : 1;
2289 }
2290
2291 static int is_extent_tree_record(struct extent_record *rec)
2292 {
2293         struct list_head *cur = rec->backrefs.next;
2294         struct extent_backref *node;
2295         struct tree_backref *back;
2296         int is_extent = 0;
2297
2298         while(cur != &rec->backrefs) {
2299                 node = list_entry(cur, struct extent_backref, list);
2300                 cur = cur->next;
2301                 if (node->is_data)
2302                         return 0;
2303                 back = (struct tree_backref *)node;
2304                 if (node->full_backref)
2305                         return 0;
2306                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2307                         is_extent = 1;
2308         }
2309         return is_extent;
2310 }
2311
2312
2313 static int record_bad_block_io(struct btrfs_fs_info *info,
2314                                struct cache_tree *extent_cache,
2315                                u64 start, u64 len)
2316 {
2317         struct extent_record *rec;
2318         struct cache_extent *cache;
2319         struct btrfs_key key;
2320
2321         cache = lookup_cache_extent(extent_cache, start, len);
2322         if (!cache)
2323                 return 0;
2324
2325         rec = container_of(cache, struct extent_record, cache);
2326         if (!is_extent_tree_record(rec))
2327                 return 0;
2328
2329         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2330         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2331 }
2332
2333 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
2334                        struct extent_buffer *buf, int slot)
2335 {
2336         if (btrfs_header_level(buf)) {
2337                 struct btrfs_key_ptr ptr1, ptr2;
2338
2339                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
2340                                    sizeof(struct btrfs_key_ptr));
2341                 read_extent_buffer(buf, &ptr2,
2342                                    btrfs_node_key_ptr_offset(slot + 1),
2343                                    sizeof(struct btrfs_key_ptr));
2344                 write_extent_buffer(buf, &ptr1,
2345                                     btrfs_node_key_ptr_offset(slot + 1),
2346                                     sizeof(struct btrfs_key_ptr));
2347                 write_extent_buffer(buf, &ptr2,
2348                                     btrfs_node_key_ptr_offset(slot),
2349                                     sizeof(struct btrfs_key_ptr));
2350                 if (slot == 0) {
2351                         struct btrfs_disk_key key;
2352                         btrfs_node_key(buf, &key, 0);
2353                         btrfs_fixup_low_keys(root, path, &key,
2354                                              btrfs_header_level(buf) + 1);
2355                 }
2356         } else {
2357                 struct btrfs_item *item1, *item2;
2358                 struct btrfs_key k1, k2;
2359                 char *item1_data, *item2_data;
2360                 u32 item1_offset, item2_offset, item1_size, item2_size;
2361
2362                 item1 = btrfs_item_nr(slot);
2363                 item2 = btrfs_item_nr(slot + 1);
2364                 btrfs_item_key_to_cpu(buf, &k1, slot);
2365                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
2366                 item1_offset = btrfs_item_offset(buf, item1);
2367                 item2_offset = btrfs_item_offset(buf, item2);
2368                 item1_size = btrfs_item_size(buf, item1);
2369                 item2_size = btrfs_item_size(buf, item2);
2370
2371                 item1_data = malloc(item1_size);
2372                 if (!item1_data)
2373                         return -ENOMEM;
2374                 item2_data = malloc(item2_size);
2375                 if (!item2_data) {
2376                         free(item2_data);
2377                         return -ENOMEM;
2378                 }
2379
2380                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
2381                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
2382
2383                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
2384                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
2385                 free(item1_data);
2386                 free(item2_data);
2387
2388                 btrfs_set_item_offset(buf, item1, item2_offset);
2389                 btrfs_set_item_offset(buf, item2, item1_offset);
2390                 btrfs_set_item_size(buf, item1, item2_size);
2391                 btrfs_set_item_size(buf, item2, item1_size);
2392
2393                 path->slots[0] = slot;
2394                 btrfs_set_item_key_unsafe(root, path, &k2);
2395                 path->slots[0] = slot + 1;
2396                 btrfs_set_item_key_unsafe(root, path, &k1);
2397         }
2398         return 0;
2399 }
2400
2401 /*
2402  * Attempt to fix basic block failures.  Currently we only handle bad key
2403  * orders, we will cycle through the keys and swap them if necessary.
2404  */
2405 static int try_to_fix_bad_block(struct btrfs_trans_handle *trans,
2406                                 struct btrfs_root *root,
2407                                 struct extent_buffer *buf,
2408                                 struct btrfs_disk_key *parent_key,
2409                                 enum btrfs_tree_block_status status)
2410 {
2411         struct btrfs_path *path;
2412         struct btrfs_key k1, k2;
2413         int i;
2414         int ret;
2415
2416         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
2417                 return -EIO;
2418
2419         k1.objectid = btrfs_header_owner(buf);
2420         k1.type = BTRFS_ROOT_ITEM_KEY;
2421         k1.offset = (u64)-1;
2422
2423         root = btrfs_read_fs_root(root->fs_info, &k1);
2424         if (IS_ERR(root))
2425                 return -EIO;
2426
2427         path = btrfs_alloc_path();
2428         if (!path)
2429                 return -EIO;
2430
2431         path->lowest_level = btrfs_header_level(buf);
2432         path->skip_check_block = 1;
2433         if (btrfs_header_level(buf))
2434                 btrfs_node_key_to_cpu(buf, &k1, 0);
2435         else
2436                 btrfs_item_key_to_cpu(buf, &k1, 0);
2437
2438         ret = btrfs_search_slot(trans, root, &k1, path, 0, 1);
2439         if (ret) {
2440                 btrfs_free_path(path);
2441                 return -EIO;
2442         }
2443
2444         buf = path->nodes[0];
2445         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
2446                 if (btrfs_header_level(buf)) {
2447                         btrfs_node_key_to_cpu(buf, &k1, i);
2448                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
2449                 } else {
2450                         btrfs_item_key_to_cpu(buf, &k1, i);
2451                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
2452                 }
2453                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
2454                         continue;
2455                 ret = swap_values(root, path, buf, i);
2456                 if (ret)
2457                         break;
2458                 btrfs_mark_buffer_dirty(buf);
2459                 i = 0;
2460         }
2461
2462         btrfs_free_path(path);
2463         return ret;
2464 }
2465
2466 static int check_block(struct btrfs_trans_handle *trans,
2467                        struct btrfs_root *root,
2468                        struct cache_tree *extent_cache,
2469                        struct extent_buffer *buf, u64 flags)
2470 {
2471         struct extent_record *rec;
2472         struct cache_extent *cache;
2473         struct btrfs_key key;
2474         enum btrfs_tree_block_status status;
2475         int ret = 1;
2476         int level;
2477
2478         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
2479         if (!cache)
2480                 return 1;
2481         rec = container_of(cache, struct extent_record, cache);
2482         rec->generation = btrfs_header_generation(buf);
2483
2484         level = btrfs_header_level(buf);
2485         if (btrfs_header_nritems(buf) > 0) {
2486
2487                 if (level == 0)
2488                         btrfs_item_key_to_cpu(buf, &key, 0);
2489                 else
2490                         btrfs_node_key_to_cpu(buf, &key, 0);
2491
2492                 rec->info_objectid = key.objectid;
2493         }
2494         rec->info_level = level;
2495
2496         if (btrfs_is_leaf(buf))
2497                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
2498         else
2499                 status = btrfs_check_node(root, &rec->parent_key, buf);
2500
2501         if (status != BTRFS_TREE_BLOCK_CLEAN) {
2502                 if (repair)
2503                         status = try_to_fix_bad_block(trans, root, buf,
2504                                                       &rec->parent_key,
2505                                                       status);
2506                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2507                         ret = -EIO;
2508                         fprintf(stderr, "bad block %llu\n",
2509                                 (unsigned long long)buf->start);
2510                 } else {
2511                         /*
2512                          * Signal to callers we need to start the scan over
2513                          * again since we'll have cow'ed blocks.
2514                          */
2515                         ret = -EAGAIN;
2516                 }
2517         } else {
2518                 rec->content_checked = 1;
2519                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2520                         rec->owner_ref_checked = 1;
2521                 else {
2522                         ret = check_owner_ref(root, rec, buf);
2523                         if (!ret)
2524                                 rec->owner_ref_checked = 1;
2525                 }
2526         }
2527         if (!ret)
2528                 maybe_free_extent_rec(extent_cache, rec);
2529         return ret;
2530 }
2531
2532 static struct tree_backref *find_tree_backref(struct extent_record *rec,
2533                                                 u64 parent, u64 root)
2534 {
2535         struct list_head *cur = rec->backrefs.next;
2536         struct extent_backref *node;
2537         struct tree_backref *back;
2538
2539         while(cur != &rec->backrefs) {
2540                 node = list_entry(cur, struct extent_backref, list);
2541                 cur = cur->next;
2542                 if (node->is_data)
2543                         continue;
2544                 back = (struct tree_backref *)node;
2545                 if (parent > 0) {
2546                         if (!node->full_backref)
2547                                 continue;
2548                         if (parent == back->parent)
2549                                 return back;
2550                 } else {
2551                         if (node->full_backref)
2552                                 continue;
2553                         if (back->root == root)
2554                                 return back;
2555                 }
2556         }
2557         return NULL;
2558 }
2559
2560 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
2561                                                 u64 parent, u64 root)
2562 {
2563         struct tree_backref *ref = malloc(sizeof(*ref));
2564         memset(&ref->node, 0, sizeof(ref->node));
2565         if (parent > 0) {
2566                 ref->parent = parent;
2567                 ref->node.full_backref = 1;
2568         } else {
2569                 ref->root = root;
2570                 ref->node.full_backref = 0;
2571         }
2572         list_add_tail(&ref->node.list, &rec->backrefs);
2573
2574         return ref;
2575 }
2576
2577 static struct data_backref *find_data_backref(struct extent_record *rec,
2578                                                 u64 parent, u64 root,
2579                                                 u64 owner, u64 offset,
2580                                                 int found_ref,
2581                                                 u64 disk_bytenr, u64 bytes)
2582 {
2583         struct list_head *cur = rec->backrefs.next;
2584         struct extent_backref *node;
2585         struct data_backref *back;
2586
2587         while(cur != &rec->backrefs) {
2588                 node = list_entry(cur, struct extent_backref, list);
2589                 cur = cur->next;
2590                 if (!node->is_data)
2591                         continue;
2592                 back = (struct data_backref *)node;
2593                 if (parent > 0) {
2594                         if (!node->full_backref)
2595                                 continue;
2596                         if (parent == back->parent)
2597                                 return back;
2598                 } else {
2599                         if (node->full_backref)
2600                                 continue;
2601                         if (back->root == root && back->owner == owner &&
2602                             back->offset == offset) {
2603                                 if (found_ref && node->found_ref &&
2604                                     (back->bytes != bytes ||
2605                                     back->disk_bytenr != disk_bytenr))
2606                                         continue;
2607                                 return back;
2608                         }
2609                 }
2610         }
2611         return NULL;
2612 }
2613
2614 static struct data_backref *alloc_data_backref(struct extent_record *rec,
2615                                                 u64 parent, u64 root,
2616                                                 u64 owner, u64 offset,
2617                                                 u64 max_size)
2618 {
2619         struct data_backref *ref = malloc(sizeof(*ref));
2620         memset(&ref->node, 0, sizeof(ref->node));
2621         ref->node.is_data = 1;
2622
2623         if (parent > 0) {
2624                 ref->parent = parent;
2625                 ref->owner = 0;
2626                 ref->offset = 0;
2627                 ref->node.full_backref = 1;
2628         } else {
2629                 ref->root = root;
2630                 ref->owner = owner;
2631                 ref->offset = offset;
2632                 ref->node.full_backref = 0;
2633         }
2634         ref->bytes = max_size;
2635         ref->found_ref = 0;
2636         ref->num_refs = 0;
2637         list_add_tail(&ref->node.list, &rec->backrefs);
2638         if (max_size > rec->max_size)
2639                 rec->max_size = max_size;
2640         return ref;
2641 }
2642
2643 static int add_extent_rec(struct cache_tree *extent_cache,
2644                           struct btrfs_key *parent_key,
2645                           u64 start, u64 nr, u64 extent_item_refs,
2646                           int is_root, int inc_ref, int set_checked,
2647                           int metadata, int extent_rec, u64 max_size)
2648 {
2649         struct extent_record *rec;
2650         struct cache_extent *cache;
2651         int ret = 0;
2652         int dup = 0;
2653
2654         cache = lookup_cache_extent(extent_cache, start, nr);
2655         if (cache) {
2656                 rec = container_of(cache, struct extent_record, cache);
2657                 if (inc_ref)
2658                         rec->refs++;
2659                 if (rec->nr == 1)
2660                         rec->nr = max(nr, max_size);
2661
2662                 /*
2663                  * We need to make sure to reset nr to whatever the extent
2664                  * record says was the real size, this way we can compare it to
2665                  * the backrefs.
2666                  */
2667                 if (extent_rec) {
2668                         if (start != rec->start || rec->found_rec) {
2669                                 struct extent_record *tmp;
2670
2671                                 dup = 1;
2672                                 if (list_empty(&rec->list))
2673                                         list_add_tail(&rec->list,
2674                                                       &duplicate_extents);
2675
2676                                 /*
2677                                  * We have to do this song and dance in case we
2678                                  * find an extent record that falls inside of
2679                                  * our current extent record but does not have
2680                                  * the same objectid.
2681                                  */
2682                                 tmp = malloc(sizeof(*tmp));
2683                                 if (!tmp)
2684                                         return -ENOMEM;
2685                                 tmp->start = start;
2686                                 tmp->max_size = max_size;
2687                                 tmp->nr = nr;
2688                                 tmp->found_rec = 1;
2689                                 tmp->metadata = metadata;
2690                                 tmp->extent_item_refs = extent_item_refs;
2691                                 INIT_LIST_HEAD(&tmp->list);
2692                                 list_add_tail(&tmp->list, &rec->dups);
2693                                 rec->num_duplicates++;
2694                         } else {
2695                                 rec->nr = nr;
2696                                 rec->found_rec = 1;
2697                         }
2698                 }
2699
2700                 if (extent_item_refs && !dup) {
2701                         if (rec->extent_item_refs) {
2702                                 fprintf(stderr, "block %llu rec "
2703                                         "extent_item_refs %llu, passed %llu\n",
2704                                         (unsigned long long)start,
2705                                         (unsigned long long)
2706                                                         rec->extent_item_refs,
2707                                         (unsigned long long)extent_item_refs);
2708                         }
2709                         rec->extent_item_refs = extent_item_refs;
2710                 }
2711                 if (is_root)
2712                         rec->is_root = 1;
2713                 if (set_checked) {
2714                         rec->content_checked = 1;
2715                         rec->owner_ref_checked = 1;
2716                 }
2717
2718                 if (parent_key)
2719                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2720
2721                 if (rec->max_size < max_size)
2722                         rec->max_size = max_size;
2723
2724                 maybe_free_extent_rec(extent_cache, rec);
2725                 return ret;
2726         }
2727         rec = malloc(sizeof(*rec));
2728         rec->start = start;
2729         rec->max_size = max_size;
2730         rec->nr = max(nr, max_size);
2731         rec->found_rec = extent_rec;
2732         rec->content_checked = 0;
2733         rec->owner_ref_checked = 0;
2734         rec->num_duplicates = 0;
2735         rec->metadata = metadata;
2736         INIT_LIST_HEAD(&rec->backrefs);
2737         INIT_LIST_HEAD(&rec->dups);
2738         INIT_LIST_HEAD(&rec->list);
2739
2740         if (is_root)
2741                 rec->is_root = 1;
2742         else
2743                 rec->is_root = 0;
2744
2745         if (inc_ref)
2746                 rec->refs = 1;
2747         else
2748                 rec->refs = 0;
2749
2750         if (extent_item_refs)
2751                 rec->extent_item_refs = extent_item_refs;
2752         else
2753                 rec->extent_item_refs = 0;
2754
2755         if (parent_key)
2756                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2757         else
2758                 memset(&rec->parent_key, 0, sizeof(*parent_key));
2759
2760         rec->cache.start = start;
2761         rec->cache.size = nr;
2762         ret = insert_cache_extent(extent_cache, &rec->cache);
2763         BUG_ON(ret);
2764         bytes_used += nr;
2765         if (set_checked) {
2766                 rec->content_checked = 1;
2767                 rec->owner_ref_checked = 1;
2768         }
2769         return ret;
2770 }
2771
2772 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
2773                             u64 parent, u64 root, int found_ref)
2774 {
2775         struct extent_record *rec;
2776         struct tree_backref *back;
2777         struct cache_extent *cache;
2778
2779         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2780         if (!cache) {
2781                 add_extent_rec(extent_cache, NULL, bytenr,
2782                                1, 0, 0, 0, 0, 1, 0, 0);
2783                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2784                 if (!cache)
2785                         abort();
2786         }
2787
2788         rec = container_of(cache, struct extent_record, cache);
2789         if (rec->start != bytenr) {
2790                 abort();
2791         }
2792
2793         back = find_tree_backref(rec, parent, root);
2794         if (!back)
2795                 back = alloc_tree_backref(rec, parent, root);
2796
2797         if (found_ref) {
2798                 if (back->node.found_ref) {
2799                         fprintf(stderr, "Extent back ref already exists "
2800                                 "for %llu parent %llu root %llu \n",
2801                                 (unsigned long long)bytenr,
2802                                 (unsigned long long)parent,
2803                                 (unsigned long long)root);
2804                 }
2805                 back->node.found_ref = 1;
2806         } else {
2807                 if (back->node.found_extent_tree) {
2808                         fprintf(stderr, "Extent back ref already exists "
2809                                 "for %llu parent %llu root %llu \n",
2810                                 (unsigned long long)bytenr,
2811                                 (unsigned long long)parent,
2812                                 (unsigned long long)root);
2813                 }
2814                 back->node.found_extent_tree = 1;
2815         }
2816         return 0;
2817 }
2818
2819 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
2820                             u64 parent, u64 root, u64 owner, u64 offset,
2821                             u32 num_refs, int found_ref, u64 max_size)
2822 {
2823         struct extent_record *rec;
2824         struct data_backref *back;
2825         struct cache_extent *cache;
2826
2827         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2828         if (!cache) {
2829                 add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0,
2830                                0, 0, max_size);
2831                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2832                 if (!cache)
2833                         abort();
2834         }
2835
2836         rec = container_of(cache, struct extent_record, cache);
2837         if (rec->max_size < max_size)
2838                 rec->max_size = max_size;
2839
2840         /*
2841          * If found_ref is set then max_size is the real size and must match the
2842          * existing refs.  So if we have already found a ref then we need to
2843          * make sure that this ref matches the existing one, otherwise we need
2844          * to add a new backref so we can notice that the backrefs don't match
2845          * and we need to figure out who is telling the truth.  This is to
2846          * account for that awful fsync bug I introduced where we'd end up with
2847          * a btrfs_file_extent_item that would have its length include multiple
2848          * prealloc extents or point inside of a prealloc extent.
2849          */
2850         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
2851                                  bytenr, max_size);
2852         if (!back)
2853                 back = alloc_data_backref(rec, parent, root, owner, offset,
2854                                           max_size);
2855
2856         if (found_ref) {
2857                 BUG_ON(num_refs != 1);
2858                 if (back->node.found_ref)
2859                         BUG_ON(back->bytes != max_size);
2860                 back->node.found_ref = 1;
2861                 back->found_ref += 1;
2862                 back->bytes = max_size;
2863                 back->disk_bytenr = bytenr;
2864                 rec->refs += 1;
2865                 rec->content_checked = 1;
2866                 rec->owner_ref_checked = 1;
2867         } else {
2868                 if (back->node.found_extent_tree) {
2869                         fprintf(stderr, "Extent back ref already exists "
2870                                 "for %llu parent %llu root %llu"
2871                                 "owner %llu offset %llu num_refs %lu\n",
2872                                 (unsigned long long)bytenr,
2873                                 (unsigned long long)parent,
2874                                 (unsigned long long)root,
2875                                 (unsigned long long)owner,
2876                                 (unsigned long long)offset,
2877                                 (unsigned long)num_refs);
2878                 }
2879                 back->num_refs = num_refs;
2880                 back->node.found_extent_tree = 1;
2881         }
2882         return 0;
2883 }
2884
2885 static int add_pending(struct cache_tree *pending,
2886                        struct cache_tree *seen, u64 bytenr, u32 size)
2887 {
2888         int ret;
2889         ret = add_cache_extent(seen, bytenr, size);
2890         if (ret)
2891                 return ret;
2892         add_cache_extent(pending, bytenr, size);
2893         return 0;
2894 }
2895
2896 static int pick_next_pending(struct cache_tree *pending,
2897                         struct cache_tree *reada,
2898                         struct cache_tree *nodes,
2899                         u64 last, struct block_info *bits, int bits_nr,
2900                         int *reada_bits)
2901 {
2902         unsigned long node_start = last;
2903         struct cache_extent *cache;
2904         int ret;
2905
2906         cache = search_cache_extent(reada, 0);
2907         if (cache) {
2908                 bits[0].start = cache->start;
2909                 bits[1].size = cache->size;
2910                 *reada_bits = 1;
2911                 return 1;
2912         }
2913         *reada_bits = 0;
2914         if (node_start > 32768)
2915                 node_start -= 32768;
2916
2917         cache = search_cache_extent(nodes, node_start);
2918         if (!cache)
2919                 cache = search_cache_extent(nodes, 0);
2920
2921         if (!cache) {
2922                  cache = search_cache_extent(pending, 0);
2923                  if (!cache)
2924                          return 0;
2925                  ret = 0;
2926                  do {
2927                          bits[ret].start = cache->start;
2928                          bits[ret].size = cache->size;
2929                          cache = next_cache_extent(cache);
2930                          ret++;
2931                  } while (cache && ret < bits_nr);
2932                  return ret;
2933         }
2934
2935         ret = 0;
2936         do {
2937                 bits[ret].start = cache->start;
2938                 bits[ret].size = cache->size;
2939                 cache = next_cache_extent(cache);
2940                 ret++;
2941         } while (cache && ret < bits_nr);
2942
2943         if (bits_nr - ret > 8) {
2944                 u64 lookup = bits[0].start + bits[0].size;
2945                 struct cache_extent *next;
2946                 next = search_cache_extent(pending, lookup);
2947                 while(next) {
2948                         if (next->start - lookup > 32768)
2949                                 break;
2950                         bits[ret].start = next->start;
2951                         bits[ret].size = next->size;
2952                         lookup = next->start + next->size;
2953                         ret++;
2954                         if (ret == bits_nr)
2955                                 break;
2956                         next = next_cache_extent(next);
2957                         if (!next)
2958                                 break;
2959                 }
2960         }
2961         return ret;
2962 }
2963
2964 static void free_chunk_record(struct cache_extent *cache)
2965 {
2966         struct chunk_record *rec;
2967
2968         rec = container_of(cache, struct chunk_record, cache);
2969         free(rec);
2970 }
2971
2972 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
2973 {
2974         cache_tree_free_extents(chunk_cache, free_chunk_record);
2975 }
2976
2977 static void free_device_record(struct rb_node *node)
2978 {
2979         struct device_record *rec;
2980
2981         rec = container_of(node, struct device_record, node);
2982         free(rec);
2983 }
2984
2985 FREE_RB_BASED_TREE(device_cache, free_device_record);
2986
2987 int insert_block_group_record(struct block_group_tree *tree,
2988                               struct block_group_record *bg_rec)
2989 {
2990         int ret;
2991
2992         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
2993         if (ret)
2994                 return ret;
2995
2996         list_add_tail(&bg_rec->list, &tree->block_groups);
2997         return 0;
2998 }
2999
3000 static void free_block_group_record(struct cache_extent *cache)
3001 {
3002         struct block_group_record *rec;
3003
3004         rec = container_of(cache, struct block_group_record, cache);
3005         free(rec);
3006 }
3007
3008 void free_block_group_tree(struct block_group_tree *tree)
3009 {
3010         cache_tree_free_extents(&tree->tree, free_block_group_record);
3011 }
3012
3013 int insert_device_extent_record(struct device_extent_tree *tree,
3014                                 struct device_extent_record *de_rec)
3015 {
3016         int ret;
3017
3018         /*
3019          * Device extent is a bit different from the other extents, because
3020          * the extents which belong to the different devices may have the
3021          * same start and size, so we need use the special extent cache
3022          * search/insert functions.
3023          */
3024         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
3025         if (ret)
3026                 return ret;
3027
3028         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
3029         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
3030         return 0;
3031 }
3032
3033 static void free_device_extent_record(struct cache_extent *cache)
3034 {
3035         struct device_extent_record *rec;
3036
3037         rec = container_of(cache, struct device_extent_record, cache);
3038         free(rec);
3039 }
3040
3041 void free_device_extent_tree(struct device_extent_tree *tree)
3042 {
3043         cache_tree_free_extents(&tree->tree, free_device_extent_record);
3044 }
3045
3046 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3047 static int process_extent_ref_v0(struct cache_tree *extent_cache,
3048                                  struct extent_buffer *leaf, int slot)
3049 {
3050         struct btrfs_extent_ref_v0 *ref0;
3051         struct btrfs_key key;
3052
3053         btrfs_item_key_to_cpu(leaf, &key, slot);
3054         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
3055         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
3056                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
3057         } else {
3058                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
3059                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
3060         }
3061         return 0;
3062 }
3063 #endif
3064
3065 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
3066                                             struct btrfs_key *key,
3067                                             int slot)
3068 {
3069         struct btrfs_chunk *ptr;
3070         struct chunk_record *rec;
3071         int num_stripes, i;
3072
3073         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3074         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
3075
3076         rec = malloc(btrfs_chunk_record_size(num_stripes));
3077         if (!rec) {
3078                 fprintf(stderr, "memory allocation failed\n");
3079                 exit(-1);
3080         }
3081
3082         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
3083
3084         INIT_LIST_HEAD(&rec->list);
3085         INIT_LIST_HEAD(&rec->dextents);
3086         rec->bg_rec = NULL;
3087
3088         rec->cache.start = key->offset;
3089         rec->cache.size = btrfs_chunk_length(leaf, ptr);
3090
3091         rec->generation = btrfs_header_generation(leaf);
3092
3093         rec->objectid = key->objectid;
3094         rec->type = key->type;
3095         rec->offset = key->offset;
3096
3097         rec->length = rec->cache.size;
3098         rec->owner = btrfs_chunk_owner(leaf, ptr);
3099         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
3100         rec->type_flags = btrfs_chunk_type(leaf, ptr);
3101         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
3102         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
3103         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
3104         rec->num_stripes = num_stripes;
3105         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
3106
3107         for (i = 0; i < rec->num_stripes; ++i) {
3108                 rec->stripes[i].devid =
3109                         btrfs_stripe_devid_nr(leaf, ptr, i);
3110                 rec->stripes[i].offset =
3111                         btrfs_stripe_offset_nr(leaf, ptr, i);
3112                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
3113                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
3114                                 BTRFS_UUID_SIZE);
3115         }
3116
3117         return rec;
3118 }
3119
3120 static int process_chunk_item(struct cache_tree *chunk_cache,
3121                               struct btrfs_key *key, struct extent_buffer *eb,
3122                               int slot)
3123 {
3124         struct chunk_record *rec;
3125         int ret = 0;
3126
3127         rec = btrfs_new_chunk_record(eb, key, slot);
3128         ret = insert_cache_extent(chunk_cache, &rec->cache);
3129         if (ret) {
3130                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
3131                         rec->offset, rec->length);
3132                 free(rec);
3133         }
3134
3135         return ret;
3136 }
3137
3138 static int process_device_item(struct rb_root *dev_cache,
3139                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
3140 {
3141         struct btrfs_dev_item *ptr;
3142         struct device_record *rec;
3143         int ret = 0;
3144
3145         ptr = btrfs_item_ptr(eb,
3146                 slot, struct btrfs_dev_item);
3147
3148         rec = malloc(sizeof(*rec));
3149         if (!rec) {
3150                 fprintf(stderr, "memory allocation failed\n");
3151                 return -ENOMEM;
3152         }
3153
3154         rec->devid = key->offset;
3155         rec->generation = btrfs_header_generation(eb);
3156
3157         rec->objectid = key->objectid;
3158         rec->type = key->type;
3159         rec->offset = key->offset;
3160
3161         rec->devid = btrfs_device_id(eb, ptr);
3162         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
3163         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
3164
3165         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
3166         if (ret) {
3167                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
3168                 free(rec);
3169         }
3170
3171         return ret;
3172 }
3173
3174 struct block_group_record *
3175 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
3176                              int slot)
3177 {
3178         struct btrfs_block_group_item *ptr;
3179         struct block_group_record *rec;
3180
3181         rec = malloc(sizeof(*rec));
3182         if (!rec) {
3183                 fprintf(stderr, "memory allocation failed\n");
3184                 exit(-1);
3185         }
3186         memset(rec, 0, sizeof(*rec));
3187
3188         rec->cache.start = key->objectid;
3189         rec->cache.size = key->offset;
3190
3191         rec->generation = btrfs_header_generation(leaf);
3192
3193         rec->objectid = key->objectid;
3194         rec->type = key->type;
3195         rec->offset = key->offset;
3196
3197         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
3198         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
3199
3200         INIT_LIST_HEAD(&rec->list);
3201
3202         return rec;
3203 }
3204
3205 static int process_block_group_item(struct block_group_tree *block_group_cache,
3206                                     struct btrfs_key *key,
3207                                     struct extent_buffer *eb, int slot)
3208 {
3209         struct block_group_record *rec;
3210         int ret = 0;
3211
3212         rec = btrfs_new_block_group_record(eb, key, slot);
3213         ret = insert_block_group_record(block_group_cache, rec);
3214         if (ret) {
3215                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
3216                         rec->objectid, rec->offset);
3217                 free(rec);
3218         }
3219
3220         return ret;
3221 }
3222
3223 struct device_extent_record *
3224 btrfs_new_device_extent_record(struct extent_buffer *leaf,
3225                                struct btrfs_key *key, int slot)
3226 {
3227         struct device_extent_record *rec;
3228         struct btrfs_dev_extent *ptr;
3229
3230         rec = malloc(sizeof(*rec));
3231         if (!rec) {
3232                 fprintf(stderr, "memory allocation failed\n");
3233                 exit(-1);
3234         }
3235         memset(rec, 0, sizeof(*rec));
3236
3237         rec->cache.objectid = key->objectid;
3238         rec->cache.start = key->offset;
3239
3240         rec->generation = btrfs_header_generation(leaf);
3241
3242         rec->objectid = key->objectid;
3243         rec->type = key->type;
3244         rec->offset = key->offset;
3245
3246         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
3247         rec->chunk_objecteid =
3248                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
3249         rec->chunk_offset =
3250                 btrfs_dev_extent_chunk_offset(leaf, ptr);
3251         rec->length = btrfs_dev_extent_length(leaf, ptr);
3252         rec->cache.size = rec->length;
3253
3254         INIT_LIST_HEAD(&rec->chunk_list);
3255         INIT_LIST_HEAD(&rec->device_list);
3256
3257         return rec;
3258 }
3259
3260 static int
3261 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
3262                            struct btrfs_key *key, struct extent_buffer *eb,
3263                            int slot)
3264 {
3265         struct device_extent_record *rec;
3266         int ret;
3267
3268         rec = btrfs_new_device_extent_record(eb, key, slot);
3269         ret = insert_device_extent_record(dev_extent_cache, rec);
3270         if (ret) {
3271                 fprintf(stderr,
3272                         "Device extent[%llu, %llu, %llu] existed.\n",
3273                         rec->objectid, rec->offset, rec->length);
3274                 free(rec);
3275         }
3276
3277         return ret;
3278 }
3279
3280 static int process_extent_item(struct btrfs_root *root,
3281                                struct cache_tree *extent_cache,
3282                                struct extent_buffer *eb, int slot)
3283 {
3284         struct btrfs_extent_item *ei;
3285         struct btrfs_extent_inline_ref *iref;
3286         struct btrfs_extent_data_ref *dref;
3287         struct btrfs_shared_data_ref *sref;
3288         struct btrfs_key key;
3289         unsigned long end;
3290         unsigned long ptr;
3291         int type;
3292         u32 item_size = btrfs_item_size_nr(eb, slot);
3293         u64 refs = 0;
3294         u64 offset;
3295         u64 num_bytes;
3296         int metadata = 0;
3297
3298         btrfs_item_key_to_cpu(eb, &key, slot);
3299
3300         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3301                 metadata = 1;
3302                 num_bytes = root->leafsize;
3303         } else {
3304                 num_bytes = key.offset;
3305         }
3306
3307         if (item_size < sizeof(*ei)) {
3308 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3309                 struct btrfs_extent_item_v0 *ei0;
3310                 BUG_ON(item_size != sizeof(*ei0));
3311                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
3312                 refs = btrfs_extent_refs_v0(eb, ei0);
3313 #else
3314                 BUG();
3315 #endif
3316                 return add_extent_rec(extent_cache, NULL, key.objectid,
3317                                       num_bytes, refs, 0, 0, 0, metadata, 1,
3318                                       num_bytes);
3319         }
3320
3321         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
3322         refs = btrfs_extent_refs(eb, ei);
3323
3324         add_extent_rec(extent_cache, NULL, key.objectid, num_bytes,
3325                        refs, 0, 0, 0, metadata, 1, num_bytes);
3326
3327         ptr = (unsigned long)(ei + 1);
3328         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
3329             key.type == BTRFS_EXTENT_ITEM_KEY)
3330                 ptr += sizeof(struct btrfs_tree_block_info);
3331
3332         end = (unsigned long)ei + item_size;
3333         while (ptr < end) {
3334                 iref = (struct btrfs_extent_inline_ref *)ptr;
3335                 type = btrfs_extent_inline_ref_type(eb, iref);
3336                 offset = btrfs_extent_inline_ref_offset(eb, iref);
3337                 switch (type) {
3338                 case BTRFS_TREE_BLOCK_REF_KEY:
3339                         add_tree_backref(extent_cache, key.objectid,
3340                                          0, offset, 0);
3341                         break;
3342                 case BTRFS_SHARED_BLOCK_REF_KEY:
3343                         add_tree_backref(extent_cache, key.objectid,
3344                                          offset, 0, 0);
3345                         break;
3346                 case BTRFS_EXTENT_DATA_REF_KEY:
3347                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3348                         add_data_backref(extent_cache, key.objectid, 0,
3349                                         btrfs_extent_data_ref_root(eb, dref),
3350                                         btrfs_extent_data_ref_objectid(eb,
3351                                                                        dref),
3352                                         btrfs_extent_data_ref_offset(eb, dref),
3353                                         btrfs_extent_data_ref_count(eb, dref),
3354                                         0, num_bytes);
3355                         break;
3356                 case BTRFS_SHARED_DATA_REF_KEY:
3357                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
3358                         add_data_backref(extent_cache, key.objectid, offset,
3359                                         0, 0, 0,
3360                                         btrfs_shared_data_ref_count(eb, sref),
3361                                         0, num_bytes);
3362                         break;
3363                 default:
3364                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
3365                                 key.objectid, key.type, num_bytes);
3366                         goto out;
3367                 }
3368                 ptr += btrfs_extent_inline_ref_size(type);
3369         }
3370         WARN_ON(ptr > end);
3371 out:
3372         return 0;
3373 }
3374
3375 static int check_cache_range(struct btrfs_root *root,
3376                              struct btrfs_block_group_cache *cache,
3377                              u64 offset, u64 bytes)
3378 {
3379         struct btrfs_free_space *entry;
3380         u64 *logical;
3381         u64 bytenr;
3382         int stripe_len;
3383         int i, nr, ret;
3384
3385         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3386                 bytenr = btrfs_sb_offset(i);
3387                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
3388                                        cache->key.objectid, bytenr, 0,
3389                                        &logical, &nr, &stripe_len);
3390                 if (ret)
3391                         return ret;
3392
3393                 while (nr--) {
3394                         if (logical[nr] + stripe_len <= offset)
3395                                 continue;
3396                         if (offset + bytes <= logical[nr])
3397                                 continue;
3398                         if (logical[nr] == offset) {
3399                                 if (stripe_len >= bytes) {
3400                                         kfree(logical);
3401                                         return 0;
3402                                 }
3403                                 bytes -= stripe_len;
3404                                 offset += stripe_len;
3405                         } else if (logical[nr] < offset) {
3406                                 if (logical[nr] + stripe_len >=
3407                                     offset + bytes) {
3408                                         kfree(logical);
3409                                         return 0;
3410                                 }
3411                                 bytes = (offset + bytes) -
3412                                         (logical[nr] + stripe_len);
3413                                 offset = logical[nr] + stripe_len;
3414                         } else {
3415                                 /*
3416                                  * Could be tricky, the super may land in the
3417                                  * middle of the area we're checking.  First
3418                                  * check the easiest case, it's at the end.
3419                                  */
3420                                 if (logical[nr] + stripe_len >=
3421                                     bytes + offset) {
3422                                         bytes = logical[nr] - offset;
3423                                         continue;
3424                                 }
3425
3426                                 /* Check the left side */
3427                                 ret = check_cache_range(root, cache,
3428                                                         offset,
3429                                                         logical[nr] - offset);
3430                                 if (ret) {
3431                                         kfree(logical);
3432                                         return ret;
3433                                 }
3434
3435                                 /* Now we continue with the right side */
3436                                 bytes = (offset + bytes) -
3437                                         (logical[nr] + stripe_len);
3438                                 offset = logical[nr] + stripe_len;
3439                         }
3440                 }
3441
3442                 kfree(logical);
3443         }
3444
3445         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
3446         if (!entry) {
3447                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
3448                         offset, offset+bytes);
3449                 return -EINVAL;
3450         }
3451
3452         if (entry->offset != offset) {
3453                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
3454                         entry->offset);
3455                 return -EINVAL;
3456         }
3457
3458         if (entry->bytes != bytes) {
3459                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
3460                         bytes, entry->bytes, offset);
3461                 return -EINVAL;
3462         }
3463
3464         unlink_free_space(cache->free_space_ctl, entry);
3465         free(entry);
3466         return 0;
3467 }
3468
3469 static int verify_space_cache(struct btrfs_root *root,
3470                               struct btrfs_block_group_cache *cache)
3471 {
3472         struct btrfs_path *path;
3473         struct extent_buffer *leaf;
3474         struct btrfs_key key;
3475         u64 last;
3476         int ret = 0;
3477
3478         path = btrfs_alloc_path();
3479         if (!path)
3480                 return -ENOMEM;
3481
3482         root = root->fs_info->extent_root;
3483
3484         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
3485
3486         key.objectid = last;
3487         key.offset = 0;
3488         key.type = BTRFS_EXTENT_ITEM_KEY;
3489
3490         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3491         if (ret < 0)
3492                 goto out;
3493         ret = 0;
3494         while (1) {
3495                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3496                         ret = btrfs_next_leaf(root, path);
3497                         if (ret < 0)
3498                                 goto out;
3499                         if (ret > 0) {
3500                                 ret = 0;
3501                                 break;
3502                         }
3503                 }
3504                 leaf = path->nodes[0];
3505                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3506                 if (key.objectid >= cache->key.offset + cache->key.objectid)
3507                         break;
3508                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3509                     key.type != BTRFS_METADATA_ITEM_KEY) {
3510                         path->slots[0]++;
3511                         continue;
3512                 }
3513
3514                 if (last == key.objectid) {
3515                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
3516                                 last = key.objectid + key.offset;
3517                         else
3518                                 last = key.objectid + root->leafsize;
3519                         path->slots[0]++;
3520                         continue;
3521                 }
3522
3523                 ret = check_cache_range(root, cache, last,
3524                                         key.objectid - last);
3525                 if (ret)
3526                         break;
3527                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3528                         last = key.objectid + key.offset;
3529                 else
3530                         last = key.objectid + root->leafsize;
3531                 path->slots[0]++;
3532         }
3533
3534         if (last < cache->key.objectid + cache->key.offset)
3535                 ret = check_cache_range(root, cache, last,
3536                                         cache->key.objectid +
3537                                         cache->key.offset - last);
3538
3539 out:
3540         btrfs_free_path(path);
3541
3542         if (!ret &&
3543             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
3544                 fprintf(stderr, "There are still entries left in the space "
3545                         "cache\n");
3546                 ret = -EINVAL;
3547         }
3548
3549         return ret;
3550 }
3551
3552 static int check_space_cache(struct btrfs_root *root)
3553 {
3554         struct btrfs_block_group_cache *cache;
3555         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
3556         int ret;
3557         int error = 0;
3558
3559         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
3560             btrfs_super_generation(root->fs_info->super_copy) !=
3561             btrfs_super_cache_generation(root->fs_info->super_copy)) {
3562                 printf("cache and super generation don't match, space cache "
3563                        "will be invalidated\n");
3564                 return 0;
3565         }
3566
3567         while (1) {
3568                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
3569                 if (!cache)
3570                         break;
3571
3572                 start = cache->key.objectid + cache->key.offset;
3573                 if (!cache->free_space_ctl) {
3574                         if (btrfs_init_free_space_ctl(cache,
3575                                                       root->sectorsize)) {
3576                                 ret = -ENOMEM;
3577                                 break;
3578                         }
3579                 } else {
3580                         btrfs_remove_free_space_cache(cache);
3581                 }
3582
3583                 ret = load_free_space_cache(root->fs_info, cache);
3584                 if (!ret)
3585                         continue;
3586
3587                 ret = verify_space_cache(root, cache);
3588                 if (ret) {
3589                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
3590                                 cache->key.objectid);
3591                         error++;
3592                 }
3593         }
3594
3595         return error ? -EINVAL : 0;
3596 }
3597
3598 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
3599                                u64 num_bytes)
3600 {
3601         struct btrfs_path *path;
3602         struct extent_buffer *leaf;
3603         struct btrfs_key key;
3604         int ret;
3605
3606         path = btrfs_alloc_path();
3607         if (!path) {
3608                 fprintf(stderr, "Error allocing path\n");
3609                 return -ENOMEM;
3610         }
3611
3612         key.objectid = bytenr;
3613         key.type = BTRFS_EXTENT_ITEM_KEY;
3614         key.offset = 0;
3615
3616
3617 again:
3618         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
3619                                 0, 0);
3620         if (ret < 0) {
3621                 fprintf(stderr, "Error looking up extent record %d\n", ret);
3622                 btrfs_free_path(path);
3623                 return ret;
3624         } else if (ret) {
3625                 if (path->slots[0])
3626                         path->slots[0]--;
3627                 else
3628                         btrfs_prev_leaf(root, path);
3629         }
3630
3631         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3632
3633         /*
3634          * Block group items come before extent items if they have the same
3635          * bytenr, so walk back one more just in case.  Dear future traveler,
3636          * first congrats on mastering time travel.  Now if it's not too much
3637          * trouble could you go back to 2006 and tell Chris to make the
3638          * BLOCK_GROUP_ITEM_KEY lower than the EXTENT_ITEM_KEY please?
3639          */
3640         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3641                 if (path->slots[0])
3642                         path->slots[0]--;
3643                 else
3644                         btrfs_prev_leaf(root, path);
3645         }
3646
3647         while (num_bytes) {
3648                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3649                         ret = btrfs_next_leaf(root, path);
3650                         if (ret < 0) {
3651                                 fprintf(stderr, "Error going to next leaf "
3652                                         "%d\n", ret);
3653                                 btrfs_free_path(path);
3654                                 return ret;
3655                         } else if (ret) {
3656                                 break;
3657                         }
3658                 }
3659                 leaf = path->nodes[0];
3660                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3661                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
3662                         path->slots[0]++;
3663                         continue;
3664                 }
3665                 if (key.objectid + key.offset < bytenr) {
3666                         path->slots[0]++;
3667                         continue;
3668                 }
3669                 if (key.objectid > bytenr + num_bytes)
3670                         break;
3671
3672                 if (key.objectid == bytenr) {
3673                         if (key.offset >= num_bytes) {
3674                                 num_bytes = 0;
3675                                 break;
3676                         }
3677                         num_bytes -= key.offset;
3678                         bytenr += key.offset;
3679                 } else if (key.objectid < bytenr) {
3680                         if (key.objectid + key.offset >= bytenr + num_bytes) {
3681                                 num_bytes = 0;
3682                                 break;
3683                         }
3684                         num_bytes = (bytenr + num_bytes) -
3685                                 (key.objectid + key.offset);
3686                         bytenr = key.objectid + key.offset;
3687                 } else {
3688                         if (key.objectid + key.offset < bytenr + num_bytes) {
3689                                 u64 new_start = key.objectid + key.offset;
3690                                 u64 new_bytes = bytenr + num_bytes - new_start;
3691
3692                                 /*
3693                                  * Weird case, the extent is in the middle of
3694                                  * our range, we'll have to search one side
3695                                  * and then the other.  Not sure if this happens
3696                                  * in real life, but no harm in coding it up
3697                                  * anyway just in case.
3698                                  */
3699                                 btrfs_release_path(path);
3700                                 ret = check_extent_exists(root, new_start,
3701                                                           new_bytes);
3702                                 if (ret) {
3703                                         fprintf(stderr, "Right section didn't "
3704                                                 "have a record\n");
3705                                         break;
3706                                 }
3707                                 num_bytes = key.objectid - bytenr;
3708                                 goto again;
3709                         }
3710                         num_bytes = key.objectid - bytenr;
3711                 }
3712                 path->slots[0]++;
3713         }
3714         ret = 0;
3715
3716         if (num_bytes) {
3717                 fprintf(stderr, "There are no extents for csum range "
3718                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
3719                 ret = 1;
3720         }
3721
3722         btrfs_free_path(path);
3723         return ret;
3724 }
3725
3726 static int check_csums(struct btrfs_root *root)
3727 {
3728         struct btrfs_path *path;
3729         struct extent_buffer *leaf;
3730         struct btrfs_key key;
3731         u64 offset = 0, num_bytes = 0;
3732         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3733         int errors = 0;
3734         int ret;
3735
3736         root = root->fs_info->csum_root;
3737
3738         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3739         key.type = BTRFS_EXTENT_CSUM_KEY;
3740         key.offset = 0;
3741
3742         path = btrfs_alloc_path();
3743         if (!path)
3744                 return -ENOMEM;
3745
3746         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3747         if (ret < 0) {
3748                 fprintf(stderr, "Error searching csum tree %d\n", ret);
3749                 btrfs_free_path(path);
3750                 return ret;
3751         }
3752
3753         if (ret > 0 && path->slots[0])
3754                 path->slots[0]--;
3755         ret = 0;
3756
3757         while (1) {
3758                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3759                         ret = btrfs_next_leaf(root, path);
3760                         if (ret < 0) {
3761                                 fprintf(stderr, "Error going to next leaf "
3762                                         "%d\n", ret);
3763                                 break;
3764                         }
3765                         if (ret)
3766                                 break;
3767                 }
3768                 leaf = path->nodes[0];
3769
3770                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3771                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
3772                         path->slots[0]++;
3773                         continue;
3774                 }
3775
3776                 if (!num_bytes) {
3777                         offset = key.offset;
3778                 } else if (key.offset != offset + num_bytes) {
3779                         ret = check_extent_exists(root, offset, num_bytes);
3780                         if (ret) {
3781                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
3782                                         "there is no extent record\n",
3783                                         offset, offset+num_bytes);
3784                                 errors++;
3785                         }
3786                         offset = key.offset;
3787                         num_bytes = 0;
3788                 }
3789
3790                 num_bytes += (btrfs_item_size_nr(leaf, path->slots[0]) /
3791                               csum_size) * root->sectorsize;
3792                 path->slots[0]++;
3793         }
3794
3795         btrfs_free_path(path);
3796         return errors;
3797 }
3798
3799 static int is_dropped_key(struct btrfs_key *key,
3800                           struct btrfs_key *drop_key) {
3801         if (key->objectid < drop_key->objectid)
3802                 return 1;
3803         else if (key->objectid == drop_key->objectid) {
3804                 if (key->type < drop_key->type)
3805                         return 1;
3806                 else if (key->type == drop_key->type) {
3807                         if (key->offset < drop_key->offset)
3808                                 return 1;
3809                 }
3810         }
3811         return 0;
3812 }
3813
3814 static int run_next_block(struct btrfs_trans_handle *trans,
3815                           struct btrfs_root *root,
3816                           struct block_info *bits,
3817                           int bits_nr,
3818                           u64 *last,
3819                           struct cache_tree *pending,
3820                           struct cache_tree *seen,
3821                           struct cache_tree *reada,
3822                           struct cache_tree *nodes,
3823                           struct cache_tree *extent_cache,
3824                           struct cache_tree *chunk_cache,
3825                           struct rb_root *dev_cache,
3826                           struct block_group_tree *block_group_cache,
3827                           struct device_extent_tree *dev_extent_cache,
3828                           struct btrfs_root_item *ri)
3829 {
3830         struct extent_buffer *buf;
3831         u64 bytenr;
3832         u32 size;
3833         u64 parent;
3834         u64 owner;
3835         u64 flags;
3836         u64 ptr;
3837         int ret = 0;
3838         int i;
3839         int nritems;
3840         struct btrfs_key key;
3841         struct cache_extent *cache;
3842         int reada_bits;
3843
3844         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
3845                                     bits_nr, &reada_bits);
3846         if (nritems == 0)
3847                 return 1;
3848
3849         if (!reada_bits) {
3850                 for(i = 0; i < nritems; i++) {
3851                         ret = add_cache_extent(reada, bits[i].start,
3852                                                bits[i].size);
3853                         if (ret == -EEXIST)
3854                                 continue;
3855
3856                         /* fixme, get the parent transid */
3857                         readahead_tree_block(root, bits[i].start,
3858                                              bits[i].size, 0);
3859                 }
3860         }
3861         *last = bits[0].start;
3862         bytenr = bits[0].start;
3863         size = bits[0].size;
3864
3865         cache = lookup_cache_extent(pending, bytenr, size);
3866         if (cache) {
3867                 remove_cache_extent(pending, cache);
3868                 free(cache);
3869         }
3870         cache = lookup_cache_extent(reada, bytenr, size);
3871         if (cache) {
3872                 remove_cache_extent(reada, cache);
3873                 free(cache);
3874         }
3875         cache = lookup_cache_extent(nodes, bytenr, size);
3876         if (cache) {
3877                 remove_cache_extent(nodes, cache);
3878                 free(cache);
3879         }
3880         cache = lookup_cache_extent(seen, bytenr, size);
3881         if (cache) {
3882                 remove_cache_extent(seen, cache);
3883                 free(cache);
3884         }
3885
3886         /* fixme, get the real parent transid */
3887         buf = read_tree_block(root, bytenr, size, 0);
3888         if (!extent_buffer_uptodate(buf)) {
3889                 record_bad_block_io(root->fs_info,
3890                                     extent_cache, bytenr, size);
3891                 goto out;
3892         }
3893
3894         nritems = btrfs_header_nritems(buf);
3895
3896         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
3897                                        btrfs_header_level(buf), 1, NULL,
3898                                        &flags);
3899         if (ret < 0)
3900                 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
3901
3902         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
3903                 parent = bytenr;
3904                 owner = 0;
3905         } else {
3906                 parent = 0;
3907                 owner = btrfs_header_owner(buf);
3908         }
3909
3910         ret = check_block(trans, root, extent_cache, buf, flags);
3911         if (ret)
3912                 goto out;
3913
3914         if (btrfs_is_leaf(buf)) {
3915                 btree_space_waste += btrfs_leaf_free_space(root, buf);
3916                 for (i = 0; i < nritems; i++) {
3917                         struct btrfs_file_extent_item *fi;
3918                         btrfs_item_key_to_cpu(buf, &key, i);
3919                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
3920                                 process_extent_item(root, extent_cache, buf,
3921                                                     i);
3922                                 continue;
3923                         }
3924                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3925                                 process_extent_item(root, extent_cache, buf,
3926                                                     i);
3927                                 continue;
3928                         }
3929                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
3930                                 total_csum_bytes +=
3931                                         btrfs_item_size_nr(buf, i);
3932                                 continue;
3933                         }
3934                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
3935                                 process_chunk_item(chunk_cache, &key, buf, i);
3936                                 continue;
3937                         }
3938                         if (key.type == BTRFS_DEV_ITEM_KEY) {
3939                                 process_device_item(dev_cache, &key, buf, i);
3940                                 continue;
3941                         }
3942                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3943                                 process_block_group_item(block_group_cache,
3944                                         &key, buf, i);
3945                                 continue;
3946                         }
3947                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
3948                                 process_device_extent_item(dev_extent_cache,
3949                                         &key, buf, i);
3950                                 continue;
3951
3952                         }
3953                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
3954 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3955                                 process_extent_ref_v0(extent_cache, buf, i);
3956 #else
3957                                 BUG();
3958 #endif
3959                                 continue;
3960                         }
3961
3962                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
3963                                 add_tree_backref(extent_cache, key.objectid, 0,
3964                                                  key.offset, 0);
3965                                 continue;
3966                         }
3967                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
3968                                 add_tree_backref(extent_cache, key.objectid,
3969                                                  key.offset, 0, 0);
3970                                 continue;
3971                         }
3972                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3973                                 struct btrfs_extent_data_ref *ref;
3974                                 ref = btrfs_item_ptr(buf, i,
3975                                                 struct btrfs_extent_data_ref);
3976                                 add_data_backref(extent_cache,
3977                                         key.objectid, 0,
3978                                         btrfs_extent_data_ref_root(buf, ref),
3979                                         btrfs_extent_data_ref_objectid(buf,
3980                                                                        ref),
3981                                         btrfs_extent_data_ref_offset(buf, ref),
3982                                         btrfs_extent_data_ref_count(buf, ref),
3983                                         0, root->sectorsize);
3984                                 continue;
3985                         }
3986                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3987                                 struct btrfs_shared_data_ref *ref;
3988                                 ref = btrfs_item_ptr(buf, i,
3989                                                 struct btrfs_shared_data_ref);
3990                                 add_data_backref(extent_cache,
3991                                         key.objectid, key.offset, 0, 0, 0,
3992                                         btrfs_shared_data_ref_count(buf, ref),
3993                                         0, root->sectorsize);
3994                                 continue;
3995                         }
3996                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
3997                                 struct bad_item *bad;
3998
3999                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
4000                                         continue;
4001                                 if (!owner)
4002                                         continue;
4003                                 bad = malloc(sizeof(struct bad_item));
4004                                 if (!bad)
4005                                         continue;
4006                                 INIT_LIST_HEAD(&bad->list);
4007                                 memcpy(&bad->key, &key,
4008                                        sizeof(struct btrfs_key));
4009                                 bad->root_id = owner;
4010                                 list_add_tail(&bad->list, &delete_items);
4011                                 continue;
4012                         }
4013                         if (key.type != BTRFS_EXTENT_DATA_KEY)
4014                                 continue;
4015                         fi = btrfs_item_ptr(buf, i,
4016                                             struct btrfs_file_extent_item);
4017                         if (btrfs_file_extent_type(buf, fi) ==
4018                             BTRFS_FILE_EXTENT_INLINE)
4019                                 continue;
4020                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
4021                                 continue;
4022
4023                         data_bytes_allocated +=
4024                                 btrfs_file_extent_disk_num_bytes(buf, fi);
4025                         if (data_bytes_allocated < root->sectorsize) {
4026                                 abort();
4027                         }
4028                         data_bytes_referenced +=
4029                                 btrfs_file_extent_num_bytes(buf, fi);
4030                         add_data_backref(extent_cache,
4031                                 btrfs_file_extent_disk_bytenr(buf, fi),
4032                                 parent, owner, key.objectid, key.offset -
4033                                 btrfs_file_extent_offset(buf, fi), 1, 1,
4034                                 btrfs_file_extent_disk_num_bytes(buf, fi));
4035                         BUG_ON(ret);
4036                 }
4037         } else {
4038                 int level;
4039                 struct btrfs_key first_key;
4040
4041                 first_key.objectid = 0;
4042
4043                 if (nritems > 0)
4044                         btrfs_item_key_to_cpu(buf, &first_key, 0);
4045                 level = btrfs_header_level(buf);
4046                 for (i = 0; i < nritems; i++) {
4047                         ptr = btrfs_node_blockptr(buf, i);
4048                         size = btrfs_level_size(root, level - 1);
4049                         btrfs_node_key_to_cpu(buf, &key, i);
4050                         if (ri != NULL) {
4051                                 struct btrfs_key drop_key;
4052                                 btrfs_disk_key_to_cpu(&drop_key,
4053                                                       &ri->drop_progress);
4054                                 if ((level == ri->drop_level)
4055                                     && is_dropped_key(&key, &drop_key)) {
4056                                         continue;
4057                                 }
4058                         }
4059                         ret = add_extent_rec(extent_cache, &key,
4060                                              ptr, size, 0, 0, 1, 0, 1, 0,
4061                                              size);
4062                         BUG_ON(ret);
4063
4064                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
4065
4066                         if (level > 1) {
4067                                 add_pending(nodes, seen, ptr, size);
4068                         } else {
4069                                 add_pending(pending, seen, ptr, size);
4070                         }
4071                 }
4072                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
4073                                       nritems) * sizeof(struct btrfs_key_ptr);
4074         }
4075         total_btree_bytes += buf->len;
4076         if (fs_root_objectid(btrfs_header_owner(buf)))
4077                 total_fs_tree_bytes += buf->len;
4078         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
4079                 total_extent_tree_bytes += buf->len;
4080         if (!found_old_backref &&
4081             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
4082             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
4083             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
4084                 found_old_backref = 1;
4085 out:
4086         free_extent_buffer(buf);
4087         return ret;
4088 }
4089
4090 static int add_root_to_pending(struct extent_buffer *buf,
4091                                struct cache_tree *extent_cache,
4092                                struct cache_tree *pending,
4093                                struct cache_tree *seen,
4094                                struct cache_tree *nodes,
4095                                struct btrfs_key *root_key)
4096 {
4097         if (btrfs_header_level(buf) > 0)
4098                 add_pending(nodes, seen, buf->start, buf->len);
4099         else
4100                 add_pending(pending, seen, buf->start, buf->len);
4101         add_extent_rec(extent_cache, NULL, buf->start, buf->len,
4102                        0, 1, 1, 0, 1, 0, buf->len);
4103
4104         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
4105             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
4106                 add_tree_backref(extent_cache, buf->start, buf->start,
4107                                  0, 1);
4108         else
4109                 add_tree_backref(extent_cache, buf->start, 0,
4110                                  root_key->objectid, 1);
4111         return 0;
4112 }
4113
4114 /* as we fix the tree, we might be deleting blocks that
4115  * we're tracking for repair.  This hook makes sure we
4116  * remove any backrefs for blocks as we are fixing them.
4117  */
4118 static int free_extent_hook(struct btrfs_trans_handle *trans,
4119                             struct btrfs_root *root,
4120                             u64 bytenr, u64 num_bytes, u64 parent,
4121                             u64 root_objectid, u64 owner, u64 offset,
4122                             int refs_to_drop)
4123 {
4124         struct extent_record *rec;
4125         struct cache_extent *cache;
4126         int is_data;
4127         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
4128
4129         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
4130         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
4131         if (!cache)
4132                 return 0;
4133
4134         rec = container_of(cache, struct extent_record, cache);
4135         if (is_data) {
4136                 struct data_backref *back;
4137                 back = find_data_backref(rec, parent, root_objectid, owner,
4138                                          offset, 1, bytenr, num_bytes);
4139                 if (!back)
4140                         goto out;
4141                 if (back->node.found_ref) {
4142                         back->found_ref -= refs_to_drop;
4143                         if (rec->refs)
4144                                 rec->refs -= refs_to_drop;
4145                 }
4146                 if (back->node.found_extent_tree) {
4147                         back->num_refs -= refs_to_drop;
4148                         if (rec->extent_item_refs)
4149                                 rec->extent_item_refs -= refs_to_drop;
4150                 }
4151                 if (back->found_ref == 0)
4152                         back->node.found_ref = 0;
4153                 if (back->num_refs == 0)
4154                         back->node.found_extent_tree = 0;
4155
4156                 if (!back->node.found_extent_tree && back->node.found_ref) {
4157                         list_del(&back->node.list);
4158                         free(back);
4159                 }
4160         } else {
4161                 struct tree_backref *back;
4162                 back = find_tree_backref(rec, parent, root_objectid);
4163                 if (!back)
4164                         goto out;
4165                 if (back->node.found_ref) {
4166                         if (rec->refs)
4167                                 rec->refs--;
4168                         back->node.found_ref = 0;
4169                 }
4170                 if (back->node.found_extent_tree) {
4171                         if (rec->extent_item_refs)
4172                                 rec->extent_item_refs--;
4173                         back->node.found_extent_tree = 0;
4174                 }
4175                 if (!back->node.found_extent_tree && back->node.found_ref) {
4176                         list_del(&back->node.list);
4177                         free(back);
4178                 }
4179         }
4180         maybe_free_extent_rec(extent_cache, rec);
4181 out:
4182         return 0;
4183 }
4184
4185 static int delete_extent_records(struct btrfs_trans_handle *trans,
4186                                  struct btrfs_root *root,
4187                                  struct btrfs_path *path,
4188                                  u64 bytenr, u64 new_len)
4189 {
4190         struct btrfs_key key;
4191         struct btrfs_key found_key;
4192         struct extent_buffer *leaf;
4193         int ret;
4194         int slot;
4195
4196
4197         key.objectid = bytenr;
4198         key.type = (u8)-1;
4199         key.offset = (u64)-1;
4200
4201         while(1) {
4202                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
4203                                         &key, path, 0, 1);
4204                 if (ret < 0)
4205                         break;
4206
4207                 if (ret > 0) {
4208                         ret = 0;
4209                         if (path->slots[0] == 0)
4210                                 break;
4211                         path->slots[0]--;
4212                 }
4213                 ret = 0;
4214
4215                 leaf = path->nodes[0];
4216                 slot = path->slots[0];
4217
4218                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4219                 if (found_key.objectid != bytenr)
4220                         break;
4221
4222                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
4223                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
4224                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4225                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
4226                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
4227                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
4228                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
4229                         btrfs_release_path(path);
4230                         if (found_key.type == 0) {
4231                                 if (found_key.offset == 0)
4232                                         break;
4233                                 key.offset = found_key.offset - 1;
4234                                 key.type = found_key.type;
4235                         }
4236                         key.type = found_key.type - 1;
4237                         key.offset = (u64)-1;
4238                         continue;
4239                 }
4240
4241                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
4242                         found_key.objectid, found_key.type, found_key.offset);
4243
4244                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
4245                 if (ret)
4246                         break;
4247                 btrfs_release_path(path);
4248
4249                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4250                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
4251                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
4252                                 found_key.offset : root->leafsize;
4253
4254                         ret = btrfs_update_block_group(trans, root, bytenr,
4255                                                        bytes, 0, 0);
4256                         if (ret)
4257                                 break;
4258                 }
4259         }
4260
4261         btrfs_release_path(path);
4262         return ret;
4263 }
4264
4265 /*
4266  * for a single backref, this will allocate a new extent
4267  * and add the backref to it.
4268  */
4269 static int record_extent(struct btrfs_trans_handle *trans,
4270                          struct btrfs_fs_info *info,
4271                          struct btrfs_path *path,
4272                          struct extent_record *rec,
4273                          struct extent_backref *back,
4274                          int allocated, u64 flags)
4275 {
4276         int ret;
4277         struct btrfs_root *extent_root = info->extent_root;
4278         struct extent_buffer *leaf;
4279         struct btrfs_key ins_key;
4280         struct btrfs_extent_item *ei;
4281         struct tree_backref *tback;
4282         struct data_backref *dback;
4283         struct btrfs_tree_block_info *bi;
4284
4285         if (!back->is_data)
4286                 rec->max_size = max_t(u64, rec->max_size,
4287                                     info->extent_root->leafsize);
4288
4289         if (!allocated) {
4290                 u32 item_size = sizeof(*ei);
4291
4292                 if (!back->is_data)
4293                         item_size += sizeof(*bi);
4294
4295                 ins_key.objectid = rec->start;
4296                 ins_key.offset = rec->max_size;
4297                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
4298
4299                 ret = btrfs_insert_empty_item(trans, extent_root, path,
4300                                         &ins_key, item_size);
4301                 if (ret)
4302                         goto fail;
4303
4304                 leaf = path->nodes[0];
4305                 ei = btrfs_item_ptr(leaf, path->slots[0],
4306                                     struct btrfs_extent_item);
4307
4308                 btrfs_set_extent_refs(leaf, ei, 0);
4309                 btrfs_set_extent_generation(leaf, ei, rec->generation);
4310
4311                 if (back->is_data) {
4312                         btrfs_set_extent_flags(leaf, ei,
4313                                                BTRFS_EXTENT_FLAG_DATA);
4314                 } else {
4315                         struct btrfs_disk_key copy_key;;
4316
4317                         tback = (struct tree_backref *)back;
4318                         bi = (struct btrfs_tree_block_info *)(ei + 1);
4319                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
4320                                              sizeof(*bi));
4321
4322                         btrfs_set_disk_key_objectid(&copy_key,
4323                                                     rec->info_objectid);
4324                         btrfs_set_disk_key_type(&copy_key, 0);
4325                         btrfs_set_disk_key_offset(&copy_key, 0);
4326
4327                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
4328                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
4329
4330                         btrfs_set_extent_flags(leaf, ei,
4331                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
4332                 }
4333
4334                 btrfs_mark_buffer_dirty(leaf);
4335                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
4336                                                rec->max_size, 1, 0);
4337                 if (ret)
4338                         goto fail;
4339                 btrfs_release_path(path);
4340         }
4341
4342         if (back->is_data) {
4343                 u64 parent;
4344                 int i;
4345
4346                 dback = (struct data_backref *)back;
4347                 if (back->full_backref)
4348                         parent = dback->parent;
4349                 else
4350                         parent = 0;
4351
4352                 for (i = 0; i < dback->found_ref; i++) {
4353                         /* if parent != 0, we're doing a full backref
4354                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
4355                          * just makes the backref allocator create a data
4356                          * backref
4357                          */
4358                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
4359                                                    rec->start, rec->max_size,
4360                                                    parent,
4361                                                    dback->root,
4362                                                    parent ?
4363                                                    BTRFS_FIRST_FREE_OBJECTID :
4364                                                    dback->owner,
4365                                                    dback->offset);
4366                         if (ret)
4367                                 break;
4368                 }
4369                 fprintf(stderr, "adding new data backref"
4370                                 " on %llu %s %llu owner %llu"
4371                                 " offset %llu found %d\n",
4372                                 (unsigned long long)rec->start,
4373                                 back->full_backref ?
4374                                 "parent" : "root",
4375                                 back->full_backref ?
4376                                 (unsigned long long)parent :
4377                                 (unsigned long long)dback->root,
4378                                 (unsigned long long)dback->owner,
4379                                 (unsigned long long)dback->offset,
4380                                 dback->found_ref);
4381         } else {
4382                 u64 parent;
4383
4384                 tback = (struct tree_backref *)back;
4385                 if (back->full_backref)
4386                         parent = tback->parent;
4387                 else
4388                         parent = 0;
4389
4390                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
4391                                            rec->start, rec->max_size,
4392                                            parent, tback->root, 0, 0);
4393                 fprintf(stderr, "adding new tree backref on "
4394                         "start %llu len %llu parent %llu root %llu\n",
4395                         rec->start, rec->max_size, tback->parent, tback->root);
4396         }
4397         if (ret)
4398                 goto fail;
4399 fail:
4400         btrfs_release_path(path);
4401         return ret;
4402 }
4403
4404 struct extent_entry {
4405         u64 bytenr;
4406         u64 bytes;
4407         int count;
4408         int broken;
4409         struct list_head list;
4410 };
4411
4412 static struct extent_entry *find_entry(struct list_head *entries,
4413                                        u64 bytenr, u64 bytes)
4414 {
4415         struct extent_entry *entry = NULL;
4416
4417         list_for_each_entry(entry, entries, list) {
4418                 if (entry->bytenr == bytenr && entry->bytes == bytes)
4419                         return entry;
4420         }
4421
4422         return NULL;
4423 }
4424
4425 static struct extent_entry *find_most_right_entry(struct list_head *entries)
4426 {
4427         struct extent_entry *entry, *best = NULL, *prev = NULL;
4428
4429         list_for_each_entry(entry, entries, list) {
4430                 if (!prev) {
4431                         prev = entry;
4432                         continue;
4433                 }
4434
4435                 /*
4436                  * If there are as many broken entries as entries then we know
4437                  * not to trust this particular entry.
4438                  */
4439                 if (entry->broken == entry->count)
4440                         continue;
4441
4442                 /*
4443                  * If our current entry == best then we can't be sure our best
4444                  * is really the best, so we need to keep searching.
4445                  */
4446                 if (best && best->count == entry->count) {
4447                         prev = entry;
4448                         best = NULL;
4449                         continue;
4450                 }
4451
4452                 /* Prev == entry, not good enough, have to keep searching */
4453                 if (!prev->broken && prev->count == entry->count)
4454                         continue;
4455
4456                 if (!best)
4457                         best = (prev->count > entry->count) ? prev : entry;
4458                 else if (best->count < entry->count)
4459                         best = entry;
4460                 prev = entry;
4461         }
4462
4463         return best;
4464 }
4465
4466 static int repair_ref(struct btrfs_trans_handle *trans,
4467                       struct btrfs_fs_info *info, struct btrfs_path *path,
4468                       struct data_backref *dback, struct extent_entry *entry)
4469 {
4470         struct btrfs_root *root;
4471         struct btrfs_file_extent_item *fi;
4472         struct extent_buffer *leaf;
4473         struct btrfs_key key;
4474         u64 bytenr, bytes;
4475         int ret;
4476
4477         key.objectid = dback->root;
4478         key.type = BTRFS_ROOT_ITEM_KEY;
4479         key.offset = (u64)-1;
4480         root = btrfs_read_fs_root(info, &key);
4481         if (IS_ERR(root)) {
4482                 fprintf(stderr, "Couldn't find root for our ref\n");
4483                 return -EINVAL;
4484         }
4485
4486         /*
4487          * The backref points to the original offset of the extent if it was
4488          * split, so we need to search down to the offset we have and then walk
4489          * forward until we find the backref we're looking for.
4490          */
4491         key.objectid = dback->owner;
4492         key.type = BTRFS_EXTENT_DATA_KEY;
4493         key.offset = dback->offset;
4494         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4495         if (ret < 0) {
4496                 fprintf(stderr, "Error looking up ref %d\n", ret);
4497                 return ret;
4498         }
4499
4500         while (1) {
4501                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4502                         ret = btrfs_next_leaf(root, path);
4503                         if (ret) {
4504                                 fprintf(stderr, "Couldn't find our ref, next\n");
4505                                 return -EINVAL;
4506                         }
4507                 }
4508                 leaf = path->nodes[0];
4509                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4510                 if (key.objectid != dback->owner ||
4511                     key.type != BTRFS_EXTENT_DATA_KEY) {
4512                         fprintf(stderr, "Couldn't find our ref, search\n");
4513                         return -EINVAL;
4514                 }
4515                 fi = btrfs_item_ptr(leaf, path->slots[0],
4516                                     struct btrfs_file_extent_item);
4517                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4518                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4519
4520                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
4521                         break;
4522                 path->slots[0]++;
4523         }
4524
4525         btrfs_release_path(path);
4526
4527         /*
4528          * Have to make sure that this root gets updated when we commit the
4529          * transaction
4530          */
4531         root->track_dirty = 1;
4532         if (root->last_trans != trans->transid) {
4533                 root->last_trans = trans->transid;
4534                 root->commit_root = root->node;
4535                 extent_buffer_get(root->node);
4536         }
4537
4538         /*
4539          * Ok we have the key of the file extent we want to fix, now we can cow
4540          * down to the thing and fix it.
4541          */
4542         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4543         if (ret < 0) {
4544                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
4545                         key.objectid, key.type, key.offset, ret);
4546                 return ret;
4547         }
4548         if (ret > 0) {
4549                 fprintf(stderr, "Well that's odd, we just found this key "
4550                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
4551                         key.offset);
4552                 return -EINVAL;
4553         }
4554         leaf = path->nodes[0];
4555         fi = btrfs_item_ptr(leaf, path->slots[0],
4556                             struct btrfs_file_extent_item);
4557
4558         if (btrfs_file_extent_compression(leaf, fi) &&
4559             dback->disk_bytenr != entry->bytenr) {
4560                 fprintf(stderr, "Ref doesn't match the record start and is "
4561                         "compressed, please take a btrfs-image of this file "
4562                         "system and send it to a btrfs developer so they can "
4563                         "complete this functionality for bytenr %Lu\n",
4564                         dback->disk_bytenr);
4565                 return -EINVAL;
4566         }
4567
4568         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
4569                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4570         } else if (dback->disk_bytenr > entry->bytenr) {
4571                 u64 off_diff, offset;
4572
4573                 off_diff = dback->disk_bytenr - entry->bytenr;
4574                 offset = btrfs_file_extent_offset(leaf, fi);
4575                 if (dback->disk_bytenr + offset +
4576                     btrfs_file_extent_num_bytes(leaf, fi) >
4577                     entry->bytenr + entry->bytes) {
4578                         fprintf(stderr, "Ref is past the entry end, please "
4579                                 "take a btrfs-image of this file system and "
4580                                 "send it to a btrfs developer, ref %Lu\n",
4581                                 dback->disk_bytenr);
4582                         return -EINVAL;
4583                 }
4584                 offset += off_diff;
4585                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4586                 btrfs_set_file_extent_offset(leaf, fi, offset);
4587         } else if (dback->disk_bytenr < entry->bytenr) {
4588                 u64 offset;
4589
4590                 offset = btrfs_file_extent_offset(leaf, fi);
4591                 if (dback->disk_bytenr + offset < entry->bytenr) {
4592                         fprintf(stderr, "Ref is before the entry start, please"
4593                                 " take a btrfs-image of this file system and "
4594                                 "send it to a btrfs developer, ref %Lu\n",
4595                                 dback->disk_bytenr);
4596                         return -EINVAL;
4597                 }
4598
4599                 offset += dback->disk_bytenr;
4600                 offset -= entry->bytenr;
4601                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4602                 btrfs_set_file_extent_offset(leaf, fi, offset);
4603         }
4604
4605         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
4606
4607         /*
4608          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
4609          * only do this if we aren't using compression, otherwise it's a
4610          * trickier case.
4611          */
4612         if (!btrfs_file_extent_compression(leaf, fi))
4613                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
4614         else
4615                 printf("ram bytes may be wrong?\n");
4616         btrfs_mark_buffer_dirty(leaf);
4617         btrfs_release_path(path);
4618         return 0;
4619 }
4620
4621 static int verify_backrefs(struct btrfs_trans_handle *trans,
4622                            struct btrfs_fs_info *info, struct btrfs_path *path,
4623                            struct extent_record *rec)
4624 {
4625         struct extent_backref *back;
4626         struct data_backref *dback;
4627         struct extent_entry *entry, *best = NULL;
4628         LIST_HEAD(entries);
4629         int nr_entries = 0;
4630         int broken_entries = 0;
4631         int ret = 0;
4632         short mismatch = 0;
4633
4634         /*
4635          * Metadata is easy and the backrefs should always agree on bytenr and
4636          * size, if not we've got bigger issues.
4637          */
4638         if (rec->metadata)
4639                 return 0;
4640
4641         list_for_each_entry(back, &rec->backrefs, list) {
4642                 dback = (struct data_backref *)back;
4643                 /*
4644                  * We only pay attention to backrefs that we found a real
4645                  * backref for.
4646                  */
4647                 if (dback->found_ref == 0)
4648                         continue;
4649                 if (back->full_backref)
4650                         continue;
4651
4652                 /*
4653                  * For now we only catch when the bytes don't match, not the
4654                  * bytenr.  We can easily do this at the same time, but I want
4655                  * to have a fs image to test on before we just add repair
4656                  * functionality willy-nilly so we know we won't screw up the
4657                  * repair.
4658                  */
4659
4660                 entry = find_entry(&entries, dback->disk_bytenr,
4661                                    dback->bytes);
4662                 if (!entry) {
4663                         entry = malloc(sizeof(struct extent_entry));
4664                         if (!entry) {
4665                                 ret = -ENOMEM;
4666                                 goto out;
4667                         }
4668                         memset(entry, 0, sizeof(*entry));
4669                         entry->bytenr = dback->disk_bytenr;
4670                         entry->bytes = dback->bytes;
4671                         list_add_tail(&entry->list, &entries);
4672                         nr_entries++;
4673                 }
4674
4675                 /*
4676                  * If we only have on entry we may think the entries agree when
4677                  * in reality they don't so we have to do some extra checking.
4678                  */
4679                 if (dback->disk_bytenr != rec->start ||
4680                     dback->bytes != rec->nr || back->broken)
4681                         mismatch = 1;
4682
4683                 if (back->broken) {
4684                         entry->broken++;
4685                         broken_entries++;
4686                 }
4687
4688                 entry->count++;
4689         }
4690
4691         /* Yay all the backrefs agree, carry on good sir */
4692         if (nr_entries <= 1 && !mismatch)
4693                 goto out;
4694
4695         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
4696                 "%Lu\n", rec->start);
4697
4698         /*
4699          * First we want to see if the backrefs can agree amongst themselves who
4700          * is right, so figure out which one of the entries has the highest
4701          * count.
4702          */
4703         best = find_most_right_entry(&entries);
4704
4705         /*
4706          * Ok so we may have an even split between what the backrefs think, so
4707          * this is where we use the extent ref to see what it thinks.
4708          */
4709         if (!best) {
4710                 entry = find_entry(&entries, rec->start, rec->nr);
4711                 if (!entry && (!broken_entries || !rec->found_rec)) {
4712                         fprintf(stderr, "Backrefs don't agree with eachother "
4713                                 "and extent record doesn't agree with anybody,"
4714                                 " so we can't fix bytenr %Lu bytes %Lu\n",
4715                                 rec->start, rec->nr);
4716                         ret = -EINVAL;
4717                         goto out;
4718                 } else if (!entry) {
4719                         /*
4720                          * Ok our backrefs were broken, we'll assume this is the
4721                          * correct value and add an entry for this range.
4722                          */
4723                         entry = malloc(sizeof(struct extent_entry));
4724                         if (!entry) {
4725                                 ret = -ENOMEM;
4726                                 goto out;
4727                         }
4728                         memset(entry, 0, sizeof(*entry));
4729                         entry->bytenr = rec->start;
4730                         entry->bytes = rec->nr;
4731                         list_add_tail(&entry->list, &entries);
4732                         nr_entries++;
4733                 }
4734                 entry->count++;
4735                 best = find_most_right_entry(&entries);
4736                 if (!best) {
4737                         fprintf(stderr, "Backrefs and extent record evenly "
4738                                 "split on who is right, this is going to "
4739                                 "require user input to fix bytenr %Lu bytes "
4740                                 "%Lu\n", rec->start, rec->nr);
4741                         ret = -EINVAL;
4742                         goto out;
4743                 }
4744         }
4745
4746         /*
4747          * I don't think this can happen currently as we'll abort() if we catch
4748          * this case higher up, but in case somebody removes that we still can't
4749          * deal with it properly here yet, so just bail out of that's the case.
4750          */
4751         if (best->bytenr != rec->start) {
4752                 fprintf(stderr, "Extent start and backref starts don't match, "
4753                         "please use btrfs-image on this file system and send "
4754                         "it to a btrfs developer so they can make fsck fix "
4755                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
4756                         rec->start, rec->nr);
4757                 ret = -EINVAL;
4758                 goto out;
4759         }
4760
4761         /*
4762          * Ok great we all agreed on an extent record, let's go find the real
4763          * references and fix up the ones that don't match.
4764          */
4765         list_for_each_entry(back, &rec->backrefs, list) {
4766                 dback = (struct data_backref *)back;
4767
4768                 /*
4769                  * Still ignoring backrefs that don't have a real ref attached
4770                  * to them.
4771                  */
4772                 if (dback->found_ref == 0)
4773                         continue;
4774                 if (back->full_backref)
4775                         continue;
4776
4777                 if (dback->bytes == best->bytes &&
4778                     dback->disk_bytenr == best->bytenr)
4779                         continue;
4780
4781                 ret = repair_ref(trans, info, path, dback, best);
4782                 if (ret)
4783                         goto out;
4784         }
4785
4786         /*
4787          * Ok we messed with the actual refs, which means we need to drop our
4788          * entire cache and go back and rescan.  I know this is a huge pain and
4789          * adds a lot of extra work, but it's the only way to be safe.  Once all
4790          * the backrefs agree we may not need to do anything to the extent
4791          * record itself.
4792          */
4793         ret = -EAGAIN;
4794 out:
4795         while (!list_empty(&entries)) {
4796                 entry = list_entry(entries.next, struct extent_entry, list);
4797                 list_del_init(&entry->list);
4798                 free(entry);
4799         }
4800         return ret;
4801 }
4802
4803 static int process_duplicates(struct btrfs_root *root,
4804                               struct cache_tree *extent_cache,
4805                               struct extent_record *rec)
4806 {
4807         struct extent_record *good, *tmp;
4808         struct cache_extent *cache;
4809         int ret;
4810
4811         /*
4812          * If we found a extent record for this extent then return, or if we
4813          * have more than one duplicate we are likely going to need to delete
4814          * something.
4815          */
4816         if (rec->found_rec || rec->num_duplicates > 1)
4817                 return 0;
4818
4819         /* Shouldn't happen but just in case */
4820         BUG_ON(!rec->num_duplicates);
4821
4822         /*
4823          * So this happens if we end up with a backref that doesn't match the
4824          * actual extent entry.  So either the backref is bad or the extent
4825          * entry is bad.  Either way we want to have the extent_record actually
4826          * reflect what we found in the extent_tree, so we need to take the
4827          * duplicate out and use that as the extent_record since the only way we
4828          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
4829          */
4830         remove_cache_extent(extent_cache, &rec->cache);
4831
4832         good = list_entry(rec->dups.next, struct extent_record, list);
4833         list_del_init(&good->list);
4834         INIT_LIST_HEAD(&good->backrefs);
4835         INIT_LIST_HEAD(&good->dups);
4836         good->cache.start = good->start;
4837         good->cache.size = good->nr;
4838         good->content_checked = 0;
4839         good->owner_ref_checked = 0;
4840         good->num_duplicates = 0;
4841         good->refs = rec->refs;
4842         list_splice_init(&rec->backrefs, &good->backrefs);
4843         while (1) {
4844                 cache = lookup_cache_extent(extent_cache, good->start,
4845                                             good->nr);
4846                 if (!cache)
4847                         break;
4848                 tmp = container_of(cache, struct extent_record, cache);
4849
4850                 /*
4851                  * If we find another overlapping extent and it's found_rec is
4852                  * set then it's a duplicate and we need to try and delete
4853                  * something.
4854                  */
4855                 if (tmp->found_rec || tmp->num_duplicates > 0) {
4856                         if (list_empty(&good->list))
4857                                 list_add_tail(&good->list,
4858                                               &duplicate_extents);
4859                         good->num_duplicates += tmp->num_duplicates + 1;
4860                         list_splice_init(&tmp->dups, &good->dups);
4861                         list_del_init(&tmp->list);
4862                         list_add_tail(&tmp->list, &good->dups);
4863                         remove_cache_extent(extent_cache, &tmp->cache);
4864                         continue;
4865                 }
4866
4867                 /*
4868                  * Ok we have another non extent item backed extent rec, so lets
4869                  * just add it to this extent and carry on like we did above.
4870                  */
4871                 good->refs += tmp->refs;
4872                 list_splice_init(&tmp->backrefs, &good->backrefs);
4873                 remove_cache_extent(extent_cache, &tmp->cache);
4874                 free(tmp);
4875         }
4876         ret = insert_cache_extent(extent_cache, &good->cache);
4877         BUG_ON(ret);
4878         free(rec);
4879         return good->num_duplicates ? 0 : 1;
4880 }
4881
4882 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
4883                                     struct btrfs_root *root,
4884                                     struct extent_record *rec)
4885 {
4886         LIST_HEAD(delete_list);
4887         struct btrfs_path *path;
4888         struct extent_record *tmp, *good, *n;
4889         int nr_del = 0;
4890         int ret = 0;
4891         struct btrfs_key key;
4892
4893         path = btrfs_alloc_path();
4894         if (!path) {
4895                 ret = -ENOMEM;
4896                 goto out;
4897         }
4898
4899         good = rec;
4900         /* Find the record that covers all of the duplicates. */
4901         list_for_each_entry(tmp, &rec->dups, list) {
4902                 if (good->start < tmp->start)
4903                         continue;
4904                 if (good->nr > tmp->nr)
4905                         continue;
4906
4907                 if (tmp->start + tmp->nr < good->start + good->nr) {
4908                         fprintf(stderr, "Ok we have overlapping extents that "
4909                                 "aren't completely covered by eachother, this "
4910                                 "is going to require more careful thought.  "
4911                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
4912                                 tmp->start, tmp->nr, good->start, good->nr);
4913                         abort();
4914                 }
4915                 good = tmp;
4916         }
4917
4918         if (good != rec)
4919                 list_add_tail(&rec->list, &delete_list);
4920
4921         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
4922                 if (tmp == good)
4923                         continue;
4924                 list_move_tail(&tmp->list, &delete_list);
4925         }
4926
4927         root = root->fs_info->extent_root;
4928         list_for_each_entry(tmp, &delete_list, list) {
4929                 if (tmp->found_rec == 0)
4930                         continue;
4931                 key.objectid = tmp->start;
4932                 key.type = BTRFS_EXTENT_ITEM_KEY;
4933                 key.offset = tmp->nr;
4934
4935                 /* Shouldn't happen but just in case */
4936                 if (tmp->metadata) {
4937                         fprintf(stderr, "Well this shouldn't happen, extent "
4938                                 "record overlaps but is metadata? "
4939                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
4940                         abort();
4941                 }
4942
4943                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4944                 if (ret) {
4945                         if (ret > 0)
4946                                 ret = -EINVAL;
4947                         goto out;
4948                 }
4949                 ret = btrfs_del_item(trans, root, path);
4950                 if (ret)
4951                         goto out;
4952                 btrfs_release_path(path);
4953                 nr_del++;
4954         }
4955
4956 out:
4957         while (!list_empty(&delete_list)) {
4958                 tmp = list_entry(delete_list.next, struct extent_record, list);
4959                 list_del_init(&tmp->list);
4960                 if (tmp == rec)
4961                         continue;
4962                 free(tmp);
4963         }
4964
4965         while (!list_empty(&rec->dups)) {
4966                 tmp = list_entry(rec->dups.next, struct extent_record, list);
4967                 list_del_init(&tmp->list);
4968                 free(tmp);
4969         }
4970
4971         btrfs_free_path(path);
4972
4973         if (!ret && !nr_del)
4974                 rec->num_duplicates = 0;
4975
4976         return ret ? ret : nr_del;
4977 }
4978
4979 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
4980                                   struct btrfs_fs_info *info,
4981                                   struct btrfs_path *path,
4982                                   struct cache_tree *extent_cache,
4983                                   struct extent_record *rec)
4984 {
4985         struct btrfs_root *root;
4986         struct extent_backref *back;
4987         struct data_backref *dback;
4988         struct cache_extent *cache;
4989         struct btrfs_file_extent_item *fi;
4990         struct btrfs_key key;
4991         u64 bytenr, bytes;
4992         int ret;
4993
4994         list_for_each_entry(back, &rec->backrefs, list) {
4995                 dback = (struct data_backref *)back;
4996
4997                 /* We found this one, we don't need to do a lookup */
4998                 if (dback->found_ref)
4999                         continue;
5000                 /* Don't care about full backrefs (poor unloved backrefs) */
5001                 if (back->full_backref)
5002                         continue;
5003                 key.objectid = dback->root;
5004                 key.type = BTRFS_ROOT_ITEM_KEY;
5005                 key.offset = (u64)-1;
5006
5007                 root = btrfs_read_fs_root(info, &key);
5008
5009                 /* No root, definitely a bad ref, skip */
5010                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
5011                         continue;
5012                 /* Other err, exit */
5013                 if (IS_ERR(root))
5014                         return PTR_ERR(root);
5015
5016                 key.objectid = dback->owner;
5017                 key.type = BTRFS_EXTENT_DATA_KEY;
5018                 key.offset = dback->offset;
5019                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5020                 if (ret) {
5021                         btrfs_release_path(path);
5022                         if (ret < 0)
5023                                 return ret;
5024                         /* Didn't find it, we can carry on */
5025                         ret = 0;
5026                         continue;
5027                 }
5028
5029                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5030                                     struct btrfs_file_extent_item);
5031                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
5032                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
5033                 btrfs_release_path(path);
5034                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5035                 if (cache) {
5036                         struct extent_record *tmp;
5037                         tmp = container_of(cache, struct extent_record, cache);
5038
5039                         /*
5040                          * If we found an extent record for the bytenr for this
5041                          * particular backref then we can't add it to our
5042                          * current extent record.  We only want to add backrefs
5043                          * that don't have a corresponding extent item in the
5044                          * extent tree since they likely belong to this record
5045                          * and we need to fix it if it doesn't match bytenrs.
5046                          */
5047                         if  (tmp->found_rec)
5048                                 continue;
5049                 }
5050
5051                 dback->found_ref += 1;
5052                 dback->disk_bytenr = bytenr;
5053                 dback->bytes = bytes;
5054
5055                 /*
5056                  * Set this so the verify backref code knows not to trust the
5057                  * values in this backref.
5058                  */
5059                 back->broken = 1;
5060         }
5061
5062         return 0;
5063 }
5064
5065 /*
5066  * when an incorrect extent item is found, this will delete
5067  * all of the existing entries for it and recreate them
5068  * based on what the tree scan found.
5069  */
5070 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
5071                              struct btrfs_fs_info *info,
5072                              struct cache_tree *extent_cache,
5073                              struct extent_record *rec)
5074 {
5075         int ret;
5076         struct btrfs_path *path;
5077         struct list_head *cur = rec->backrefs.next;
5078         struct cache_extent *cache;
5079         struct extent_backref *back;
5080         int allocated = 0;
5081         u64 flags = 0;
5082
5083         /* remember our flags for recreating the extent */
5084         ret = btrfs_lookup_extent_info(NULL, info->extent_root, rec->start,
5085                                        rec->max_size, rec->metadata, NULL,
5086                                        &flags);
5087         if (ret < 0)
5088                 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5089
5090         path = btrfs_alloc_path();
5091         if (!path)
5092                 return -ENOMEM;
5093
5094         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
5095                 /*
5096                  * Sometimes the backrefs themselves are so broken they don't
5097                  * get attached to any meaningful rec, so first go back and
5098                  * check any of our backrefs that we couldn't find and throw
5099                  * them into the list if we find the backref so that
5100                  * verify_backrefs can figure out what to do.
5101                  */
5102                 ret = find_possible_backrefs(trans, info, path, extent_cache,
5103                                              rec);
5104                 if (ret < 0)
5105                         goto out;
5106         }
5107
5108         /* step one, make sure all of the backrefs agree */
5109         ret = verify_backrefs(trans, info, path, rec);
5110         if (ret < 0)
5111                 goto out;
5112
5113         /* step two, delete all the existing records */
5114         ret = delete_extent_records(trans, info->extent_root, path,
5115                                     rec->start, rec->max_size);
5116
5117         if (ret < 0)
5118                 goto out;
5119
5120         /* was this block corrupt?  If so, don't add references to it */
5121         cache = lookup_cache_extent(info->corrupt_blocks,
5122                                     rec->start, rec->max_size);
5123         if (cache) {
5124                 ret = 0;
5125                 goto out;
5126         }
5127
5128         /* step three, recreate all the refs we did find */
5129         while(cur != &rec->backrefs) {
5130                 back = list_entry(cur, struct extent_backref, list);
5131                 cur = cur->next;
5132
5133                 /*
5134                  * if we didn't find any references, don't create a
5135                  * new extent record
5136                  */
5137                 if (!back->found_ref)
5138                         continue;
5139
5140                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
5141                 allocated = 1;
5142
5143                 if (ret)
5144                         goto out;
5145         }
5146 out:
5147         btrfs_free_path(path);
5148         return ret;
5149 }
5150
5151 /* right now we only prune from the extent allocation tree */
5152 static int prune_one_block(struct btrfs_trans_handle *trans,
5153                            struct btrfs_fs_info *info,
5154                            struct btrfs_corrupt_block *corrupt)
5155 {
5156         int ret;
5157         struct btrfs_path path;
5158         struct extent_buffer *eb;
5159         u64 found;
5160         int slot;
5161         int nritems;
5162         int level = corrupt->level + 1;
5163
5164         btrfs_init_path(&path);
5165 again:
5166         /* we want to stop at the parent to our busted block */
5167         path.lowest_level = level;
5168
5169         ret = btrfs_search_slot(trans, info->extent_root,
5170                                 &corrupt->key, &path, -1, 1);
5171
5172         if (ret < 0)
5173                 goto out;
5174
5175         eb = path.nodes[level];
5176         if (!eb) {
5177                 ret = -ENOENT;
5178                 goto out;
5179         }
5180
5181         /*
5182          * hopefully the search gave us the block we want to prune,
5183          * lets try that first
5184          */
5185         slot = path.slots[level];
5186         found =  btrfs_node_blockptr(eb, slot);
5187         if (found == corrupt->cache.start)
5188                 goto del_ptr;
5189
5190         nritems = btrfs_header_nritems(eb);
5191
5192         /* the search failed, lets scan this node and hope we find it */
5193         for (slot = 0; slot < nritems; slot++) {
5194                 found =  btrfs_node_blockptr(eb, slot);
5195                 if (found == corrupt->cache.start)
5196                         goto del_ptr;
5197         }
5198         /*
5199          * we couldn't find the bad block.  TODO, search all the nodes for pointers
5200          * to this block
5201          */
5202         if (eb == info->extent_root->node) {
5203                 ret = -ENOENT;
5204                 goto out;
5205         } else {
5206                 level++;
5207                 btrfs_release_path(&path);
5208                 goto again;
5209         }
5210
5211 del_ptr:
5212         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
5213         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
5214
5215 out:
5216         btrfs_release_path(&path);
5217         return ret;
5218 }
5219
5220 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
5221                                 struct btrfs_fs_info *info)
5222 {
5223         struct cache_extent *cache;
5224         struct btrfs_corrupt_block *corrupt;
5225
5226         cache = search_cache_extent(info->corrupt_blocks, 0);
5227         while (1) {
5228                 if (!cache)
5229                         break;
5230                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5231                 prune_one_block(trans, info, corrupt);
5232                 cache = next_cache_extent(cache);
5233         }
5234         return 0;
5235 }
5236
5237 static void free_corrupt_block(struct cache_extent *cache)
5238 {
5239         struct btrfs_corrupt_block *corrupt;
5240
5241         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5242         free(corrupt);
5243 }
5244
5245 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
5246
5247 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
5248 {
5249         struct btrfs_block_group_cache *cache;
5250         u64 start, end;
5251         int ret;
5252
5253         while (1) {
5254                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
5255                                             &start, &end, EXTENT_DIRTY);
5256                 if (ret)
5257                         break;
5258                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
5259                                    GFP_NOFS);
5260         }
5261
5262         start = 0;
5263         while (1) {
5264                 cache = btrfs_lookup_first_block_group(fs_info, start);
5265                 if (!cache)
5266                         break;
5267                 if (cache->cached)
5268                         cache->cached = 0;
5269                 start = cache->key.objectid + cache->key.offset;
5270         }
5271 }
5272
5273 static int check_extent_refs(struct btrfs_trans_handle *trans,
5274                              struct btrfs_root *root,
5275                              struct cache_tree *extent_cache)
5276 {
5277         struct extent_record *rec;
5278         struct cache_extent *cache;
5279         int err = 0;
5280         int ret = 0;
5281         int fixed = 0;
5282         int had_dups = 0;
5283
5284         if (repair) {
5285                 /*
5286                  * if we're doing a repair, we have to make sure
5287                  * we don't allocate from the problem extents.
5288                  * In the worst case, this will be all the
5289                  * extents in the FS
5290                  */
5291                 cache = search_cache_extent(extent_cache, 0);
5292                 while(cache) {
5293                         rec = container_of(cache, struct extent_record, cache);
5294                         btrfs_pin_extent(root->fs_info,
5295                                          rec->start, rec->max_size);
5296                         cache = next_cache_extent(cache);
5297                 }
5298
5299                 /* pin down all the corrupted blocks too */
5300                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
5301                 while(cache) {
5302                         btrfs_pin_extent(root->fs_info,
5303                                          cache->start, cache->size);
5304                         cache = next_cache_extent(cache);
5305                 }
5306                 prune_corrupt_blocks(trans, root->fs_info);
5307                 reset_cached_block_groups(root->fs_info);
5308         }
5309
5310         /*
5311          * We need to delete any duplicate entries we find first otherwise we
5312          * could mess up the extent tree when we have backrefs that actually
5313          * belong to a different extent item and not the weird duplicate one.
5314          */
5315         while (repair && !list_empty(&duplicate_extents)) {
5316                 rec = list_entry(duplicate_extents.next, struct extent_record,
5317                                  list);
5318                 list_del_init(&rec->list);
5319
5320                 /* Sometimes we can find a backref before we find an actual
5321                  * extent, so we need to process it a little bit to see if there
5322                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
5323                  * if this is a backref screwup.  If we need to delete stuff
5324                  * process_duplicates() will return 0, otherwise it will return
5325                  * 1 and we
5326                  */
5327                 if (process_duplicates(root, extent_cache, rec))
5328                         continue;
5329                 ret = delete_duplicate_records(trans, root, rec);
5330                 if (ret < 0)
5331                         return ret;
5332                 /*
5333                  * delete_duplicate_records will return the number of entries
5334                  * deleted, so if it's greater than 0 then we know we actually
5335                  * did something and we need to remove.
5336                  */
5337                 if (ret)
5338                         had_dups = 1;
5339         }
5340
5341         if (had_dups)
5342                 return -EAGAIN;
5343
5344         while(1) {
5345                 fixed = 0;
5346                 cache = search_cache_extent(extent_cache, 0);
5347                 if (!cache)
5348                         break;
5349                 rec = container_of(cache, struct extent_record, cache);
5350                 if (rec->num_duplicates) {
5351                         fprintf(stderr, "extent item %llu has multiple extent "
5352                                 "items\n", (unsigned long long)rec->start);
5353                         err = 1;
5354                 }
5355
5356                 if (rec->refs != rec->extent_item_refs) {
5357                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
5358                                 (unsigned long long)rec->start,
5359                                 (unsigned long long)rec->nr);
5360                         fprintf(stderr, "extent item %llu, found %llu\n",
5361                                 (unsigned long long)rec->extent_item_refs,
5362                                 (unsigned long long)rec->refs);
5363                         if (!fixed && repair) {
5364                                 ret = fixup_extent_refs(trans, root->fs_info,
5365                                                         extent_cache, rec);
5366                                 if (ret)
5367                                         goto repair_abort;
5368                                 fixed = 1;
5369                         }
5370                         err = 1;
5371
5372                 }
5373                 if (all_backpointers_checked(rec, 1)) {
5374                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
5375                                 (unsigned long long)rec->start,
5376                                 (unsigned long long)rec->nr);
5377
5378                         if (!fixed && repair) {
5379                                 ret = fixup_extent_refs(trans, root->fs_info,
5380                                                         extent_cache, rec);
5381                                 if (ret)
5382                                         goto repair_abort;
5383                                 fixed = 1;
5384                         }
5385
5386                         err = 1;
5387                 }
5388                 if (!rec->owner_ref_checked) {
5389                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
5390                                 (unsigned long long)rec->start,
5391                                 (unsigned long long)rec->nr);
5392                         if (!fixed && repair) {
5393                                 ret = fixup_extent_refs(trans, root->fs_info,
5394                                                         extent_cache, rec);
5395                                 if (ret)
5396                                         goto repair_abort;
5397                                 fixed = 1;
5398                         }
5399                         err = 1;
5400                 }
5401
5402                 remove_cache_extent(extent_cache, cache);
5403                 free_all_extent_backrefs(rec);
5404                 free(rec);
5405         }
5406 repair_abort:
5407         if (repair) {
5408                 if (ret && ret != -EAGAIN) {
5409                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
5410                         exit(1);
5411                 } else if (!ret) {
5412                         btrfs_fix_block_accounting(trans, root);
5413                 }
5414                 if (err)
5415                         fprintf(stderr, "repaired damaged extent references\n");
5416                 return ret;
5417         }
5418         return err;
5419 }
5420
5421 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
5422 {
5423         u64 stripe_size;
5424
5425         if (type & BTRFS_BLOCK_GROUP_RAID0) {
5426                 stripe_size = length;
5427                 stripe_size /= num_stripes;
5428         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
5429                 stripe_size = length * 2;
5430                 stripe_size /= num_stripes;
5431         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
5432                 stripe_size = length;
5433                 stripe_size /= (num_stripes - 1);
5434         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
5435                 stripe_size = length;
5436                 stripe_size /= (num_stripes - 2);
5437         } else {
5438                 stripe_size = length;
5439         }
5440         return stripe_size;
5441 }
5442
5443 static int check_chunk_refs(struct chunk_record *chunk_rec,
5444                             struct block_group_tree *block_group_cache,
5445                             struct device_extent_tree *dev_extent_cache,
5446                             int silent)
5447 {
5448         struct cache_extent *block_group_item;
5449         struct block_group_record *block_group_rec;
5450         struct cache_extent *dev_extent_item;
5451         struct device_extent_record *dev_extent_rec;
5452         u64 devid;
5453         u64 offset;
5454         u64 length;
5455         int i;
5456         int ret = 0;
5457
5458         block_group_item = lookup_cache_extent(&block_group_cache->tree,
5459                                                chunk_rec->offset,
5460                                                chunk_rec->length);
5461         if (block_group_item) {
5462                 block_group_rec = container_of(block_group_item,
5463                                                struct block_group_record,
5464                                                cache);
5465                 if (chunk_rec->length != block_group_rec->offset ||
5466                     chunk_rec->offset != block_group_rec->objectid ||
5467                     chunk_rec->type_flags != block_group_rec->flags) {
5468                         if (!silent)
5469                                 fprintf(stderr,
5470                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
5471                                         chunk_rec->objectid,
5472                                         chunk_rec->type,
5473                                         chunk_rec->offset,
5474                                         chunk_rec->length,
5475                                         chunk_rec->offset,
5476                                         chunk_rec->type_flags,
5477                                         block_group_rec->objectid,
5478                                         block_group_rec->type,
5479                                         block_group_rec->offset,
5480                                         block_group_rec->offset,
5481                                         block_group_rec->objectid,
5482                                         block_group_rec->flags);
5483                         ret = -1;
5484                 } else {
5485                         list_del_init(&block_group_rec->list);
5486                         chunk_rec->bg_rec = block_group_rec;
5487                 }
5488         } else {
5489                 if (!silent)
5490                         fprintf(stderr,
5491                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
5492                                 chunk_rec->objectid,
5493                                 chunk_rec->type,
5494                                 chunk_rec->offset,
5495                                 chunk_rec->length,
5496                                 chunk_rec->offset,
5497                                 chunk_rec->type_flags);
5498                 ret = -1;
5499         }
5500
5501         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
5502                                     chunk_rec->num_stripes);
5503         for (i = 0; i < chunk_rec->num_stripes; ++i) {
5504                 devid = chunk_rec->stripes[i].devid;
5505                 offset = chunk_rec->stripes[i].offset;
5506                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
5507                                                        devid, offset, length);
5508                 if (dev_extent_item) {
5509                         dev_extent_rec = container_of(dev_extent_item,
5510                                                 struct device_extent_record,
5511                                                 cache);
5512                         if (dev_extent_rec->objectid != devid ||
5513                             dev_extent_rec->offset != offset ||
5514                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
5515                             dev_extent_rec->length != length) {
5516                                 if (!silent)
5517                                         fprintf(stderr,
5518                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
5519                                                 chunk_rec->objectid,
5520                                                 chunk_rec->type,
5521                                                 chunk_rec->offset,
5522                                                 chunk_rec->stripes[i].devid,
5523                                                 chunk_rec->stripes[i].offset,
5524                                                 dev_extent_rec->objectid,
5525                                                 dev_extent_rec->offset,
5526                                                 dev_extent_rec->length);
5527                                 ret = -1;
5528                         } else {
5529                                 list_move(&dev_extent_rec->chunk_list,
5530                                           &chunk_rec->dextents);
5531                         }
5532                 } else {
5533                         if (!silent)
5534                                 fprintf(stderr,
5535                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
5536                                         chunk_rec->objectid,
5537                                         chunk_rec->type,
5538                                         chunk_rec->offset,
5539                                         chunk_rec->stripes[i].devid,
5540                                         chunk_rec->stripes[i].offset);
5541                         ret = -1;
5542                 }
5543         }
5544         return ret;
5545 }
5546
5547 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
5548 int check_chunks(struct cache_tree *chunk_cache,
5549                  struct block_group_tree *block_group_cache,
5550                  struct device_extent_tree *dev_extent_cache,
5551                  struct list_head *good, struct list_head *bad, int silent)
5552 {
5553         struct cache_extent *chunk_item;
5554         struct chunk_record *chunk_rec;
5555         struct block_group_record *bg_rec;
5556         struct device_extent_record *dext_rec;
5557         int err;
5558         int ret = 0;
5559
5560         chunk_item = first_cache_extent(chunk_cache);
5561         while (chunk_item) {
5562                 chunk_rec = container_of(chunk_item, struct chunk_record,
5563                                          cache);
5564                 err = check_chunk_refs(chunk_rec, block_group_cache,
5565                                        dev_extent_cache, silent);
5566                 if (err) {
5567                         ret = err;
5568                         if (bad)
5569                                 list_add_tail(&chunk_rec->list, bad);
5570                 } else {
5571                         if (good)
5572                                 list_add_tail(&chunk_rec->list, good);
5573                 }
5574
5575                 chunk_item = next_cache_extent(chunk_item);
5576         }
5577
5578         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
5579                 if (!silent)
5580                         fprintf(stderr,
5581                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
5582                                 bg_rec->objectid,
5583                                 bg_rec->offset,
5584                                 bg_rec->flags);
5585                 if (!ret)
5586                         ret = 1;
5587         }
5588
5589         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
5590                             chunk_list) {
5591                 if (!silent)
5592                         fprintf(stderr,
5593                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
5594                                 dext_rec->objectid,
5595                                 dext_rec->offset,
5596                                 dext_rec->length);
5597                 if (!ret)
5598                         ret = 1;
5599         }
5600         return ret;
5601 }
5602
5603
5604 static int check_device_used(struct device_record *dev_rec,
5605                              struct device_extent_tree *dext_cache)
5606 {
5607         struct cache_extent *cache;
5608         struct device_extent_record *dev_extent_rec;
5609         u64 total_byte = 0;
5610
5611         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
5612         while (cache) {
5613                 dev_extent_rec = container_of(cache,
5614                                               struct device_extent_record,
5615                                               cache);
5616                 if (dev_extent_rec->objectid != dev_rec->devid)
5617                         break;
5618
5619                 list_del(&dev_extent_rec->device_list);
5620                 total_byte += dev_extent_rec->length;
5621                 cache = next_cache_extent(cache);
5622         }
5623
5624         if (total_byte != dev_rec->byte_used) {
5625                 fprintf(stderr,
5626                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
5627                         total_byte, dev_rec->byte_used, dev_rec->objectid,
5628                         dev_rec->type, dev_rec->offset);
5629                 return -1;
5630         } else {
5631                 return 0;
5632         }
5633 }
5634
5635 /* check btrfs_dev_item -> btrfs_dev_extent */
5636 static int check_devices(struct rb_root *dev_cache,
5637                          struct device_extent_tree *dev_extent_cache)
5638 {
5639         struct rb_node *dev_node;
5640         struct device_record *dev_rec;
5641         struct device_extent_record *dext_rec;
5642         int err;
5643         int ret = 0;
5644
5645         dev_node = rb_first(dev_cache);
5646         while (dev_node) {
5647                 dev_rec = container_of(dev_node, struct device_record, node);
5648                 err = check_device_used(dev_rec, dev_extent_cache);
5649                 if (err)
5650                         ret = err;
5651
5652                 dev_node = rb_next(dev_node);
5653         }
5654         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
5655                             device_list) {
5656                 fprintf(stderr,
5657                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
5658                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
5659                 if (!ret)
5660                         ret = 1;
5661         }
5662         return ret;
5663 }
5664
5665 static int check_chunks_and_extents(struct btrfs_root *root)
5666 {
5667         struct rb_root dev_cache;
5668         struct cache_tree chunk_cache;
5669         struct block_group_tree block_group_cache;
5670         struct device_extent_tree dev_extent_cache;
5671         struct cache_tree extent_cache;
5672         struct cache_tree seen;
5673         struct cache_tree pending;
5674         struct cache_tree reada;
5675         struct cache_tree nodes;
5676         struct cache_tree corrupt_blocks;
5677         struct btrfs_path path;
5678         struct btrfs_key key;
5679         struct btrfs_key found_key;
5680         int ret, err = 0;
5681         u64 last = 0;
5682         struct block_info *bits;
5683         int bits_nr;
5684         struct extent_buffer *leaf;
5685         struct btrfs_trans_handle *trans = NULL;
5686         int slot;
5687         struct btrfs_root_item ri;
5688         struct list_head dropping_trees;
5689
5690         dev_cache = RB_ROOT;
5691         cache_tree_init(&chunk_cache);
5692         block_group_tree_init(&block_group_cache);
5693         device_extent_tree_init(&dev_extent_cache);
5694
5695         cache_tree_init(&extent_cache);
5696         cache_tree_init(&seen);
5697         cache_tree_init(&pending);
5698         cache_tree_init(&nodes);
5699         cache_tree_init(&reada);
5700         cache_tree_init(&corrupt_blocks);
5701         INIT_LIST_HEAD(&dropping_trees);
5702
5703         if (repair) {
5704                 trans = btrfs_start_transaction(root, 1);
5705                 if (IS_ERR(trans)) {
5706                         fprintf(stderr, "Error starting transaction\n");
5707                         return PTR_ERR(trans);
5708                 }
5709                 root->fs_info->fsck_extent_cache = &extent_cache;
5710                 root->fs_info->free_extent_hook = free_extent_hook;
5711                 root->fs_info->corrupt_blocks = &corrupt_blocks;
5712         }
5713
5714         bits_nr = 1024;
5715         bits = malloc(bits_nr * sizeof(struct block_info));
5716         if (!bits) {
5717                 perror("malloc");
5718                 exit(1);
5719         }
5720
5721 again:
5722         add_root_to_pending(root->fs_info->tree_root->node,
5723                             &extent_cache, &pending, &seen, &nodes,
5724                             &root->fs_info->tree_root->root_key);
5725
5726         add_root_to_pending(root->fs_info->chunk_root->node,
5727                             &extent_cache, &pending, &seen, &nodes,
5728                             &root->fs_info->chunk_root->root_key);
5729
5730         btrfs_init_path(&path);
5731         key.offset = 0;
5732         key.objectid = 0;
5733         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
5734         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
5735                                         &key, &path, 0, 0);
5736         BUG_ON(ret < 0);
5737         while(1) {
5738                 leaf = path.nodes[0];
5739                 slot = path.slots[0];
5740                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
5741                         ret = btrfs_next_leaf(root, &path);
5742                         if (ret != 0)
5743                                 break;
5744                         leaf = path.nodes[0];
5745                         slot = path.slots[0];
5746                 }
5747                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
5748                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
5749                         unsigned long offset;
5750                         struct extent_buffer *buf;
5751
5752                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
5753                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
5754                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
5755                                 buf = read_tree_block(root->fs_info->tree_root,
5756                                                       btrfs_root_bytenr(&ri),
5757                                                       btrfs_level_size(root,
5758                                                       btrfs_root_level(&ri)),
5759                                                       0);
5760                                 add_root_to_pending(buf, &extent_cache,
5761                                                     &pending, &seen, &nodes,
5762                                                     &found_key);
5763                                 free_extent_buffer(buf);
5764                         } else {
5765                                 struct dropping_root_item_record *dri_rec;
5766                                 dri_rec = malloc(sizeof(*dri_rec));
5767                                 if (!dri_rec) {
5768                                         perror("malloc");
5769                                         exit(1);
5770                                 }
5771                                 memcpy(&dri_rec->ri, &ri, sizeof(ri));
5772                                 memcpy(&dri_rec->found_key, &found_key,
5773                                        sizeof(found_key));
5774                                 list_add_tail(&dri_rec->list, &dropping_trees);
5775                         }
5776                 }
5777                 path.slots[0]++;
5778         }
5779         btrfs_release_path(&path);
5780         while (1) {
5781                 ret = run_next_block(trans, root, bits, bits_nr, &last,
5782                                      &pending, &seen, &reada, &nodes,
5783                                      &extent_cache, &chunk_cache, &dev_cache,
5784                                      &block_group_cache, &dev_extent_cache,
5785                                      NULL);
5786                 if (ret != 0)
5787                         break;
5788         }
5789
5790         while (!list_empty(&dropping_trees)) {
5791                 struct dropping_root_item_record *rec;
5792                 struct extent_buffer *buf;
5793                 rec = list_entry(dropping_trees.next,
5794                                  struct dropping_root_item_record, list);
5795                 last = 0;
5796                 if (!bits) {
5797                         perror("realloc");
5798                         exit(1);
5799                 }
5800                 buf = read_tree_block(root->fs_info->tree_root,
5801                                       btrfs_root_bytenr(&rec->ri),
5802                                       btrfs_level_size(root,
5803                                       btrfs_root_level(&rec->ri)), 0);
5804                 add_root_to_pending(buf, &extent_cache, &pending,
5805                                     &seen, &nodes, &rec->found_key);
5806                 while (1) {
5807                         ret = run_next_block(trans, root, bits, bits_nr, &last,
5808                                              &pending, &seen, &reada,
5809                                              &nodes, &extent_cache,
5810                                              &chunk_cache, &dev_cache,
5811                                              &block_group_cache,
5812                                              &dev_extent_cache,
5813                                              &rec->ri);
5814                         if (ret != 0)
5815                                 break;
5816                 }
5817                 free_extent_buffer(buf);
5818                 list_del(&rec->list);
5819                 free(rec);
5820         }
5821
5822         if (ret >= 0)
5823                 ret = check_extent_refs(trans, root, &extent_cache);
5824         if (ret == -EAGAIN) {
5825                 ret = btrfs_commit_transaction(trans, root);
5826                 if (ret)
5827                         goto out;
5828
5829                 trans = btrfs_start_transaction(root, 1);
5830                 if (IS_ERR(trans)) {
5831                         ret = PTR_ERR(trans);
5832                         goto out;
5833                 }
5834
5835                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
5836                 free_extent_cache_tree(&seen);
5837                 free_extent_cache_tree(&pending);
5838                 free_extent_cache_tree(&reada);
5839                 free_extent_cache_tree(&nodes);
5840                 free_extent_record_cache(root->fs_info, &extent_cache);
5841                 goto again;
5842         }
5843
5844         err = check_chunks(&chunk_cache, &block_group_cache,
5845                            &dev_extent_cache, NULL, NULL, 0);
5846         if (err && !ret)
5847                 ret = err;
5848
5849         err = check_devices(&dev_cache, &dev_extent_cache);
5850         if (err && !ret)
5851                 ret = err;
5852
5853         if (trans) {
5854                 err = btrfs_commit_transaction(trans, root);
5855                 if (!ret)
5856                         ret = err;
5857         }
5858 out:
5859         if (repair) {
5860                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
5861                 root->fs_info->fsck_extent_cache = NULL;
5862                 root->fs_info->free_extent_hook = NULL;
5863                 root->fs_info->corrupt_blocks = NULL;
5864         }
5865         free(bits);
5866         free_chunk_cache_tree(&chunk_cache);
5867         free_device_cache_tree(&dev_cache);
5868         free_block_group_tree(&block_group_cache);
5869         free_device_extent_tree(&dev_extent_cache);
5870         return ret;
5871 }
5872
5873 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
5874                            struct btrfs_root *root, int overwrite)
5875 {
5876         struct extent_buffer *c;
5877         struct extent_buffer *old = root->node;
5878         int level;
5879         struct btrfs_disk_key disk_key = {0,0,0};
5880
5881         level = 0;
5882
5883         if (overwrite) {
5884                 c = old;
5885                 extent_buffer_get(c);
5886                 goto init;
5887         }
5888         c = btrfs_alloc_free_block(trans, root,
5889                                    btrfs_level_size(root, 0),
5890                                    root->root_key.objectid,
5891                                    &disk_key, level, 0, 0);
5892         if (IS_ERR(c)) {
5893                 c = old;
5894                 extent_buffer_get(c);
5895         }
5896 init:
5897         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
5898         btrfs_set_header_level(c, level);
5899         btrfs_set_header_bytenr(c, c->start);
5900         btrfs_set_header_generation(c, trans->transid);
5901         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
5902         btrfs_set_header_owner(c, root->root_key.objectid);
5903
5904         write_extent_buffer(c, root->fs_info->fsid,
5905                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
5906
5907         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
5908                             btrfs_header_chunk_tree_uuid(c),
5909                             BTRFS_UUID_SIZE);
5910
5911         btrfs_mark_buffer_dirty(c);
5912
5913         free_extent_buffer(old);
5914         root->node = c;
5915         add_root_to_dirty_list(root);
5916         return 0;
5917 }
5918
5919 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
5920                                 struct extent_buffer *eb, int tree_root)
5921 {
5922         struct extent_buffer *tmp;
5923         struct btrfs_root_item *ri;
5924         struct btrfs_key key;
5925         u64 bytenr;
5926         u32 leafsize;
5927         int level = btrfs_header_level(eb);
5928         int nritems;
5929         int ret;
5930         int i;
5931
5932         btrfs_pin_extent(fs_info, eb->start, eb->len);
5933
5934         leafsize = btrfs_super_leafsize(fs_info->super_copy);
5935         nritems = btrfs_header_nritems(eb);
5936         for (i = 0; i < nritems; i++) {
5937                 if (level == 0) {
5938                         btrfs_item_key_to_cpu(eb, &key, i);
5939                         if (key.type != BTRFS_ROOT_ITEM_KEY)
5940                                 continue;
5941                         /* Skip the extent root and reloc roots */
5942                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
5943                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
5944                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
5945                                 continue;
5946                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
5947                         bytenr = btrfs_disk_root_bytenr(eb, ri);
5948
5949                         /*
5950                          * If at any point we start needing the real root we
5951                          * will have to build a stump root for the root we are
5952                          * in, but for now this doesn't actually use the root so
5953                          * just pass in extent_root.
5954                          */
5955                         tmp = read_tree_block(fs_info->extent_root, bytenr,
5956                                               leafsize, 0);
5957                         if (!tmp) {
5958                                 fprintf(stderr, "Error reading root block\n");
5959                                 return -EIO;
5960                         }
5961                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
5962                         free_extent_buffer(tmp);
5963                         if (ret)
5964                                 return ret;
5965                 } else {
5966                         bytenr = btrfs_node_blockptr(eb, i);
5967
5968                         /* If we aren't the tree root don't read the block */
5969                         if (level == 1 && !tree_root) {
5970                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
5971                                 continue;
5972                         }
5973
5974                         tmp = read_tree_block(fs_info->extent_root, bytenr,
5975                                               leafsize, 0);
5976                         if (!tmp) {
5977                                 fprintf(stderr, "Error reading tree block\n");
5978                                 return -EIO;
5979                         }
5980                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
5981                         free_extent_buffer(tmp);
5982                         if (ret)
5983                                 return ret;
5984                 }
5985         }
5986
5987         return 0;
5988 }
5989
5990 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
5991 {
5992         int ret;
5993
5994         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
5995         if (ret)
5996                 return ret;
5997
5998         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
5999 }
6000
6001 static int reset_block_groups(struct btrfs_fs_info *fs_info)
6002 {
6003         struct btrfs_path *path;
6004         struct extent_buffer *leaf;
6005         struct btrfs_chunk *chunk;
6006         struct btrfs_key key;
6007         int ret;
6008
6009         path = btrfs_alloc_path();
6010         if (!path)
6011                 return -ENOMEM;
6012
6013         key.objectid = 0;
6014         key.type = BTRFS_CHUNK_ITEM_KEY;
6015         key.offset = 0;
6016
6017         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
6018         if (ret < 0) {
6019                 btrfs_free_path(path);
6020                 return ret;
6021         }
6022
6023         /*
6024          * We do this in case the block groups were screwed up and had alloc
6025          * bits that aren't actually set on the chunks.  This happens with
6026          * restored images every time and could happen in real life I guess.
6027          */
6028         fs_info->avail_data_alloc_bits = 0;
6029         fs_info->avail_metadata_alloc_bits = 0;
6030         fs_info->avail_system_alloc_bits = 0;
6031
6032         /* First we need to create the in-memory block groups */
6033         while (1) {
6034                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6035                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
6036                         if (ret < 0) {
6037                                 btrfs_free_path(path);
6038                                 return ret;
6039                         }
6040                         if (ret) {
6041                                 ret = 0;
6042                                 break;
6043                         }
6044                 }
6045                 leaf = path->nodes[0];
6046                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6047                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
6048                         path->slots[0]++;
6049                         continue;
6050                 }
6051
6052                 chunk = btrfs_item_ptr(leaf, path->slots[0],
6053                                        struct btrfs_chunk);
6054                 btrfs_add_block_group(fs_info, 0,
6055                                       btrfs_chunk_type(leaf, chunk),
6056                                       key.objectid, key.offset,
6057                                       btrfs_chunk_length(leaf, chunk));
6058                 path->slots[0]++;
6059         }
6060
6061         btrfs_free_path(path);
6062         return 0;
6063 }
6064
6065 static int reset_balance(struct btrfs_trans_handle *trans,
6066                          struct btrfs_fs_info *fs_info)
6067 {
6068         struct btrfs_root *root = fs_info->tree_root;
6069         struct btrfs_path *path;
6070         struct extent_buffer *leaf;
6071         struct btrfs_key key;
6072         int del_slot, del_nr = 0;
6073         int ret;
6074         int found = 0;
6075
6076         path = btrfs_alloc_path();
6077         if (!path)
6078                 return -ENOMEM;
6079
6080         key.objectid = BTRFS_BALANCE_OBJECTID;
6081         key.type = BTRFS_BALANCE_ITEM_KEY;
6082         key.offset = 0;
6083
6084         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6085         if (ret) {
6086                 if (ret > 0)
6087                         ret = 0;
6088                 goto out;
6089         }
6090
6091         ret = btrfs_del_item(trans, root, path);
6092         if (ret)
6093                 goto out;
6094         btrfs_release_path(path);
6095
6096         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
6097         key.type = BTRFS_ROOT_ITEM_KEY;
6098         key.offset = 0;
6099
6100         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6101         if (ret < 0)
6102                 goto out;
6103         while (1) {
6104                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6105                         if (!found)
6106                                 break;
6107
6108                         if (del_nr) {
6109                                 ret = btrfs_del_items(trans, root, path,
6110                                                       del_slot, del_nr);
6111                                 del_nr = 0;
6112                                 if (ret)
6113                                         goto out;
6114                         }
6115                         key.offset++;
6116                         btrfs_release_path(path);
6117
6118                         found = 0;
6119                         ret = btrfs_search_slot(trans, root, &key, path,
6120                                                 -1, 1);
6121                         if (ret < 0)
6122                                 goto out;
6123                         continue;
6124                 }
6125                 found = 1;
6126                 leaf = path->nodes[0];
6127                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6128                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
6129                         break;
6130                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6131                         path->slots[0]++;
6132                         continue;
6133                 }
6134                 if (!del_nr) {
6135                         del_slot = path->slots[0];
6136                         del_nr = 1;
6137                 } else {
6138                         del_nr++;
6139                 }
6140                 path->slots[0]++;
6141         }
6142
6143         if (del_nr) {
6144                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
6145                 if (ret)
6146                         goto out;
6147         }
6148         btrfs_release_path(path);
6149
6150         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
6151         key.type = BTRFS_ROOT_ITEM_KEY;
6152         key.offset = (u64)-1;
6153         root = btrfs_read_fs_root(fs_info, &key);
6154         if (IS_ERR(root)) {
6155                 fprintf(stderr, "Error reading data reloc tree\n");
6156                 return PTR_ERR(root);
6157         }
6158         root->track_dirty = 1;
6159         if (root->last_trans != trans->transid) {
6160                 root->last_trans = trans->transid;
6161                 root->commit_root = root->node;
6162                 extent_buffer_get(root->node);
6163         }
6164         ret = btrfs_fsck_reinit_root(trans, root, 0);
6165 out:
6166         btrfs_free_path(path);
6167         return ret;
6168 }
6169
6170 static int reinit_extent_tree(struct btrfs_fs_info *fs_info)
6171 {
6172         struct btrfs_trans_handle *trans;
6173         u64 start = 0;
6174         int ret;
6175
6176         /*
6177          * The only reason we don't do this is because right now we're just
6178          * walking the trees we find and pinning down their bytes, we don't look
6179          * at any of the leaves.  In order to do mixed groups we'd have to check
6180          * the leaves of any fs roots and pin down the bytes for any file
6181          * extents we find.  Not hard but why do it if we don't have to?
6182          */
6183         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
6184                 fprintf(stderr, "We don't support re-initing the extent tree "
6185                         "for mixed block groups yet, please notify a btrfs "
6186                         "developer you want to do this so they can add this "
6187                         "functionality.\n");
6188                 return -EINVAL;
6189         }
6190
6191         trans = btrfs_start_transaction(fs_info->extent_root, 1);
6192         if (IS_ERR(trans)) {
6193                 fprintf(stderr, "Error starting transaction\n");
6194                 return PTR_ERR(trans);
6195         }
6196
6197         /*
6198          * first we need to walk all of the trees except the extent tree and pin
6199          * down the bytes that are in use so we don't overwrite any existing
6200          * metadata.
6201          */
6202         ret = pin_metadata_blocks(fs_info);
6203         if (ret) {
6204                 fprintf(stderr, "error pinning down used bytes\n");
6205                 return ret;
6206         }
6207
6208         /*
6209          * Need to drop all the block groups since we're going to recreate all
6210          * of them again.
6211          */
6212         btrfs_free_block_groups(fs_info);
6213         ret = reset_block_groups(fs_info);
6214         if (ret) {
6215                 fprintf(stderr, "error resetting the block groups\n");
6216                 return ret;
6217         }
6218
6219         ret = reset_balance(trans, fs_info);
6220         if (ret) {
6221                 fprintf(stderr, "error reseting the pending balance\n");
6222                 return ret;
6223         }
6224
6225         /* Ok we can allocate now, reinit the extent root */
6226         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
6227         if (ret) {
6228                 fprintf(stderr, "extent root initialization failed\n");
6229                 /*
6230                  * When the transaction code is updated we should end the
6231                  * transaction, but for now progs only knows about commit so
6232                  * just return an error.
6233                  */
6234                 return ret;
6235         }
6236
6237         /*
6238          * Now we have all the in-memory block groups setup so we can make
6239          * allocations properly, and the metadata we care about is safe since we
6240          * pinned all of it above.
6241          */
6242         while (1) {
6243                 struct btrfs_block_group_cache *cache;
6244
6245                 cache = btrfs_lookup_first_block_group(fs_info, start);
6246                 if (!cache)
6247                         break;
6248                 start = cache->key.objectid + cache->key.offset;
6249                 ret = btrfs_insert_item(trans, fs_info->extent_root,
6250                                         &cache->key, &cache->item,
6251                                         sizeof(cache->item));
6252                 if (ret) {
6253                         fprintf(stderr, "Error adding block group\n");
6254                         return ret;
6255                 }
6256                 btrfs_extent_post_op(trans, fs_info->extent_root);
6257         }
6258
6259         /*
6260          * Ok now we commit and run the normal fsck, which will add extent
6261          * entries for all of the items it finds.
6262          */
6263         return btrfs_commit_transaction(trans, fs_info->extent_root);
6264 }
6265
6266 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
6267 {
6268         struct btrfs_path *path;
6269         struct btrfs_trans_handle *trans;
6270         struct btrfs_key key;
6271         int ret;
6272
6273         printf("Recowing metadata block %llu\n", eb->start);
6274         key.objectid = btrfs_header_owner(eb);
6275         key.type = BTRFS_ROOT_ITEM_KEY;
6276         key.offset = (u64)-1;
6277
6278         root = btrfs_read_fs_root(root->fs_info, &key);
6279         if (IS_ERR(root)) {
6280                 fprintf(stderr, "Couldn't find owner root %llu\n",
6281                         key.objectid);
6282                 return PTR_ERR(root);
6283         }
6284
6285         path = btrfs_alloc_path();
6286         if (!path)
6287                 return -ENOMEM;
6288
6289         trans = btrfs_start_transaction(root, 1);
6290         if (IS_ERR(trans)) {
6291                 btrfs_free_path(path);
6292                 return PTR_ERR(trans);
6293         }
6294
6295         path->lowest_level = btrfs_header_level(eb);
6296         if (path->lowest_level)
6297                 btrfs_node_key_to_cpu(eb, &key, 0);
6298         else
6299                 btrfs_item_key_to_cpu(eb, &key, 0);
6300
6301         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6302         btrfs_commit_transaction(trans, root);
6303         btrfs_free_path(path);
6304         return ret;
6305 }
6306
6307 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
6308 {
6309         struct btrfs_path *path;
6310         struct btrfs_trans_handle *trans;
6311         struct btrfs_key key;
6312         int ret;
6313
6314         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
6315                bad->key.type, bad->key.offset);
6316         key.objectid = bad->root_id;
6317         key.type = BTRFS_ROOT_ITEM_KEY;
6318         key.offset = (u64)-1;
6319
6320         root = btrfs_read_fs_root(root->fs_info, &key);
6321         if (IS_ERR(root)) {
6322                 fprintf(stderr, "Couldn't find owner root %llu\n",
6323                         key.objectid);
6324                 return PTR_ERR(root);
6325         }
6326
6327         path = btrfs_alloc_path();
6328         if (!path)
6329                 return -ENOMEM;
6330
6331         trans = btrfs_start_transaction(root, 1);
6332         if (IS_ERR(trans)) {
6333                 btrfs_free_path(path);
6334                 return PTR_ERR(trans);
6335         }
6336
6337         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
6338         if (ret) {
6339                 if (ret > 0)
6340                         ret = 0;
6341                 goto out;
6342         }
6343         ret = btrfs_del_item(trans, root, path);
6344 out:
6345         btrfs_commit_transaction(trans, root);
6346         btrfs_free_path(path);
6347         return ret;
6348 }
6349
6350 static struct option long_options[] = {
6351         { "super", 1, NULL, 's' },
6352         { "repair", 0, NULL, 0 },
6353         { "init-csum-tree", 0, NULL, 0 },
6354         { "init-extent-tree", 0, NULL, 0 },
6355         { "backup", 0, NULL, 0 },
6356         { NULL, 0, NULL, 0}
6357 };
6358
6359 const char * const cmd_check_usage[] = {
6360         "btrfs check [options] <device>",
6361         "Check an unmounted btrfs filesystem.",
6362         "",
6363         "-s|--super <superblock>     use this superblock copy",
6364         "-b|--backup                 use the backup root copy",
6365         "--repair                    try to repair the filesystem",
6366         "--init-csum-tree            create a new CRC tree",
6367         "--init-extent-tree          create a new extent tree",
6368         NULL
6369 };
6370
6371 int cmd_check(int argc, char **argv)
6372 {
6373         struct cache_tree root_cache;
6374         struct btrfs_root *root;
6375         struct btrfs_fs_info *info;
6376         u64 bytenr = 0;
6377         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
6378         int ret;
6379         int num;
6380         int option_index = 0;
6381         int init_csum_tree = 0;
6382         int init_extent_tree = 0;
6383         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_PARTIAL;
6384
6385         while(1) {
6386                 int c;
6387                 c = getopt_long(argc, argv, "as:b", long_options,
6388                                 &option_index);
6389                 if (c < 0)
6390                         break;
6391                 switch(c) {
6392                         case 'a': /* ignored */ break;
6393                         case 'b':
6394                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
6395                                 break;
6396                         case 's':
6397                                 num = atol(optarg);
6398                                 bytenr = btrfs_sb_offset(num);
6399                                 printf("using SB copy %d, bytenr %llu\n", num,
6400                                        (unsigned long long)bytenr);
6401                                 break;
6402                         case '?':
6403                         case 'h':
6404                                 usage(cmd_check_usage);
6405                 }
6406                 if (option_index == 1) {
6407                         printf("enabling repair mode\n");
6408                         repair = 1;
6409                         ctree_flags |= OPEN_CTREE_WRITES;
6410                 } else if (option_index == 2) {
6411                         printf("Creating a new CRC tree\n");
6412                         init_csum_tree = 1;
6413                         repair = 1;
6414                         ctree_flags |= OPEN_CTREE_WRITES;
6415                 } else if (option_index == 3) {
6416                         init_extent_tree = 1;
6417                         ctree_flags |= (OPEN_CTREE_WRITES |
6418                                         OPEN_CTREE_NO_BLOCK_GROUPS);
6419                         repair = 1;
6420                 }
6421
6422         }
6423         argc = argc - optind;
6424
6425         if (argc != 1)
6426                 usage(cmd_check_usage);
6427
6428         radix_tree_init();
6429         cache_tree_init(&root_cache);
6430
6431         if((ret = check_mounted(argv[optind])) < 0) {
6432                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
6433                 return ret;
6434         } else if(ret) {
6435                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
6436                 return -EBUSY;
6437         }
6438
6439         info = open_ctree_fs_info(argv[optind], bytenr, 0, ctree_flags);
6440         if (!info) {
6441                 fprintf(stderr, "Couldn't open file system\n");
6442                 return -EIO;
6443         }
6444
6445         uuid_unparse(info->super_copy->fsid, uuidbuf);
6446         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
6447
6448         if (!extent_buffer_uptodate(info->tree_root->node) ||
6449             !extent_buffer_uptodate(info->dev_root->node) ||
6450             !extent_buffer_uptodate(info->chunk_root->node)) {
6451                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6452                 return -EIO;
6453         }
6454
6455         root = info->fs_root;
6456         if (init_extent_tree) {
6457                 printf("Creating a new extent tree\n");
6458                 ret = reinit_extent_tree(info);
6459                 if (ret)
6460                         return ret;
6461         }
6462         if (!extent_buffer_uptodate(info->extent_root->node)) {
6463                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6464                 return -EIO;
6465         }
6466
6467         fprintf(stderr, "checking extents\n");
6468         if (init_csum_tree) {
6469                 struct btrfs_trans_handle *trans;
6470
6471                 fprintf(stderr, "Reinit crc root\n");
6472                 trans = btrfs_start_transaction(info->csum_root, 1);
6473                 if (IS_ERR(trans)) {
6474                         fprintf(stderr, "Error starting transaction\n");
6475                         return PTR_ERR(trans);
6476                 }
6477
6478                 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
6479                 if (ret) {
6480                         fprintf(stderr, "crc root initialization failed\n");
6481                         return -EIO;
6482                 }
6483
6484                 ret = btrfs_commit_transaction(trans, info->csum_root);
6485                 if (ret)
6486                         exit(1);
6487
6488                 ret = check_chunks_and_extents(root);
6489                 if (ret)
6490                         fprintf(stderr,
6491                                 "Errors found in extent allocation tree or chunk allocation\n");
6492                 goto out;
6493         }
6494         ret = check_chunks_and_extents(root);
6495         if (ret)
6496                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
6497
6498         fprintf(stderr, "checking free space cache\n");
6499         ret = check_space_cache(root);
6500         if (ret)
6501                 goto out;
6502
6503         fprintf(stderr, "checking fs roots\n");
6504         ret = check_fs_roots(root, &root_cache);
6505         if (ret)
6506                 goto out;
6507
6508         fprintf(stderr, "checking csums\n");
6509         ret = check_csums(root);
6510         if (ret)
6511                 goto out;
6512
6513         fprintf(stderr, "checking root refs\n");
6514         ret = check_root_refs(root, &root_cache);
6515         if (ret)
6516                 goto out;
6517
6518         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
6519                 struct extent_buffer *eb;
6520
6521                 eb = list_first_entry(&root->fs_info->recow_ebs,
6522                                       struct extent_buffer, recow);
6523                 ret = recow_extent_buffer(root, eb);
6524                 if (ret)
6525                         break;
6526         }
6527
6528         while (!list_empty(&delete_items)) {
6529                 struct bad_item *bad;
6530
6531                 bad = list_first_entry(&delete_items, struct bad_item, list);
6532                 list_del_init(&bad->list);
6533                 if (repair)
6534                         ret = delete_bad_item(root, bad);
6535                 free(bad);
6536         }
6537
6538         if (!list_empty(&root->fs_info->recow_ebs)) {
6539                 fprintf(stderr, "Transid errors in file system\n");
6540                 ret = 1;
6541         }
6542 out:
6543         free_root_recs_tree(&root_cache);
6544         close_ctree(root);
6545
6546         if (found_old_backref) { /*
6547                  * there was a disk format change when mixed
6548                  * backref was in testing tree. The old format
6549                  * existed about one week.
6550                  */
6551                 printf("\n * Found old mixed backref format. "
6552                        "The old format is not supported! *"
6553                        "\n * Please mount the FS in readonly mode, "
6554                        "backup data and re-format the FS. *\n\n");
6555                 ret = 1;
6556         }
6557         printf("found %llu bytes used err is %d\n",
6558                (unsigned long long)bytes_used, ret);
6559         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
6560         printf("total tree bytes: %llu\n",
6561                (unsigned long long)total_btree_bytes);
6562         printf("total fs tree bytes: %llu\n",
6563                (unsigned long long)total_fs_tree_bytes);
6564         printf("total extent tree bytes: %llu\n",
6565                (unsigned long long)total_extent_tree_bytes);
6566         printf("btree space waste bytes: %llu\n",
6567                (unsigned long long)btree_space_waste);
6568         printf("file data blocks allocated: %llu\n referenced %llu\n",
6569                 (unsigned long long)data_bytes_allocated,
6570                 (unsigned long long)data_bytes_referenced);
6571         printf("%s\n", BTRFS_BUILD_VERSION);
6572         return ret;
6573 }