Btrfs-progs: fsck: deal with corrupted csum root
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "ctree.h"
31 #include "volumes.h"
32 #include "repair.h"
33 #include "disk-io.h"
34 #include "print-tree.h"
35 #include "transaction.h"
36 #include "version.h"
37 #include "utils.h"
38 #include "commands.h"
39 #include "free-space-cache.h"
40 #include "btrfsck.h"
41 #include "qgroup-verify.h"
42
43 static u64 bytes_used = 0;
44 static u64 total_csum_bytes = 0;
45 static u64 total_btree_bytes = 0;
46 static u64 total_fs_tree_bytes = 0;
47 static u64 total_extent_tree_bytes = 0;
48 static u64 btree_space_waste = 0;
49 static u64 data_bytes_allocated = 0;
50 static u64 data_bytes_referenced = 0;
51 static int found_old_backref = 0;
52 static LIST_HEAD(duplicate_extents);
53 static LIST_HEAD(delete_items);
54 static int repair = 0;
55 static int no_holes = 0;
56 static int init_extent_tree = 0;
57 static int check_data_csum = 0;
58
59 struct extent_backref {
60         struct list_head list;
61         unsigned int is_data:1;
62         unsigned int found_extent_tree:1;
63         unsigned int full_backref:1;
64         unsigned int found_ref:1;
65         unsigned int broken:1;
66 };
67
68 struct data_backref {
69         struct extent_backref node;
70         union {
71                 u64 parent;
72                 u64 root;
73         };
74         u64 owner;
75         u64 offset;
76         u64 disk_bytenr;
77         u64 bytes;
78         u64 ram_bytes;
79         u32 num_refs;
80         u32 found_ref;
81 };
82
83 struct tree_backref {
84         struct extent_backref node;
85         union {
86                 u64 parent;
87                 u64 root;
88         };
89 };
90
91 struct extent_record {
92         struct list_head backrefs;
93         struct list_head dups;
94         struct list_head list;
95         struct cache_extent cache;
96         struct btrfs_disk_key parent_key;
97         u64 start;
98         u64 max_size;
99         u64 nr;
100         u64 refs;
101         u64 extent_item_refs;
102         u64 generation;
103         u64 parent_generation;
104         u64 info_objectid;
105         u32 num_duplicates;
106         u8 info_level;
107         unsigned int found_rec:1;
108         unsigned int content_checked:1;
109         unsigned int owner_ref_checked:1;
110         unsigned int is_root:1;
111         unsigned int metadata:1;
112 };
113
114 struct inode_backref {
115         struct list_head list;
116         unsigned int found_dir_item:1;
117         unsigned int found_dir_index:1;
118         unsigned int found_inode_ref:1;
119         unsigned int filetype:8;
120         int errors;
121         unsigned int ref_type;
122         u64 dir;
123         u64 index;
124         u16 namelen;
125         char name[0];
126 };
127
128 struct dropping_root_item_record {
129         struct list_head list;
130         struct btrfs_root_item ri;
131         struct btrfs_key found_key;
132 };
133
134 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
135 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
136 #define REF_ERR_NO_INODE_REF            (1 << 2)
137 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
138 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
139 #define REF_ERR_DUP_INODE_REF           (1 << 5)
140 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
141 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
142 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
143 #define REF_ERR_NO_ROOT_REF             (1 << 9)
144 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
145 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
146 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
147
148 struct inode_record {
149         struct list_head backrefs;
150         unsigned int checked:1;
151         unsigned int merging:1;
152         unsigned int found_inode_item:1;
153         unsigned int found_dir_item:1;
154         unsigned int found_file_extent:1;
155         unsigned int found_csum_item:1;
156         unsigned int some_csum_missing:1;
157         unsigned int nodatasum:1;
158         int errors;
159
160         u64 ino;
161         u32 nlink;
162         u32 imode;
163         u64 isize;
164         u64 nbytes;
165
166         u32 found_link;
167         u64 found_size;
168         u64 extent_start;
169         u64 extent_end;
170         u64 first_extent_gap;
171
172         u32 refs;
173 };
174
175 #define I_ERR_NO_INODE_ITEM             (1 << 0)
176 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
177 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
178 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
179 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
180 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
181 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
182 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
183 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
184 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
185 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
186 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
187 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
188 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
189
190 struct root_backref {
191         struct list_head list;
192         unsigned int found_dir_item:1;
193         unsigned int found_dir_index:1;
194         unsigned int found_back_ref:1;
195         unsigned int found_forward_ref:1;
196         unsigned int reachable:1;
197         int errors;
198         u64 ref_root;
199         u64 dir;
200         u64 index;
201         u16 namelen;
202         char name[0];
203 };
204
205 struct root_record {
206         struct list_head backrefs;
207         struct cache_extent cache;
208         unsigned int found_root_item:1;
209         u64 objectid;
210         u32 found_ref;
211 };
212
213 struct ptr_node {
214         struct cache_extent cache;
215         void *data;
216 };
217
218 struct shared_node {
219         struct cache_extent cache;
220         struct cache_tree root_cache;
221         struct cache_tree inode_cache;
222         struct inode_record *current;
223         u32 refs;
224 };
225
226 struct block_info {
227         u64 start;
228         u32 size;
229 };
230
231 struct walk_control {
232         struct cache_tree shared;
233         struct shared_node *nodes[BTRFS_MAX_LEVEL];
234         int active_node;
235         int root_level;
236 };
237
238 struct bad_item {
239         struct btrfs_key key;
240         u64 root_id;
241         struct list_head list;
242 };
243
244 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
245
246 static void record_root_in_trans(struct btrfs_trans_handle *trans,
247                                  struct btrfs_root *root)
248 {
249         if (root->last_trans != trans->transid) {
250                 root->track_dirty = 1;
251                 root->last_trans = trans->transid;
252                 root->commit_root = root->node;
253                 extent_buffer_get(root->node);
254         }
255 }
256
257 static u8 imode_to_type(u32 imode)
258 {
259 #define S_SHIFT 12
260         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
261                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
262                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
263                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
264                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
265                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
266                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
267                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
268         };
269
270         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
271 #undef S_SHIFT
272 }
273
274 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
275 {
276         struct device_record *rec1;
277         struct device_record *rec2;
278
279         rec1 = rb_entry(node1, struct device_record, node);
280         rec2 = rb_entry(node2, struct device_record, node);
281         if (rec1->devid > rec2->devid)
282                 return -1;
283         else if (rec1->devid < rec2->devid)
284                 return 1;
285         else
286                 return 0;
287 }
288
289 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
290 {
291         struct inode_record *rec;
292         struct inode_backref *backref;
293         struct inode_backref *orig;
294         size_t size;
295
296         rec = malloc(sizeof(*rec));
297         memcpy(rec, orig_rec, sizeof(*rec));
298         rec->refs = 1;
299         INIT_LIST_HEAD(&rec->backrefs);
300
301         list_for_each_entry(orig, &orig_rec->backrefs, list) {
302                 size = sizeof(*orig) + orig->namelen + 1;
303                 backref = malloc(size);
304                 memcpy(backref, orig, size);
305                 list_add_tail(&backref->list, &rec->backrefs);
306         }
307         return rec;
308 }
309
310 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
311 {
312         u64 root_objectid = root->root_key.objectid;
313         int errors = rec->errors;
314
315         if (!errors)
316                 return;
317         /* reloc root errors, we print its corresponding fs root objectid*/
318         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
319                 root_objectid = root->root_key.offset;
320                 fprintf(stderr, "reloc");
321         }
322         fprintf(stderr, "root %llu inode %llu errors %x",
323                 (unsigned long long) root_objectid,
324                 (unsigned long long) rec->ino, rec->errors);
325
326         if (errors & I_ERR_NO_INODE_ITEM)
327                 fprintf(stderr, ", no inode item");
328         if (errors & I_ERR_NO_ORPHAN_ITEM)
329                 fprintf(stderr, ", no orphan item");
330         if (errors & I_ERR_DUP_INODE_ITEM)
331                 fprintf(stderr, ", dup inode item");
332         if (errors & I_ERR_DUP_DIR_INDEX)
333                 fprintf(stderr, ", dup dir index");
334         if (errors & I_ERR_ODD_DIR_ITEM)
335                 fprintf(stderr, ", odd dir item");
336         if (errors & I_ERR_ODD_FILE_EXTENT)
337                 fprintf(stderr, ", odd file extent");
338         if (errors & I_ERR_BAD_FILE_EXTENT)
339                 fprintf(stderr, ", bad file extent");
340         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
341                 fprintf(stderr, ", file extent overlap");
342         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
343                 fprintf(stderr, ", file extent discount");
344         if (errors & I_ERR_DIR_ISIZE_WRONG)
345                 fprintf(stderr, ", dir isize wrong");
346         if (errors & I_ERR_FILE_NBYTES_WRONG)
347                 fprintf(stderr, ", nbytes wrong");
348         if (errors & I_ERR_ODD_CSUM_ITEM)
349                 fprintf(stderr, ", odd csum item");
350         if (errors & I_ERR_SOME_CSUM_MISSING)
351                 fprintf(stderr, ", some csum missing");
352         if (errors & I_ERR_LINK_COUNT_WRONG)
353                 fprintf(stderr, ", link count wrong");
354         fprintf(stderr, "\n");
355 }
356
357 static void print_ref_error(int errors)
358 {
359         if (errors & REF_ERR_NO_DIR_ITEM)
360                 fprintf(stderr, ", no dir item");
361         if (errors & REF_ERR_NO_DIR_INDEX)
362                 fprintf(stderr, ", no dir index");
363         if (errors & REF_ERR_NO_INODE_REF)
364                 fprintf(stderr, ", no inode ref");
365         if (errors & REF_ERR_DUP_DIR_ITEM)
366                 fprintf(stderr, ", dup dir item");
367         if (errors & REF_ERR_DUP_DIR_INDEX)
368                 fprintf(stderr, ", dup dir index");
369         if (errors & REF_ERR_DUP_INODE_REF)
370                 fprintf(stderr, ", dup inode ref");
371         if (errors & REF_ERR_INDEX_UNMATCH)
372                 fprintf(stderr, ", index unmatch");
373         if (errors & REF_ERR_FILETYPE_UNMATCH)
374                 fprintf(stderr, ", filetype unmatch");
375         if (errors & REF_ERR_NAME_TOO_LONG)
376                 fprintf(stderr, ", name too long");
377         if (errors & REF_ERR_NO_ROOT_REF)
378                 fprintf(stderr, ", no root ref");
379         if (errors & REF_ERR_NO_ROOT_BACKREF)
380                 fprintf(stderr, ", no root backref");
381         if (errors & REF_ERR_DUP_ROOT_REF)
382                 fprintf(stderr, ", dup root ref");
383         if (errors & REF_ERR_DUP_ROOT_BACKREF)
384                 fprintf(stderr, ", dup root backref");
385         fprintf(stderr, "\n");
386 }
387
388 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
389                                           u64 ino, int mod)
390 {
391         struct ptr_node *node;
392         struct cache_extent *cache;
393         struct inode_record *rec = NULL;
394         int ret;
395
396         cache = lookup_cache_extent(inode_cache, ino, 1);
397         if (cache) {
398                 node = container_of(cache, struct ptr_node, cache);
399                 rec = node->data;
400                 if (mod && rec->refs > 1) {
401                         node->data = clone_inode_rec(rec);
402                         rec->refs--;
403                         rec = node->data;
404                 }
405         } else if (mod) {
406                 rec = calloc(1, sizeof(*rec));
407                 rec->ino = ino;
408                 rec->extent_start = (u64)-1;
409                 rec->first_extent_gap = (u64)-1;
410                 rec->refs = 1;
411                 INIT_LIST_HEAD(&rec->backrefs);
412
413                 node = malloc(sizeof(*node));
414                 node->cache.start = ino;
415                 node->cache.size = 1;
416                 node->data = rec;
417
418                 if (ino == BTRFS_FREE_INO_OBJECTID)
419                         rec->found_link = 1;
420
421                 ret = insert_cache_extent(inode_cache, &node->cache);
422                 BUG_ON(ret);
423         }
424         return rec;
425 }
426
427 static void free_inode_rec(struct inode_record *rec)
428 {
429         struct inode_backref *backref;
430
431         if (--rec->refs > 0)
432                 return;
433
434         while (!list_empty(&rec->backrefs)) {
435                 backref = list_entry(rec->backrefs.next,
436                                      struct inode_backref, list);
437                 list_del(&backref->list);
438                 free(backref);
439         }
440         free(rec);
441 }
442
443 static int can_free_inode_rec(struct inode_record *rec)
444 {
445         if (!rec->errors && rec->checked && rec->found_inode_item &&
446             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
447                 return 1;
448         return 0;
449 }
450
451 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
452                                  struct inode_record *rec)
453 {
454         struct cache_extent *cache;
455         struct inode_backref *tmp, *backref;
456         struct ptr_node *node;
457         unsigned char filetype;
458
459         if (!rec->found_inode_item)
460                 return;
461
462         filetype = imode_to_type(rec->imode);
463         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
464                 if (backref->found_dir_item && backref->found_dir_index) {
465                         if (backref->filetype != filetype)
466                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
467                         if (!backref->errors && backref->found_inode_ref) {
468                                 list_del(&backref->list);
469                                 free(backref);
470                         }
471                 }
472         }
473
474         if (!rec->checked || rec->merging)
475                 return;
476
477         if (S_ISDIR(rec->imode)) {
478                 if (rec->found_size != rec->isize)
479                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
480                 if (rec->found_file_extent)
481                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
482         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
483                 if (rec->found_dir_item)
484                         rec->errors |= I_ERR_ODD_DIR_ITEM;
485                 if (rec->found_size != rec->nbytes)
486                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
487                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
488                         rec->first_extent_gap = 0;
489                 if (rec->nlink > 0 && !no_holes &&
490                     (rec->extent_end < rec->isize ||
491                      rec->first_extent_gap < rec->isize))
492                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
493         }
494
495         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
496                 if (rec->found_csum_item && rec->nodatasum)
497                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
498                 if (rec->some_csum_missing && !rec->nodatasum)
499                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
500         }
501
502         BUG_ON(rec->refs != 1);
503         if (can_free_inode_rec(rec)) {
504                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
505                 node = container_of(cache, struct ptr_node, cache);
506                 BUG_ON(node->data != rec);
507                 remove_cache_extent(inode_cache, &node->cache);
508                 free(node);
509                 free_inode_rec(rec);
510         }
511 }
512
513 static int check_orphan_item(struct btrfs_root *root, u64 ino)
514 {
515         struct btrfs_path path;
516         struct btrfs_key key;
517         int ret;
518
519         key.objectid = BTRFS_ORPHAN_OBJECTID;
520         key.type = BTRFS_ORPHAN_ITEM_KEY;
521         key.offset = ino;
522
523         btrfs_init_path(&path);
524         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
525         btrfs_release_path(&path);
526         if (ret > 0)
527                 ret = -ENOENT;
528         return ret;
529 }
530
531 static int process_inode_item(struct extent_buffer *eb,
532                               int slot, struct btrfs_key *key,
533                               struct shared_node *active_node)
534 {
535         struct inode_record *rec;
536         struct btrfs_inode_item *item;
537
538         rec = active_node->current;
539         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
540         if (rec->found_inode_item) {
541                 rec->errors |= I_ERR_DUP_INODE_ITEM;
542                 return 1;
543         }
544         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
545         rec->nlink = btrfs_inode_nlink(eb, item);
546         rec->isize = btrfs_inode_size(eb, item);
547         rec->nbytes = btrfs_inode_nbytes(eb, item);
548         rec->imode = btrfs_inode_mode(eb, item);
549         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
550                 rec->nodatasum = 1;
551         rec->found_inode_item = 1;
552         if (rec->nlink == 0)
553                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
554         maybe_free_inode_rec(&active_node->inode_cache, rec);
555         return 0;
556 }
557
558 static struct inode_backref *get_inode_backref(struct inode_record *rec,
559                                                 const char *name,
560                                                 int namelen, u64 dir)
561 {
562         struct inode_backref *backref;
563
564         list_for_each_entry(backref, &rec->backrefs, list) {
565                 if (backref->dir != dir || backref->namelen != namelen)
566                         continue;
567                 if (memcmp(name, backref->name, namelen))
568                         continue;
569                 return backref;
570         }
571
572         backref = malloc(sizeof(*backref) + namelen + 1);
573         memset(backref, 0, sizeof(*backref));
574         backref->dir = dir;
575         backref->namelen = namelen;
576         memcpy(backref->name, name, namelen);
577         backref->name[namelen] = '\0';
578         list_add_tail(&backref->list, &rec->backrefs);
579         return backref;
580 }
581
582 static int add_inode_backref(struct cache_tree *inode_cache,
583                              u64 ino, u64 dir, u64 index,
584                              const char *name, int namelen,
585                              int filetype, int itemtype, int errors)
586 {
587         struct inode_record *rec;
588         struct inode_backref *backref;
589
590         rec = get_inode_rec(inode_cache, ino, 1);
591         backref = get_inode_backref(rec, name, namelen, dir);
592         if (errors)
593                 backref->errors |= errors;
594         if (itemtype == BTRFS_DIR_INDEX_KEY) {
595                 if (backref->found_dir_index)
596                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
597                 if (backref->found_inode_ref && backref->index != index)
598                         backref->errors |= REF_ERR_INDEX_UNMATCH;
599                 if (backref->found_dir_item && backref->filetype != filetype)
600                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
601
602                 backref->index = index;
603                 backref->filetype = filetype;
604                 backref->found_dir_index = 1;
605         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
606                 rec->found_link++;
607                 if (backref->found_dir_item)
608                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
609                 if (backref->found_dir_index && backref->filetype != filetype)
610                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
611
612                 backref->filetype = filetype;
613                 backref->found_dir_item = 1;
614         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
615                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
616                 if (backref->found_inode_ref)
617                         backref->errors |= REF_ERR_DUP_INODE_REF;
618                 if (backref->found_dir_index && backref->index != index)
619                         backref->errors |= REF_ERR_INDEX_UNMATCH;
620
621                 backref->ref_type = itemtype;
622                 backref->index = index;
623                 backref->found_inode_ref = 1;
624         } else {
625                 BUG_ON(1);
626         }
627
628         maybe_free_inode_rec(inode_cache, rec);
629         return 0;
630 }
631
632 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
633                             struct cache_tree *dst_cache)
634 {
635         struct inode_backref *backref;
636         u32 dir_count = 0;
637
638         dst->merging = 1;
639         list_for_each_entry(backref, &src->backrefs, list) {
640                 if (backref->found_dir_index) {
641                         add_inode_backref(dst_cache, dst->ino, backref->dir,
642                                         backref->index, backref->name,
643                                         backref->namelen, backref->filetype,
644                                         BTRFS_DIR_INDEX_KEY, backref->errors);
645                 }
646                 if (backref->found_dir_item) {
647                         dir_count++;
648                         add_inode_backref(dst_cache, dst->ino,
649                                         backref->dir, 0, backref->name,
650                                         backref->namelen, backref->filetype,
651                                         BTRFS_DIR_ITEM_KEY, backref->errors);
652                 }
653                 if (backref->found_inode_ref) {
654                         add_inode_backref(dst_cache, dst->ino,
655                                         backref->dir, backref->index,
656                                         backref->name, backref->namelen, 0,
657                                         backref->ref_type, backref->errors);
658                 }
659         }
660
661         if (src->found_dir_item)
662                 dst->found_dir_item = 1;
663         if (src->found_file_extent)
664                 dst->found_file_extent = 1;
665         if (src->found_csum_item)
666                 dst->found_csum_item = 1;
667         if (src->some_csum_missing)
668                 dst->some_csum_missing = 1;
669         if (dst->first_extent_gap > src->first_extent_gap)
670                 dst->first_extent_gap = src->first_extent_gap;
671
672         BUG_ON(src->found_link < dir_count);
673         dst->found_link += src->found_link - dir_count;
674         dst->found_size += src->found_size;
675         if (src->extent_start != (u64)-1) {
676                 if (dst->extent_start == (u64)-1) {
677                         dst->extent_start = src->extent_start;
678                         dst->extent_end = src->extent_end;
679                 } else {
680                         if (dst->extent_end > src->extent_start)
681                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
682                         else if (dst->extent_end < src->extent_start &&
683                                  dst->extent_end < dst->first_extent_gap)
684                                 dst->first_extent_gap = dst->extent_end;
685                         if (dst->extent_end < src->extent_end)
686                                 dst->extent_end = src->extent_end;
687                 }
688         }
689
690         dst->errors |= src->errors;
691         if (src->found_inode_item) {
692                 if (!dst->found_inode_item) {
693                         dst->nlink = src->nlink;
694                         dst->isize = src->isize;
695                         dst->nbytes = src->nbytes;
696                         dst->imode = src->imode;
697                         dst->nodatasum = src->nodatasum;
698                         dst->found_inode_item = 1;
699                 } else {
700                         dst->errors |= I_ERR_DUP_INODE_ITEM;
701                 }
702         }
703         dst->merging = 0;
704
705         return 0;
706 }
707
708 static int splice_shared_node(struct shared_node *src_node,
709                               struct shared_node *dst_node)
710 {
711         struct cache_extent *cache;
712         struct ptr_node *node, *ins;
713         struct cache_tree *src, *dst;
714         struct inode_record *rec, *conflict;
715         u64 current_ino = 0;
716         int splice = 0;
717         int ret;
718
719         if (--src_node->refs == 0)
720                 splice = 1;
721         if (src_node->current)
722                 current_ino = src_node->current->ino;
723
724         src = &src_node->root_cache;
725         dst = &dst_node->root_cache;
726 again:
727         cache = search_cache_extent(src, 0);
728         while (cache) {
729                 node = container_of(cache, struct ptr_node, cache);
730                 rec = node->data;
731                 cache = next_cache_extent(cache);
732
733                 if (splice) {
734                         remove_cache_extent(src, &node->cache);
735                         ins = node;
736                 } else {
737                         ins = malloc(sizeof(*ins));
738                         ins->cache.start = node->cache.start;
739                         ins->cache.size = node->cache.size;
740                         ins->data = rec;
741                         rec->refs++;
742                 }
743                 ret = insert_cache_extent(dst, &ins->cache);
744                 if (ret == -EEXIST) {
745                         conflict = get_inode_rec(dst, rec->ino, 1);
746                         merge_inode_recs(rec, conflict, dst);
747                         if (rec->checked) {
748                                 conflict->checked = 1;
749                                 if (dst_node->current == conflict)
750                                         dst_node->current = NULL;
751                         }
752                         maybe_free_inode_rec(dst, conflict);
753                         free_inode_rec(rec);
754                         free(ins);
755                 } else {
756                         BUG_ON(ret);
757                 }
758         }
759
760         if (src == &src_node->root_cache) {
761                 src = &src_node->inode_cache;
762                 dst = &dst_node->inode_cache;
763                 goto again;
764         }
765
766         if (current_ino > 0 && (!dst_node->current ||
767             current_ino > dst_node->current->ino)) {
768                 if (dst_node->current) {
769                         dst_node->current->checked = 1;
770                         maybe_free_inode_rec(dst, dst_node->current);
771                 }
772                 dst_node->current = get_inode_rec(dst, current_ino, 1);
773         }
774         return 0;
775 }
776
777 static void free_inode_ptr(struct cache_extent *cache)
778 {
779         struct ptr_node *node;
780         struct inode_record *rec;
781
782         node = container_of(cache, struct ptr_node, cache);
783         rec = node->data;
784         free_inode_rec(rec);
785         free(node);
786 }
787
788 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
789
790 static struct shared_node *find_shared_node(struct cache_tree *shared,
791                                             u64 bytenr)
792 {
793         struct cache_extent *cache;
794         struct shared_node *node;
795
796         cache = lookup_cache_extent(shared, bytenr, 1);
797         if (cache) {
798                 node = container_of(cache, struct shared_node, cache);
799                 return node;
800         }
801         return NULL;
802 }
803
804 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
805 {
806         int ret;
807         struct shared_node *node;
808
809         node = calloc(1, sizeof(*node));
810         node->cache.start = bytenr;
811         node->cache.size = 1;
812         cache_tree_init(&node->root_cache);
813         cache_tree_init(&node->inode_cache);
814         node->refs = refs;
815
816         ret = insert_cache_extent(shared, &node->cache);
817         BUG_ON(ret);
818         return 0;
819 }
820
821 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
822                              struct walk_control *wc, int level)
823 {
824         struct shared_node *node;
825         struct shared_node *dest;
826
827         if (level == wc->active_node)
828                 return 0;
829
830         BUG_ON(wc->active_node <= level);
831         node = find_shared_node(&wc->shared, bytenr);
832         if (!node) {
833                 add_shared_node(&wc->shared, bytenr, refs);
834                 node = find_shared_node(&wc->shared, bytenr);
835                 wc->nodes[level] = node;
836                 wc->active_node = level;
837                 return 0;
838         }
839
840         if (wc->root_level == wc->active_node &&
841             btrfs_root_refs(&root->root_item) == 0) {
842                 if (--node->refs == 0) {
843                         free_inode_recs_tree(&node->root_cache);
844                         free_inode_recs_tree(&node->inode_cache);
845                         remove_cache_extent(&wc->shared, &node->cache);
846                         free(node);
847                 }
848                 return 1;
849         }
850
851         dest = wc->nodes[wc->active_node];
852         splice_shared_node(node, dest);
853         if (node->refs == 0) {
854                 remove_cache_extent(&wc->shared, &node->cache);
855                 free(node);
856         }
857         return 1;
858 }
859
860 static int leave_shared_node(struct btrfs_root *root,
861                              struct walk_control *wc, int level)
862 {
863         struct shared_node *node;
864         struct shared_node *dest;
865         int i;
866
867         if (level == wc->root_level)
868                 return 0;
869
870         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
871                 if (wc->nodes[i])
872                         break;
873         }
874         BUG_ON(i >= BTRFS_MAX_LEVEL);
875
876         node = wc->nodes[wc->active_node];
877         wc->nodes[wc->active_node] = NULL;
878         wc->active_node = i;
879
880         dest = wc->nodes[wc->active_node];
881         if (wc->active_node < wc->root_level ||
882             btrfs_root_refs(&root->root_item) > 0) {
883                 BUG_ON(node->refs <= 1);
884                 splice_shared_node(node, dest);
885         } else {
886                 BUG_ON(node->refs < 2);
887                 node->refs--;
888         }
889         return 0;
890 }
891
892 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
893                          u64 child_root_id)
894 {
895         struct btrfs_path path;
896         struct btrfs_key key;
897         struct extent_buffer *leaf;
898         int has_parent = 0;
899         int ret;
900
901         btrfs_init_path(&path);
902
903         key.objectid = parent_root_id;
904         key.type = BTRFS_ROOT_REF_KEY;
905         key.offset = child_root_id;
906         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
907                                 0, 0);
908         if (ret < 0)
909                 return ret;
910         btrfs_release_path(&path);
911         if (!ret)
912                 return 1;
913
914         key.objectid = child_root_id;
915         key.type = BTRFS_ROOT_BACKREF_KEY;
916         key.offset = 0;
917         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
918                                 0, 0);
919         if (ret < 0)
920                 goto out;
921
922         while (1) {
923                 leaf = path.nodes[0];
924                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
925                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
926                         if (ret)
927                                 break;
928                         leaf = path.nodes[0];
929                 }
930
931                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
932                 if (key.objectid != child_root_id ||
933                     key.type != BTRFS_ROOT_BACKREF_KEY)
934                         break;
935
936                 has_parent = 1;
937
938                 if (key.offset == parent_root_id) {
939                         btrfs_release_path(&path);
940                         return 1;
941                 }
942
943                 path.slots[0]++;
944         }
945 out:
946         btrfs_release_path(&path);
947         if (ret < 0)
948                 return ret;
949         return has_parent? 0 : -1;
950 }
951
952 static int process_dir_item(struct btrfs_root *root,
953                             struct extent_buffer *eb,
954                             int slot, struct btrfs_key *key,
955                             struct shared_node *active_node)
956 {
957         u32 total;
958         u32 cur = 0;
959         u32 len;
960         u32 name_len;
961         u32 data_len;
962         int error;
963         int nritems = 0;
964         int filetype;
965         struct btrfs_dir_item *di;
966         struct inode_record *rec;
967         struct cache_tree *root_cache;
968         struct cache_tree *inode_cache;
969         struct btrfs_key location;
970         char namebuf[BTRFS_NAME_LEN];
971
972         root_cache = &active_node->root_cache;
973         inode_cache = &active_node->inode_cache;
974         rec = active_node->current;
975         rec->found_dir_item = 1;
976
977         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
978         total = btrfs_item_size_nr(eb, slot);
979         while (cur < total) {
980                 nritems++;
981                 btrfs_dir_item_key_to_cpu(eb, di, &location);
982                 name_len = btrfs_dir_name_len(eb, di);
983                 data_len = btrfs_dir_data_len(eb, di);
984                 filetype = btrfs_dir_type(eb, di);
985
986                 rec->found_size += name_len;
987                 if (name_len <= BTRFS_NAME_LEN) {
988                         len = name_len;
989                         error = 0;
990                 } else {
991                         len = BTRFS_NAME_LEN;
992                         error = REF_ERR_NAME_TOO_LONG;
993                 }
994                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
995
996                 if (location.type == BTRFS_INODE_ITEM_KEY) {
997                         add_inode_backref(inode_cache, location.objectid,
998                                           key->objectid, key->offset, namebuf,
999                                           len, filetype, key->type, error);
1000                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1001                         add_inode_backref(root_cache, location.objectid,
1002                                           key->objectid, key->offset,
1003                                           namebuf, len, filetype,
1004                                           key->type, error);
1005                 } else {
1006                         fprintf(stderr, "warning line %d\n", __LINE__);
1007                 }
1008
1009                 len = sizeof(*di) + name_len + data_len;
1010                 di = (struct btrfs_dir_item *)((char *)di + len);
1011                 cur += len;
1012         }
1013         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1014                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1015
1016         return 0;
1017 }
1018
1019 static int process_inode_ref(struct extent_buffer *eb,
1020                              int slot, struct btrfs_key *key,
1021                              struct shared_node *active_node)
1022 {
1023         u32 total;
1024         u32 cur = 0;
1025         u32 len;
1026         u32 name_len;
1027         u64 index;
1028         int error;
1029         struct cache_tree *inode_cache;
1030         struct btrfs_inode_ref *ref;
1031         char namebuf[BTRFS_NAME_LEN];
1032
1033         inode_cache = &active_node->inode_cache;
1034
1035         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1036         total = btrfs_item_size_nr(eb, slot);
1037         while (cur < total) {
1038                 name_len = btrfs_inode_ref_name_len(eb, ref);
1039                 index = btrfs_inode_ref_index(eb, ref);
1040                 if (name_len <= BTRFS_NAME_LEN) {
1041                         len = name_len;
1042                         error = 0;
1043                 } else {
1044                         len = BTRFS_NAME_LEN;
1045                         error = REF_ERR_NAME_TOO_LONG;
1046                 }
1047                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1048                 add_inode_backref(inode_cache, key->objectid, key->offset,
1049                                   index, namebuf, len, 0, key->type, error);
1050
1051                 len = sizeof(*ref) + name_len;
1052                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1053                 cur += len;
1054         }
1055         return 0;
1056 }
1057
1058 static int process_inode_extref(struct extent_buffer *eb,
1059                                 int slot, struct btrfs_key *key,
1060                                 struct shared_node *active_node)
1061 {
1062         u32 total;
1063         u32 cur = 0;
1064         u32 len;
1065         u32 name_len;
1066         u64 index;
1067         u64 parent;
1068         int error;
1069         struct cache_tree *inode_cache;
1070         struct btrfs_inode_extref *extref;
1071         char namebuf[BTRFS_NAME_LEN];
1072
1073         inode_cache = &active_node->inode_cache;
1074
1075         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1076         total = btrfs_item_size_nr(eb, slot);
1077         while (cur < total) {
1078                 name_len = btrfs_inode_extref_name_len(eb, extref);
1079                 index = btrfs_inode_extref_index(eb, extref);
1080                 parent = btrfs_inode_extref_parent(eb, extref);
1081                 if (name_len <= BTRFS_NAME_LEN) {
1082                         len = name_len;
1083                         error = 0;
1084                 } else {
1085                         len = BTRFS_NAME_LEN;
1086                         error = REF_ERR_NAME_TOO_LONG;
1087                 }
1088                 read_extent_buffer(eb, namebuf,
1089                                    (unsigned long)(extref + 1), len);
1090                 add_inode_backref(inode_cache, key->objectid, parent,
1091                                   index, namebuf, len, 0, key->type, error);
1092
1093                 len = sizeof(*extref) + name_len;
1094                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1095                 cur += len;
1096         }
1097         return 0;
1098
1099 }
1100
1101 static int count_csum_range(struct btrfs_root *root, u64 start,
1102                             u64 len, u64 *found)
1103 {
1104         struct btrfs_key key;
1105         struct btrfs_path path;
1106         struct extent_buffer *leaf;
1107         int ret;
1108         size_t size;
1109         *found = 0;
1110         u64 csum_end;
1111         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1112
1113         btrfs_init_path(&path);
1114
1115         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1116         key.offset = start;
1117         key.type = BTRFS_EXTENT_CSUM_KEY;
1118
1119         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1120                                 &key, &path, 0, 0);
1121         if (ret < 0)
1122                 goto out;
1123         if (ret > 0 && path.slots[0] > 0) {
1124                 leaf = path.nodes[0];
1125                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1126                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1127                     key.type == BTRFS_EXTENT_CSUM_KEY)
1128                         path.slots[0]--;
1129         }
1130
1131         while (len > 0) {
1132                 leaf = path.nodes[0];
1133                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1134                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1135                         if (ret > 0)
1136                                 break;
1137                         else if (ret < 0)
1138                                 goto out;
1139                         leaf = path.nodes[0];
1140                 }
1141
1142                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1143                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1144                     key.type != BTRFS_EXTENT_CSUM_KEY)
1145                         break;
1146
1147                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1148                 if (key.offset >= start + len)
1149                         break;
1150
1151                 if (key.offset > start)
1152                         start = key.offset;
1153
1154                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1155                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1156                 if (csum_end > start) {
1157                         size = min(csum_end - start, len);
1158                         len -= size;
1159                         start += size;
1160                         *found += size;
1161                 }
1162
1163                 path.slots[0]++;
1164         }
1165 out:
1166         if (ret < 0)
1167                 return ret;
1168         btrfs_release_path(&path);
1169         return 0;
1170 }
1171
1172 static int process_file_extent(struct btrfs_root *root,
1173                                 struct extent_buffer *eb,
1174                                 int slot, struct btrfs_key *key,
1175                                 struct shared_node *active_node)
1176 {
1177         struct inode_record *rec;
1178         struct btrfs_file_extent_item *fi;
1179         u64 num_bytes = 0;
1180         u64 disk_bytenr = 0;
1181         u64 extent_offset = 0;
1182         u64 mask = root->sectorsize - 1;
1183         int extent_type;
1184         int ret;
1185
1186         rec = active_node->current;
1187         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1188         rec->found_file_extent = 1;
1189
1190         if (rec->extent_start == (u64)-1) {
1191                 rec->extent_start = key->offset;
1192                 rec->extent_end = key->offset;
1193         }
1194
1195         if (rec->extent_end > key->offset)
1196                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1197         else if (rec->extent_end < key->offset &&
1198                  rec->extent_end < rec->first_extent_gap)
1199                 rec->first_extent_gap = rec->extent_end;
1200
1201         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1202         extent_type = btrfs_file_extent_type(eb, fi);
1203
1204         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1205                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1206                 if (num_bytes == 0)
1207                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1208                 rec->found_size += num_bytes;
1209                 num_bytes = (num_bytes + mask) & ~mask;
1210         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1211                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1212                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1213                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1214                 extent_offset = btrfs_file_extent_offset(eb, fi);
1215                 if (num_bytes == 0 || (num_bytes & mask))
1216                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1217                 if (num_bytes + extent_offset >
1218                     btrfs_file_extent_ram_bytes(eb, fi))
1219                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1220                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1221                     (btrfs_file_extent_compression(eb, fi) ||
1222                      btrfs_file_extent_encryption(eb, fi) ||
1223                      btrfs_file_extent_other_encoding(eb, fi)))
1224                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1225                 if (disk_bytenr > 0)
1226                         rec->found_size += num_bytes;
1227         } else {
1228                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1229         }
1230         rec->extent_end = key->offset + num_bytes;
1231
1232         if (disk_bytenr > 0) {
1233                 u64 found;
1234                 if (btrfs_file_extent_compression(eb, fi))
1235                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1236                 else
1237                         disk_bytenr += extent_offset;
1238
1239                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1240                 if (ret < 0)
1241                         return ret;
1242                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1243                         if (found > 0)
1244                                 rec->found_csum_item = 1;
1245                         if (found < num_bytes)
1246                                 rec->some_csum_missing = 1;
1247                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1248                         if (found > 0)
1249                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1250                 }
1251         }
1252         return 0;
1253 }
1254
1255 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1256                             struct walk_control *wc)
1257 {
1258         struct btrfs_key key;
1259         u32 nritems;
1260         int i;
1261         int ret = 0;
1262         struct cache_tree *inode_cache;
1263         struct shared_node *active_node;
1264
1265         if (wc->root_level == wc->active_node &&
1266             btrfs_root_refs(&root->root_item) == 0)
1267                 return 0;
1268
1269         active_node = wc->nodes[wc->active_node];
1270         inode_cache = &active_node->inode_cache;
1271         nritems = btrfs_header_nritems(eb);
1272         for (i = 0; i < nritems; i++) {
1273                 btrfs_item_key_to_cpu(eb, &key, i);
1274
1275                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1276                         continue;
1277                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1278                         continue;
1279
1280                 if (active_node->current == NULL ||
1281                     active_node->current->ino < key.objectid) {
1282                         if (active_node->current) {
1283                                 active_node->current->checked = 1;
1284                                 maybe_free_inode_rec(inode_cache,
1285                                                      active_node->current);
1286                         }
1287                         active_node->current = get_inode_rec(inode_cache,
1288                                                              key.objectid, 1);
1289                 }
1290                 switch (key.type) {
1291                 case BTRFS_DIR_ITEM_KEY:
1292                 case BTRFS_DIR_INDEX_KEY:
1293                         ret = process_dir_item(root, eb, i, &key, active_node);
1294                         break;
1295                 case BTRFS_INODE_REF_KEY:
1296                         ret = process_inode_ref(eb, i, &key, active_node);
1297                         break;
1298                 case BTRFS_INODE_EXTREF_KEY:
1299                         ret = process_inode_extref(eb, i, &key, active_node);
1300                         break;
1301                 case BTRFS_INODE_ITEM_KEY:
1302                         ret = process_inode_item(eb, i, &key, active_node);
1303                         break;
1304                 case BTRFS_EXTENT_DATA_KEY:
1305                         ret = process_file_extent(root, eb, i, &key,
1306                                                   active_node);
1307                         break;
1308                 default:
1309                         break;
1310                 };
1311         }
1312         return ret;
1313 }
1314
1315 static void reada_walk_down(struct btrfs_root *root,
1316                             struct extent_buffer *node, int slot)
1317 {
1318         u64 bytenr;
1319         u64 ptr_gen;
1320         u32 nritems;
1321         u32 blocksize;
1322         int i;
1323         int level;
1324
1325         level = btrfs_header_level(node);
1326         if (level != 1)
1327                 return;
1328
1329         nritems = btrfs_header_nritems(node);
1330         blocksize = btrfs_level_size(root, level - 1);
1331         for (i = slot; i < nritems; i++) {
1332                 bytenr = btrfs_node_blockptr(node, i);
1333                 ptr_gen = btrfs_node_ptr_generation(node, i);
1334                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1335         }
1336 }
1337
1338 /*
1339  * Check the child node/leaf by the following condition:
1340  * 1. the first item key of the node/leaf should be the same with the one
1341  *    in parent.
1342  * 2. block in parent node should match the child node/leaf.
1343  * 3. generation of parent node and child's header should be consistent.
1344  *
1345  * Or the child node/leaf pointed by the key in parent is not valid.
1346  *
1347  * We hope to check leaf owner too, but since subvol may share leaves,
1348  * which makes leaf owner check not so strong, key check should be
1349  * sufficient enough for that case.
1350  */
1351 static int check_child_node(struct btrfs_root *root,
1352                             struct extent_buffer *parent, int slot,
1353                             struct extent_buffer *child)
1354 {
1355         struct btrfs_key parent_key;
1356         struct btrfs_key child_key;
1357         int ret = 0;
1358
1359         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1360         if (btrfs_header_level(child) == 0)
1361                 btrfs_item_key_to_cpu(child, &child_key, 0);
1362         else
1363                 btrfs_node_key_to_cpu(child, &child_key, 0);
1364
1365         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1366                 ret = -EINVAL;
1367                 fprintf(stderr,
1368                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1369                         parent_key.objectid, parent_key.type, parent_key.offset,
1370                         child_key.objectid, child_key.type, child_key.offset);
1371         }
1372         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1373                 ret = -EINVAL;
1374                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1375                         btrfs_node_blockptr(parent, slot),
1376                         btrfs_header_bytenr(child));
1377         }
1378         if (btrfs_node_ptr_generation(parent, slot) !=
1379             btrfs_header_generation(child)) {
1380                 ret = -EINVAL;
1381                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1382                         btrfs_header_generation(child),
1383                         btrfs_node_ptr_generation(parent, slot));
1384         }
1385         return ret;
1386 }
1387
1388 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1389                           struct walk_control *wc, int *level)
1390 {
1391         u64 bytenr;
1392         u64 ptr_gen;
1393         struct extent_buffer *next;
1394         struct extent_buffer *cur;
1395         u32 blocksize;
1396         int ret, err = 0;
1397         u64 refs;
1398
1399         WARN_ON(*level < 0);
1400         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1401         ret = btrfs_lookup_extent_info(NULL, root,
1402                                        path->nodes[*level]->start,
1403                                        *level, 1, &refs, NULL);
1404         if (ret < 0) {
1405                 err = ret;
1406                 goto out;
1407         }
1408
1409         if (refs > 1) {
1410                 ret = enter_shared_node(root, path->nodes[*level]->start,
1411                                         refs, wc, *level);
1412                 if (ret > 0) {
1413                         err = ret;
1414                         goto out;
1415                 }
1416         }
1417
1418         while (*level >= 0) {
1419                 WARN_ON(*level < 0);
1420                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1421                 cur = path->nodes[*level];
1422
1423                 if (btrfs_header_level(cur) != *level)
1424                         WARN_ON(1);
1425
1426                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1427                         break;
1428                 if (*level == 0) {
1429                         ret = process_one_leaf(root, cur, wc);
1430                         if (ret < 0)
1431                                 err = ret;
1432                         break;
1433                 }
1434                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1435                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1436                 blocksize = btrfs_level_size(root, *level - 1);
1437                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1438                                                1, &refs, NULL);
1439                 if (ret < 0)
1440                         refs = 0;
1441
1442                 if (refs > 1) {
1443                         ret = enter_shared_node(root, bytenr, refs,
1444                                                 wc, *level - 1);
1445                         if (ret > 0) {
1446                                 path->slots[*level]++;
1447                                 continue;
1448                         }
1449                 }
1450
1451                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1452                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1453                         free_extent_buffer(next);
1454                         reada_walk_down(root, cur, path->slots[*level]);
1455                         next = read_tree_block(root, bytenr, blocksize,
1456                                                ptr_gen);
1457                         if (!next) {
1458                                 err = -EIO;
1459                                 goto out;
1460                         }
1461                 }
1462
1463                 ret = check_child_node(root, cur, path->slots[*level], next);
1464                 if (ret) {
1465                         err = ret;
1466                         goto out;
1467                 }
1468                 *level = *level - 1;
1469                 free_extent_buffer(path->nodes[*level]);
1470                 path->nodes[*level] = next;
1471                 path->slots[*level] = 0;
1472         }
1473 out:
1474         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1475         return err;
1476 }
1477
1478 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1479                         struct walk_control *wc, int *level)
1480 {
1481         int i;
1482         struct extent_buffer *leaf;
1483
1484         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1485                 leaf = path->nodes[i];
1486                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1487                         path->slots[i]++;
1488                         *level = i;
1489                         return 0;
1490                 } else {
1491                         free_extent_buffer(path->nodes[*level]);
1492                         path->nodes[*level] = NULL;
1493                         BUG_ON(*level > wc->active_node);
1494                         if (*level == wc->active_node)
1495                                 leave_shared_node(root, wc, *level);
1496                         *level = i + 1;
1497                 }
1498         }
1499         return 1;
1500 }
1501
1502 static int check_root_dir(struct inode_record *rec)
1503 {
1504         struct inode_backref *backref;
1505         int ret = -1;
1506
1507         if (!rec->found_inode_item || rec->errors)
1508                 goto out;
1509         if (rec->nlink != 1 || rec->found_link != 0)
1510                 goto out;
1511         if (list_empty(&rec->backrefs))
1512                 goto out;
1513         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1514         if (!backref->found_inode_ref)
1515                 goto out;
1516         if (backref->index != 0 || backref->namelen != 2 ||
1517             memcmp(backref->name, "..", 2))
1518                 goto out;
1519         if (backref->found_dir_index || backref->found_dir_item)
1520                 goto out;
1521         ret = 0;
1522 out:
1523         return ret;
1524 }
1525
1526 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1527                               struct btrfs_root *root, struct btrfs_path *path,
1528                               struct inode_record *rec)
1529 {
1530         struct btrfs_inode_item *ei;
1531         struct btrfs_key key;
1532         int ret;
1533
1534         key.objectid = rec->ino;
1535         key.type = BTRFS_INODE_ITEM_KEY;
1536         key.offset = (u64)-1;
1537
1538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1539         if (ret < 0)
1540                 goto out;
1541         if (ret) {
1542                 if (!path->slots[0]) {
1543                         ret = -ENOENT;
1544                         goto out;
1545                 }
1546                 path->slots[0]--;
1547                 ret = 0;
1548         }
1549         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1550         if (key.objectid != rec->ino) {
1551                 ret = -ENOENT;
1552                 goto out;
1553         }
1554
1555         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1556                             struct btrfs_inode_item);
1557         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1558         btrfs_mark_buffer_dirty(path->nodes[0]);
1559         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1560         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1561                root->root_key.objectid);
1562 out:
1563         btrfs_release_path(path);
1564         return ret;
1565 }
1566
1567 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1568                                     struct btrfs_root *root,
1569                                     struct btrfs_path *path,
1570                                     struct inode_record *rec)
1571 {
1572         struct btrfs_key key;
1573         int ret;
1574
1575         key.objectid = BTRFS_ORPHAN_OBJECTID;
1576         key.type = BTRFS_ORPHAN_ITEM_KEY;
1577         key.offset = rec->ino;
1578
1579         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1580         btrfs_release_path(path);
1581         if (!ret)
1582                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1583         return ret;
1584 }
1585
1586 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
1587 {
1588         struct btrfs_trans_handle *trans;
1589         struct btrfs_path *path;
1590         int ret = 0;
1591
1592         /* So far we just fix dir isize wrong */
1593         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM)))
1594                 return 1;
1595
1596         path = btrfs_alloc_path();
1597         if (!path)
1598                 return -ENOMEM;
1599
1600         trans = btrfs_start_transaction(root, 1);
1601         if (IS_ERR(trans)) {
1602                 btrfs_free_path(path);
1603                 return PTR_ERR(trans);
1604         }
1605
1606         if (rec->errors & I_ERR_DIR_ISIZE_WRONG)
1607                 ret = repair_inode_isize(trans, root, path, rec);
1608         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
1609                 ret = repair_inode_orphan_item(trans, root, path, rec);
1610         btrfs_commit_transaction(trans, root);
1611         btrfs_free_path(path);
1612         return ret;
1613 }
1614
1615 static int check_inode_recs(struct btrfs_root *root,
1616                             struct cache_tree *inode_cache)
1617 {
1618         struct cache_extent *cache;
1619         struct ptr_node *node;
1620         struct inode_record *rec;
1621         struct inode_backref *backref;
1622         int ret;
1623         u64 error = 0;
1624         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1625
1626         if (btrfs_root_refs(&root->root_item) == 0) {
1627                 if (!cache_tree_empty(inode_cache))
1628                         fprintf(stderr, "warning line %d\n", __LINE__);
1629                 return 0;
1630         }
1631
1632         rec = get_inode_rec(inode_cache, root_dirid, 0);
1633         if (rec) {
1634                 ret = check_root_dir(rec);
1635                 if (ret) {
1636                         fprintf(stderr, "root %llu root dir %llu error\n",
1637                                 (unsigned long long)root->root_key.objectid,
1638                                 (unsigned long long)root_dirid);
1639                         error++;
1640                 }
1641         } else {
1642                 fprintf(stderr, "root %llu root dir %llu not found\n",
1643                         (unsigned long long)root->root_key.objectid,
1644                         (unsigned long long)root_dirid);
1645         }
1646
1647         while (1) {
1648                 cache = search_cache_extent(inode_cache, 0);
1649                 if (!cache)
1650                         break;
1651                 node = container_of(cache, struct ptr_node, cache);
1652                 rec = node->data;
1653                 remove_cache_extent(inode_cache, &node->cache);
1654                 free(node);
1655                 if (rec->ino == root_dirid ||
1656                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1657                         free_inode_rec(rec);
1658                         continue;
1659                 }
1660
1661                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1662                         ret = check_orphan_item(root, rec->ino);
1663                         if (ret == 0)
1664                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1665                         if (can_free_inode_rec(rec)) {
1666                                 free_inode_rec(rec);
1667                                 continue;
1668                         }
1669                 }
1670
1671                 if (repair) {
1672                         ret = try_repair_inode(root, rec);
1673                         if (ret == 0 && can_free_inode_rec(rec)) {
1674                                 free_inode_rec(rec);
1675                                 continue;
1676                         }
1677                         ret = 0;
1678                 }
1679
1680                 error++;
1681                 if (!rec->found_inode_item)
1682                         rec->errors |= I_ERR_NO_INODE_ITEM;
1683                 if (rec->found_link != rec->nlink)
1684                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1685                 print_inode_error(root, rec);
1686                 list_for_each_entry(backref, &rec->backrefs, list) {
1687                         if (!backref->found_dir_item)
1688                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1689                         if (!backref->found_dir_index)
1690                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1691                         if (!backref->found_inode_ref)
1692                                 backref->errors |= REF_ERR_NO_INODE_REF;
1693                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1694                                 " namelen %u name %s filetype %d errors %x",
1695                                 (unsigned long long)backref->dir,
1696                                 (unsigned long long)backref->index,
1697                                 backref->namelen, backref->name,
1698                                 backref->filetype, backref->errors);
1699                         print_ref_error(backref->errors);
1700                 }
1701                 free_inode_rec(rec);
1702         }
1703         return (error > 0) ? -1 : 0;
1704 }
1705
1706 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1707                                         u64 objectid)
1708 {
1709         struct cache_extent *cache;
1710         struct root_record *rec = NULL;
1711         int ret;
1712
1713         cache = lookup_cache_extent(root_cache, objectid, 1);
1714         if (cache) {
1715                 rec = container_of(cache, struct root_record, cache);
1716         } else {
1717                 rec = calloc(1, sizeof(*rec));
1718                 rec->objectid = objectid;
1719                 INIT_LIST_HEAD(&rec->backrefs);
1720                 rec->cache.start = objectid;
1721                 rec->cache.size = 1;
1722
1723                 ret = insert_cache_extent(root_cache, &rec->cache);
1724                 BUG_ON(ret);
1725         }
1726         return rec;
1727 }
1728
1729 static struct root_backref *get_root_backref(struct root_record *rec,
1730                                              u64 ref_root, u64 dir, u64 index,
1731                                              const char *name, int namelen)
1732 {
1733         struct root_backref *backref;
1734
1735         list_for_each_entry(backref, &rec->backrefs, list) {
1736                 if (backref->ref_root != ref_root || backref->dir != dir ||
1737                     backref->namelen != namelen)
1738                         continue;
1739                 if (memcmp(name, backref->name, namelen))
1740                         continue;
1741                 return backref;
1742         }
1743
1744         backref = malloc(sizeof(*backref) + namelen + 1);
1745         memset(backref, 0, sizeof(*backref));
1746         backref->ref_root = ref_root;
1747         backref->dir = dir;
1748         backref->index = index;
1749         backref->namelen = namelen;
1750         memcpy(backref->name, name, namelen);
1751         backref->name[namelen] = '\0';
1752         list_add_tail(&backref->list, &rec->backrefs);
1753         return backref;
1754 }
1755
1756 static void free_root_record(struct cache_extent *cache)
1757 {
1758         struct root_record *rec;
1759         struct root_backref *backref;
1760
1761         rec = container_of(cache, struct root_record, cache);
1762         while (!list_empty(&rec->backrefs)) {
1763                 backref = list_entry(rec->backrefs.next,
1764                                      struct root_backref, list);
1765                 list_del(&backref->list);
1766                 free(backref);
1767         }
1768
1769         kfree(rec);
1770 }
1771
1772 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
1773
1774 static int add_root_backref(struct cache_tree *root_cache,
1775                             u64 root_id, u64 ref_root, u64 dir, u64 index,
1776                             const char *name, int namelen,
1777                             int item_type, int errors)
1778 {
1779         struct root_record *rec;
1780         struct root_backref *backref;
1781
1782         rec = get_root_rec(root_cache, root_id);
1783         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
1784
1785         backref->errors |= errors;
1786
1787         if (item_type != BTRFS_DIR_ITEM_KEY) {
1788                 if (backref->found_dir_index || backref->found_back_ref ||
1789                     backref->found_forward_ref) {
1790                         if (backref->index != index)
1791                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
1792                 } else {
1793                         backref->index = index;
1794                 }
1795         }
1796
1797         if (item_type == BTRFS_DIR_ITEM_KEY) {
1798                 if (backref->found_forward_ref)
1799                         rec->found_ref++;
1800                 backref->found_dir_item = 1;
1801         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
1802                 backref->found_dir_index = 1;
1803         } else if (item_type == BTRFS_ROOT_REF_KEY) {
1804                 if (backref->found_forward_ref)
1805                         backref->errors |= REF_ERR_DUP_ROOT_REF;
1806                 else if (backref->found_dir_item)
1807                         rec->found_ref++;
1808                 backref->found_forward_ref = 1;
1809         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
1810                 if (backref->found_back_ref)
1811                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
1812                 backref->found_back_ref = 1;
1813         } else {
1814                 BUG_ON(1);
1815         }
1816
1817         if (backref->found_forward_ref && backref->found_dir_item)
1818                 backref->reachable = 1;
1819         return 0;
1820 }
1821
1822 static int merge_root_recs(struct btrfs_root *root,
1823                            struct cache_tree *src_cache,
1824                            struct cache_tree *dst_cache)
1825 {
1826         struct cache_extent *cache;
1827         struct ptr_node *node;
1828         struct inode_record *rec;
1829         struct inode_backref *backref;
1830         int ret = 0;
1831
1832         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1833                 free_inode_recs_tree(src_cache);
1834                 return 0;
1835         }
1836
1837         while (1) {
1838                 cache = search_cache_extent(src_cache, 0);
1839                 if (!cache)
1840                         break;
1841                 node = container_of(cache, struct ptr_node, cache);
1842                 rec = node->data;
1843                 remove_cache_extent(src_cache, &node->cache);
1844                 free(node);
1845
1846                 ret = is_child_root(root, root->objectid, rec->ino);
1847                 if (ret < 0)
1848                         break;
1849                 else if (ret == 0)
1850                         goto skip;
1851
1852                 list_for_each_entry(backref, &rec->backrefs, list) {
1853                         BUG_ON(backref->found_inode_ref);
1854                         if (backref->found_dir_item)
1855                                 add_root_backref(dst_cache, rec->ino,
1856                                         root->root_key.objectid, backref->dir,
1857                                         backref->index, backref->name,
1858                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
1859                                         backref->errors);
1860                         if (backref->found_dir_index)
1861                                 add_root_backref(dst_cache, rec->ino,
1862                                         root->root_key.objectid, backref->dir,
1863                                         backref->index, backref->name,
1864                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
1865                                         backref->errors);
1866                 }
1867 skip:
1868                 free_inode_rec(rec);
1869         }
1870         if (ret < 0)
1871                 return ret;
1872         return 0;
1873 }
1874
1875 static int check_root_refs(struct btrfs_root *root,
1876                            struct cache_tree *root_cache)
1877 {
1878         struct root_record *rec;
1879         struct root_record *ref_root;
1880         struct root_backref *backref;
1881         struct cache_extent *cache;
1882         int loop = 1;
1883         int ret;
1884         int error;
1885         int errors = 0;
1886
1887         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
1888         rec->found_ref = 1;
1889
1890         /* fixme: this can not detect circular references */
1891         while (loop) {
1892                 loop = 0;
1893                 cache = search_cache_extent(root_cache, 0);
1894                 while (1) {
1895                         if (!cache)
1896                                 break;
1897                         rec = container_of(cache, struct root_record, cache);
1898                         cache = next_cache_extent(cache);
1899
1900                         if (rec->found_ref == 0)
1901                                 continue;
1902
1903                         list_for_each_entry(backref, &rec->backrefs, list) {
1904                                 if (!backref->reachable)
1905                                         continue;
1906
1907                                 ref_root = get_root_rec(root_cache,
1908                                                         backref->ref_root);
1909                                 if (ref_root->found_ref > 0)
1910                                         continue;
1911
1912                                 backref->reachable = 0;
1913                                 rec->found_ref--;
1914                                 if (rec->found_ref == 0)
1915                                         loop = 1;
1916                         }
1917                 }
1918         }
1919
1920         cache = search_cache_extent(root_cache, 0);
1921         while (1) {
1922                 if (!cache)
1923                         break;
1924                 rec = container_of(cache, struct root_record, cache);
1925                 cache = next_cache_extent(cache);
1926
1927                 if (rec->found_ref == 0 &&
1928                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1929                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
1930                         ret = check_orphan_item(root->fs_info->tree_root,
1931                                                 rec->objectid);
1932                         if (ret == 0)
1933                                 continue;
1934
1935                         /*
1936                          * If we don't have a root item then we likely just have
1937                          * a dir item in a snapshot for this root but no actual
1938                          * ref key or anything so it's meaningless.
1939                          */
1940                         if (!rec->found_root_item)
1941                                 continue;
1942                         errors++;
1943                         fprintf(stderr, "fs tree %llu not referenced\n",
1944                                 (unsigned long long)rec->objectid);
1945                 }
1946
1947                 error = 0;
1948                 if (rec->found_ref > 0 && !rec->found_root_item)
1949                         error = 1;
1950                 list_for_each_entry(backref, &rec->backrefs, list) {
1951                         if (!backref->found_dir_item)
1952                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1953                         if (!backref->found_dir_index)
1954                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1955                         if (!backref->found_back_ref)
1956                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
1957                         if (!backref->found_forward_ref)
1958                                 backref->errors |= REF_ERR_NO_ROOT_REF;
1959                         if (backref->reachable && backref->errors)
1960                                 error = 1;
1961                 }
1962                 if (!error)
1963                         continue;
1964
1965                 errors++;
1966                 fprintf(stderr, "fs tree %llu refs %u %s\n",
1967                         (unsigned long long)rec->objectid, rec->found_ref,
1968                          rec->found_root_item ? "" : "not found");
1969
1970                 list_for_each_entry(backref, &rec->backrefs, list) {
1971                         if (!backref->reachable)
1972                                 continue;
1973                         if (!backref->errors && rec->found_root_item)
1974                                 continue;
1975                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
1976                                 " index %llu namelen %u name %s errors %x\n",
1977                                 (unsigned long long)backref->ref_root,
1978                                 (unsigned long long)backref->dir,
1979                                 (unsigned long long)backref->index,
1980                                 backref->namelen, backref->name,
1981                                 backref->errors);
1982                         print_ref_error(backref->errors);
1983                 }
1984         }
1985         return errors > 0 ? 1 : 0;
1986 }
1987
1988 static int process_root_ref(struct extent_buffer *eb, int slot,
1989                             struct btrfs_key *key,
1990                             struct cache_tree *root_cache)
1991 {
1992         u64 dirid;
1993         u64 index;
1994         u32 len;
1995         u32 name_len;
1996         struct btrfs_root_ref *ref;
1997         char namebuf[BTRFS_NAME_LEN];
1998         int error;
1999
2000         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
2001
2002         dirid = btrfs_root_ref_dirid(eb, ref);
2003         index = btrfs_root_ref_sequence(eb, ref);
2004         name_len = btrfs_root_ref_name_len(eb, ref);
2005
2006         if (name_len <= BTRFS_NAME_LEN) {
2007                 len = name_len;
2008                 error = 0;
2009         } else {
2010                 len = BTRFS_NAME_LEN;
2011                 error = REF_ERR_NAME_TOO_LONG;
2012         }
2013         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
2014
2015         if (key->type == BTRFS_ROOT_REF_KEY) {
2016                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
2017                                  index, namebuf, len, key->type, error);
2018         } else {
2019                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
2020                                  index, namebuf, len, key->type, error);
2021         }
2022         return 0;
2023 }
2024
2025 static int check_fs_root(struct btrfs_root *root,
2026                          struct cache_tree *root_cache,
2027                          struct walk_control *wc)
2028 {
2029         int ret = 0;
2030         int err = 0;
2031         int wret;
2032         int level;
2033         struct btrfs_path path;
2034         struct shared_node root_node;
2035         struct root_record *rec;
2036         struct btrfs_root_item *root_item = &root->root_item;
2037
2038         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2039                 rec = get_root_rec(root_cache, root->root_key.objectid);
2040                 if (btrfs_root_refs(root_item) > 0)
2041                         rec->found_root_item = 1;
2042         }
2043
2044         btrfs_init_path(&path);
2045         memset(&root_node, 0, sizeof(root_node));
2046         cache_tree_init(&root_node.root_cache);
2047         cache_tree_init(&root_node.inode_cache);
2048
2049         level = btrfs_header_level(root->node);
2050         memset(wc->nodes, 0, sizeof(wc->nodes));
2051         wc->nodes[level] = &root_node;
2052         wc->active_node = level;
2053         wc->root_level = level;
2054
2055         if (btrfs_root_refs(root_item) > 0 ||
2056             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2057                 path.nodes[level] = root->node;
2058                 extent_buffer_get(root->node);
2059                 path.slots[level] = 0;
2060         } else {
2061                 struct btrfs_key key;
2062                 struct btrfs_disk_key found_key;
2063
2064                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2065                 level = root_item->drop_level;
2066                 path.lowest_level = level;
2067                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2068                 if (wret < 0)
2069                         goto skip_walking;
2070                 btrfs_node_key(path.nodes[level], &found_key,
2071                                 path.slots[level]);
2072                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2073                                         sizeof(found_key)));
2074         }
2075
2076         while (1) {
2077                 wret = walk_down_tree(root, &path, wc, &level);
2078                 if (wret < 0)
2079                         ret = wret;
2080                 if (wret != 0)
2081                         break;
2082
2083                 wret = walk_up_tree(root, &path, wc, &level);
2084                 if (wret < 0)
2085                         ret = wret;
2086                 if (wret != 0)
2087                         break;
2088         }
2089 skip_walking:
2090         btrfs_release_path(&path);
2091
2092         err = merge_root_recs(root, &root_node.root_cache, root_cache);
2093         if (err < 0)
2094                 ret = err;
2095
2096         if (root_node.current) {
2097                 root_node.current->checked = 1;
2098                 maybe_free_inode_rec(&root_node.inode_cache,
2099                                 root_node.current);
2100         }
2101
2102         err = check_inode_recs(root, &root_node.inode_cache);
2103         if (!ret)
2104                 ret = err;
2105         return ret;
2106 }
2107
2108 static int fs_root_objectid(u64 objectid)
2109 {
2110         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
2111             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2112                 return 1;
2113         return is_fstree(objectid);
2114 }
2115
2116 static int check_fs_roots(struct btrfs_root *root,
2117                           struct cache_tree *root_cache)
2118 {
2119         struct btrfs_path path;
2120         struct btrfs_key key;
2121         struct walk_control wc;
2122         struct extent_buffer *leaf;
2123         struct btrfs_root *tmp_root;
2124         struct btrfs_root *tree_root = root->fs_info->tree_root;
2125         int ret;
2126         int err = 0;
2127
2128         /*
2129          * Just in case we made any changes to the extent tree that weren't
2130          * reflected into the free space cache yet.
2131          */
2132         if (repair)
2133                 reset_cached_block_groups(root->fs_info);
2134         memset(&wc, 0, sizeof(wc));
2135         cache_tree_init(&wc.shared);
2136         btrfs_init_path(&path);
2137
2138         key.offset = 0;
2139         key.objectid = 0;
2140         key.type = BTRFS_ROOT_ITEM_KEY;
2141         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
2142         if (ret < 0) {
2143                 err = 1;
2144                 goto out;
2145         }
2146         while (1) {
2147                 leaf = path.nodes[0];
2148                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2149                         ret = btrfs_next_leaf(tree_root, &path);
2150                         if (ret) {
2151                                 if (ret < 0)
2152                                         err = 1;
2153                                 break;
2154                         }
2155                         leaf = path.nodes[0];
2156                 }
2157                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2158                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
2159                     fs_root_objectid(key.objectid)) {
2160                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2161                                 tmp_root = btrfs_read_fs_root_no_cache(
2162                                                 root->fs_info, &key);
2163                         } else {
2164                                 key.offset = (u64)-1;
2165                                 tmp_root = btrfs_read_fs_root(
2166                                                 root->fs_info, &key);
2167                         }
2168                         if (IS_ERR(tmp_root)) {
2169                                 err = 1;
2170                                 goto next;
2171                         }
2172                         ret = check_fs_root(tmp_root, root_cache, &wc);
2173                         if (ret)
2174                                 err = 1;
2175                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
2176                                 btrfs_free_fs_root(tmp_root);
2177                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
2178                            key.type == BTRFS_ROOT_BACKREF_KEY) {
2179                         process_root_ref(leaf, path.slots[0], &key,
2180                                          root_cache);
2181                 }
2182 next:
2183                 path.slots[0]++;
2184         }
2185 out:
2186         btrfs_release_path(&path);
2187         if (err)
2188                 free_extent_cache_tree(&wc.shared);
2189         if (!cache_tree_empty(&wc.shared))
2190                 fprintf(stderr, "warning line %d\n", __LINE__);
2191
2192         return err;
2193 }
2194
2195 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
2196 {
2197         struct list_head *cur = rec->backrefs.next;
2198         struct extent_backref *back;
2199         struct tree_backref *tback;
2200         struct data_backref *dback;
2201         u64 found = 0;
2202         int err = 0;
2203
2204         while(cur != &rec->backrefs) {
2205                 back = list_entry(cur, struct extent_backref, list);
2206                 cur = cur->next;
2207                 if (!back->found_extent_tree) {
2208                         err = 1;
2209                         if (!print_errs)
2210                                 goto out;
2211                         if (back->is_data) {
2212                                 dback = (struct data_backref *)back;
2213                                 fprintf(stderr, "Backref %llu %s %llu"
2214                                         " owner %llu offset %llu num_refs %lu"
2215                                         " not found in extent tree\n",
2216                                         (unsigned long long)rec->start,
2217                                         back->full_backref ?
2218                                         "parent" : "root",
2219                                         back->full_backref ?
2220                                         (unsigned long long)dback->parent:
2221                                         (unsigned long long)dback->root,
2222                                         (unsigned long long)dback->owner,
2223                                         (unsigned long long)dback->offset,
2224                                         (unsigned long)dback->num_refs);
2225                         } else {
2226                                 tback = (struct tree_backref *)back;
2227                                 fprintf(stderr, "Backref %llu parent %llu"
2228                                         " root %llu not found in extent tree\n",
2229                                         (unsigned long long)rec->start,
2230                                         (unsigned long long)tback->parent,
2231                                         (unsigned long long)tback->root);
2232                         }
2233                 }
2234                 if (!back->is_data && !back->found_ref) {
2235                         err = 1;
2236                         if (!print_errs)
2237                                 goto out;
2238                         tback = (struct tree_backref *)back;
2239                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
2240                                 (unsigned long long)rec->start,
2241                                 back->full_backref ? "parent" : "root",
2242                                 back->full_backref ?
2243                                 (unsigned long long)tback->parent :
2244                                 (unsigned long long)tback->root, back);
2245                 }
2246                 if (back->is_data) {
2247                         dback = (struct data_backref *)back;
2248                         if (dback->found_ref != dback->num_refs) {
2249                                 err = 1;
2250                                 if (!print_errs)
2251                                         goto out;
2252                                 fprintf(stderr, "Incorrect local backref count"
2253                                         " on %llu %s %llu owner %llu"
2254                                         " offset %llu found %u wanted %u back %p\n",
2255                                         (unsigned long long)rec->start,
2256                                         back->full_backref ?
2257                                         "parent" : "root",
2258                                         back->full_backref ?
2259                                         (unsigned long long)dback->parent:
2260                                         (unsigned long long)dback->root,
2261                                         (unsigned long long)dback->owner,
2262                                         (unsigned long long)dback->offset,
2263                                         dback->found_ref, dback->num_refs, back);
2264                         }
2265                         if (dback->disk_bytenr != rec->start) {
2266                                 err = 1;
2267                                 if (!print_errs)
2268                                         goto out;
2269                                 fprintf(stderr, "Backref disk bytenr does not"
2270                                         " match extent record, bytenr=%llu, "
2271                                         "ref bytenr=%llu\n",
2272                                         (unsigned long long)rec->start,
2273                                         (unsigned long long)dback->disk_bytenr);
2274                         }
2275
2276                         if (dback->bytes != rec->nr) {
2277                                 err = 1;
2278                                 if (!print_errs)
2279                                         goto out;
2280                                 fprintf(stderr, "Backref bytes do not match "
2281                                         "extent backref, bytenr=%llu, ref "
2282                                         "bytes=%llu, backref bytes=%llu\n",
2283                                         (unsigned long long)rec->start,
2284                                         (unsigned long long)rec->nr,
2285                                         (unsigned long long)dback->bytes);
2286                         }
2287                 }
2288                 if (!back->is_data) {
2289                         found += 1;
2290                 } else {
2291                         dback = (struct data_backref *)back;
2292                         found += dback->found_ref;
2293                 }
2294         }
2295         if (found != rec->refs) {
2296                 err = 1;
2297                 if (!print_errs)
2298                         goto out;
2299                 fprintf(stderr, "Incorrect global backref count "
2300                         "on %llu found %llu wanted %llu\n",
2301                         (unsigned long long)rec->start,
2302                         (unsigned long long)found,
2303                         (unsigned long long)rec->refs);
2304         }
2305 out:
2306         return err;
2307 }
2308
2309 static int free_all_extent_backrefs(struct extent_record *rec)
2310 {
2311         struct extent_backref *back;
2312         struct list_head *cur;
2313         while (!list_empty(&rec->backrefs)) {
2314                 cur = rec->backrefs.next;
2315                 back = list_entry(cur, struct extent_backref, list);
2316                 list_del(cur);
2317                 free(back);
2318         }
2319         return 0;
2320 }
2321
2322 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
2323                                      struct cache_tree *extent_cache)
2324 {
2325         struct cache_extent *cache;
2326         struct extent_record *rec;
2327
2328         while (1) {
2329                 cache = first_cache_extent(extent_cache);
2330                 if (!cache)
2331                         break;
2332                 rec = container_of(cache, struct extent_record, cache);
2333                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2334                 remove_cache_extent(extent_cache, cache);
2335                 free_all_extent_backrefs(rec);
2336                 free(rec);
2337         }
2338 }
2339
2340 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2341                                  struct extent_record *rec)
2342 {
2343         if (rec->content_checked && rec->owner_ref_checked &&
2344             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2345             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2346                 remove_cache_extent(extent_cache, &rec->cache);
2347                 free_all_extent_backrefs(rec);
2348                 list_del_init(&rec->list);
2349                 free(rec);
2350         }
2351         return 0;
2352 }
2353
2354 static int check_owner_ref(struct btrfs_root *root,
2355                             struct extent_record *rec,
2356                             struct extent_buffer *buf)
2357 {
2358         struct extent_backref *node;
2359         struct tree_backref *back;
2360         struct btrfs_root *ref_root;
2361         struct btrfs_key key;
2362         struct btrfs_path path;
2363         struct extent_buffer *parent;
2364         int level;
2365         int found = 0;
2366         int ret;
2367
2368         list_for_each_entry(node, &rec->backrefs, list) {
2369                 if (node->is_data)
2370                         continue;
2371                 if (!node->found_ref)
2372                         continue;
2373                 if (node->full_backref)
2374                         continue;
2375                 back = (struct tree_backref *)node;
2376                 if (btrfs_header_owner(buf) == back->root)
2377                         return 0;
2378         }
2379         BUG_ON(rec->is_root);
2380
2381         /* try to find the block by search corresponding fs tree */
2382         key.objectid = btrfs_header_owner(buf);
2383         key.type = BTRFS_ROOT_ITEM_KEY;
2384         key.offset = (u64)-1;
2385
2386         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2387         if (IS_ERR(ref_root))
2388                 return 1;
2389
2390         level = btrfs_header_level(buf);
2391         if (level == 0)
2392                 btrfs_item_key_to_cpu(buf, &key, 0);
2393         else
2394                 btrfs_node_key_to_cpu(buf, &key, 0);
2395
2396         btrfs_init_path(&path);
2397         path.lowest_level = level + 1;
2398         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2399         if (ret < 0)
2400                 return 0;
2401
2402         parent = path.nodes[level + 1];
2403         if (parent && buf->start == btrfs_node_blockptr(parent,
2404                                                         path.slots[level + 1]))
2405                 found = 1;
2406
2407         btrfs_release_path(&path);
2408         return found ? 0 : 1;
2409 }
2410
2411 static int is_extent_tree_record(struct extent_record *rec)
2412 {
2413         struct list_head *cur = rec->backrefs.next;
2414         struct extent_backref *node;
2415         struct tree_backref *back;
2416         int is_extent = 0;
2417
2418         while(cur != &rec->backrefs) {
2419                 node = list_entry(cur, struct extent_backref, list);
2420                 cur = cur->next;
2421                 if (node->is_data)
2422                         return 0;
2423                 back = (struct tree_backref *)node;
2424                 if (node->full_backref)
2425                         return 0;
2426                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2427                         is_extent = 1;
2428         }
2429         return is_extent;
2430 }
2431
2432
2433 static int record_bad_block_io(struct btrfs_fs_info *info,
2434                                struct cache_tree *extent_cache,
2435                                u64 start, u64 len)
2436 {
2437         struct extent_record *rec;
2438         struct cache_extent *cache;
2439         struct btrfs_key key;
2440
2441         cache = lookup_cache_extent(extent_cache, start, len);
2442         if (!cache)
2443                 return 0;
2444
2445         rec = container_of(cache, struct extent_record, cache);
2446         if (!is_extent_tree_record(rec))
2447                 return 0;
2448
2449         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2450         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2451 }
2452
2453 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
2454                        struct extent_buffer *buf, int slot)
2455 {
2456         if (btrfs_header_level(buf)) {
2457                 struct btrfs_key_ptr ptr1, ptr2;
2458
2459                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
2460                                    sizeof(struct btrfs_key_ptr));
2461                 read_extent_buffer(buf, &ptr2,
2462                                    btrfs_node_key_ptr_offset(slot + 1),
2463                                    sizeof(struct btrfs_key_ptr));
2464                 write_extent_buffer(buf, &ptr1,
2465                                     btrfs_node_key_ptr_offset(slot + 1),
2466                                     sizeof(struct btrfs_key_ptr));
2467                 write_extent_buffer(buf, &ptr2,
2468                                     btrfs_node_key_ptr_offset(slot),
2469                                     sizeof(struct btrfs_key_ptr));
2470                 if (slot == 0) {
2471                         struct btrfs_disk_key key;
2472                         btrfs_node_key(buf, &key, 0);
2473                         btrfs_fixup_low_keys(root, path, &key,
2474                                              btrfs_header_level(buf) + 1);
2475                 }
2476         } else {
2477                 struct btrfs_item *item1, *item2;
2478                 struct btrfs_key k1, k2;
2479                 char *item1_data, *item2_data;
2480                 u32 item1_offset, item2_offset, item1_size, item2_size;
2481
2482                 item1 = btrfs_item_nr(slot);
2483                 item2 = btrfs_item_nr(slot + 1);
2484                 btrfs_item_key_to_cpu(buf, &k1, slot);
2485                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
2486                 item1_offset = btrfs_item_offset(buf, item1);
2487                 item2_offset = btrfs_item_offset(buf, item2);
2488                 item1_size = btrfs_item_size(buf, item1);
2489                 item2_size = btrfs_item_size(buf, item2);
2490
2491                 item1_data = malloc(item1_size);
2492                 if (!item1_data)
2493                         return -ENOMEM;
2494                 item2_data = malloc(item2_size);
2495                 if (!item2_data) {
2496                         free(item1_data);
2497                         return -ENOMEM;
2498                 }
2499
2500                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
2501                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
2502
2503                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
2504                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
2505                 free(item1_data);
2506                 free(item2_data);
2507
2508                 btrfs_set_item_offset(buf, item1, item2_offset);
2509                 btrfs_set_item_offset(buf, item2, item1_offset);
2510                 btrfs_set_item_size(buf, item1, item2_size);
2511                 btrfs_set_item_size(buf, item2, item1_size);
2512
2513                 path->slots[0] = slot;
2514                 btrfs_set_item_key_unsafe(root, path, &k2);
2515                 path->slots[0] = slot + 1;
2516                 btrfs_set_item_key_unsafe(root, path, &k1);
2517         }
2518         return 0;
2519 }
2520
2521 /*
2522  * Attempt to fix basic block failures.  Currently we only handle bad key
2523  * orders, we will cycle through the keys and swap them if necessary.
2524  */
2525 static int try_to_fix_bad_block(struct btrfs_trans_handle *trans,
2526                                 struct btrfs_root *root,
2527                                 struct extent_buffer *buf,
2528                                 struct btrfs_disk_key *parent_key,
2529                                 enum btrfs_tree_block_status status)
2530 {
2531         struct btrfs_path *path;
2532         struct btrfs_key k1, k2;
2533         int i;
2534         int level;
2535         int ret;
2536
2537         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
2538                 return -EIO;
2539
2540         k1.objectid = btrfs_header_owner(buf);
2541         k1.type = BTRFS_ROOT_ITEM_KEY;
2542         k1.offset = (u64)-1;
2543
2544         root = btrfs_read_fs_root(root->fs_info, &k1);
2545         if (IS_ERR(root))
2546                 return -EIO;
2547
2548         record_root_in_trans(trans, root);
2549
2550         path = btrfs_alloc_path();
2551         if (!path)
2552                 return -EIO;
2553
2554         level = btrfs_header_level(buf);
2555         path->lowest_level = level;
2556         path->skip_check_block = 1;
2557         if (level)
2558                 btrfs_node_key_to_cpu(buf, &k1, 0);
2559         else
2560                 btrfs_item_key_to_cpu(buf, &k1, 0);
2561
2562         ret = btrfs_search_slot(trans, root, &k1, path, 0, 1);
2563         if (ret) {
2564                 btrfs_free_path(path);
2565                 return -EIO;
2566         }
2567
2568         buf = path->nodes[level];
2569         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
2570                 if (level) {
2571                         btrfs_node_key_to_cpu(buf, &k1, i);
2572                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
2573                 } else {
2574                         btrfs_item_key_to_cpu(buf, &k1, i);
2575                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
2576                 }
2577                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
2578                         continue;
2579                 ret = swap_values(root, path, buf, i);
2580                 if (ret)
2581                         break;
2582                 btrfs_mark_buffer_dirty(buf);
2583                 i = 0;
2584         }
2585
2586         btrfs_free_path(path);
2587         return ret;
2588 }
2589
2590 static int check_block(struct btrfs_trans_handle *trans,
2591                        struct btrfs_root *root,
2592                        struct cache_tree *extent_cache,
2593                        struct extent_buffer *buf, u64 flags)
2594 {
2595         struct extent_record *rec;
2596         struct cache_extent *cache;
2597         struct btrfs_key key;
2598         enum btrfs_tree_block_status status;
2599         int ret = 0;
2600         int level;
2601
2602         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
2603         if (!cache)
2604                 return 1;
2605         rec = container_of(cache, struct extent_record, cache);
2606         rec->generation = btrfs_header_generation(buf);
2607
2608         level = btrfs_header_level(buf);
2609         if (btrfs_header_nritems(buf) > 0) {
2610
2611                 if (level == 0)
2612                         btrfs_item_key_to_cpu(buf, &key, 0);
2613                 else
2614                         btrfs_node_key_to_cpu(buf, &key, 0);
2615
2616                 rec->info_objectid = key.objectid;
2617         }
2618         rec->info_level = level;
2619
2620         if (btrfs_is_leaf(buf))
2621                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
2622         else
2623                 status = btrfs_check_node(root, &rec->parent_key, buf);
2624
2625         if (status != BTRFS_TREE_BLOCK_CLEAN) {
2626                 if (repair)
2627                         status = try_to_fix_bad_block(trans, root, buf,
2628                                                       &rec->parent_key,
2629                                                       status);
2630                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2631                         ret = -EIO;
2632                         fprintf(stderr, "bad block %llu\n",
2633                                 (unsigned long long)buf->start);
2634                 } else {
2635                         /*
2636                          * Signal to callers we need to start the scan over
2637                          * again since we'll have cow'ed blocks.
2638                          */
2639                         ret = -EAGAIN;
2640                 }
2641         } else {
2642                 rec->content_checked = 1;
2643                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2644                         rec->owner_ref_checked = 1;
2645                 else {
2646                         ret = check_owner_ref(root, rec, buf);
2647                         if (!ret)
2648                                 rec->owner_ref_checked = 1;
2649                 }
2650         }
2651         if (!ret)
2652                 maybe_free_extent_rec(extent_cache, rec);
2653         return ret;
2654 }
2655
2656 static struct tree_backref *find_tree_backref(struct extent_record *rec,
2657                                                 u64 parent, u64 root)
2658 {
2659         struct list_head *cur = rec->backrefs.next;
2660         struct extent_backref *node;
2661         struct tree_backref *back;
2662
2663         while(cur != &rec->backrefs) {
2664                 node = list_entry(cur, struct extent_backref, list);
2665                 cur = cur->next;
2666                 if (node->is_data)
2667                         continue;
2668                 back = (struct tree_backref *)node;
2669                 if (parent > 0) {
2670                         if (!node->full_backref)
2671                                 continue;
2672                         if (parent == back->parent)
2673                                 return back;
2674                 } else {
2675                         if (node->full_backref)
2676                                 continue;
2677                         if (back->root == root)
2678                                 return back;
2679                 }
2680         }
2681         return NULL;
2682 }
2683
2684 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
2685                                                 u64 parent, u64 root)
2686 {
2687         struct tree_backref *ref = malloc(sizeof(*ref));
2688         memset(&ref->node, 0, sizeof(ref->node));
2689         if (parent > 0) {
2690                 ref->parent = parent;
2691                 ref->node.full_backref = 1;
2692         } else {
2693                 ref->root = root;
2694                 ref->node.full_backref = 0;
2695         }
2696         list_add_tail(&ref->node.list, &rec->backrefs);
2697
2698         return ref;
2699 }
2700
2701 static struct data_backref *find_data_backref(struct extent_record *rec,
2702                                                 u64 parent, u64 root,
2703                                                 u64 owner, u64 offset,
2704                                                 int found_ref,
2705                                                 u64 disk_bytenr, u64 bytes)
2706 {
2707         struct list_head *cur = rec->backrefs.next;
2708         struct extent_backref *node;
2709         struct data_backref *back;
2710
2711         while(cur != &rec->backrefs) {
2712                 node = list_entry(cur, struct extent_backref, list);
2713                 cur = cur->next;
2714                 if (!node->is_data)
2715                         continue;
2716                 back = (struct data_backref *)node;
2717                 if (parent > 0) {
2718                         if (!node->full_backref)
2719                                 continue;
2720                         if (parent == back->parent)
2721                                 return back;
2722                 } else {
2723                         if (node->full_backref)
2724                                 continue;
2725                         if (back->root == root && back->owner == owner &&
2726                             back->offset == offset) {
2727                                 if (found_ref && node->found_ref &&
2728                                     (back->bytes != bytes ||
2729                                     back->disk_bytenr != disk_bytenr))
2730                                         continue;
2731                                 return back;
2732                         }
2733                 }
2734         }
2735         return NULL;
2736 }
2737
2738 static struct data_backref *alloc_data_backref(struct extent_record *rec,
2739                                                 u64 parent, u64 root,
2740                                                 u64 owner, u64 offset,
2741                                                 u64 max_size)
2742 {
2743         struct data_backref *ref = malloc(sizeof(*ref));
2744         memset(&ref->node, 0, sizeof(ref->node));
2745         ref->node.is_data = 1;
2746
2747         if (parent > 0) {
2748                 ref->parent = parent;
2749                 ref->owner = 0;
2750                 ref->offset = 0;
2751                 ref->node.full_backref = 1;
2752         } else {
2753                 ref->root = root;
2754                 ref->owner = owner;
2755                 ref->offset = offset;
2756                 ref->node.full_backref = 0;
2757         }
2758         ref->bytes = max_size;
2759         ref->found_ref = 0;
2760         ref->num_refs = 0;
2761         list_add_tail(&ref->node.list, &rec->backrefs);
2762         if (max_size > rec->max_size)
2763                 rec->max_size = max_size;
2764         return ref;
2765 }
2766
2767 static int add_extent_rec(struct cache_tree *extent_cache,
2768                           struct btrfs_key *parent_key, u64 parent_gen,
2769                           u64 start, u64 nr, u64 extent_item_refs,
2770                           int is_root, int inc_ref, int set_checked,
2771                           int metadata, int extent_rec, u64 max_size)
2772 {
2773         struct extent_record *rec;
2774         struct cache_extent *cache;
2775         int ret = 0;
2776         int dup = 0;
2777
2778         cache = lookup_cache_extent(extent_cache, start, nr);
2779         if (cache) {
2780                 rec = container_of(cache, struct extent_record, cache);
2781                 if (inc_ref)
2782                         rec->refs++;
2783                 if (rec->nr == 1)
2784                         rec->nr = max(nr, max_size);
2785
2786                 /*
2787                  * We need to make sure to reset nr to whatever the extent
2788                  * record says was the real size, this way we can compare it to
2789                  * the backrefs.
2790                  */
2791                 if (extent_rec) {
2792                         if (start != rec->start || rec->found_rec) {
2793                                 struct extent_record *tmp;
2794
2795                                 dup = 1;
2796                                 if (list_empty(&rec->list))
2797                                         list_add_tail(&rec->list,
2798                                                       &duplicate_extents);
2799
2800                                 /*
2801                                  * We have to do this song and dance in case we
2802                                  * find an extent record that falls inside of
2803                                  * our current extent record but does not have
2804                                  * the same objectid.
2805                                  */
2806                                 tmp = malloc(sizeof(*tmp));
2807                                 if (!tmp)
2808                                         return -ENOMEM;
2809                                 tmp->start = start;
2810                                 tmp->max_size = max_size;
2811                                 tmp->nr = nr;
2812                                 tmp->found_rec = 1;
2813                                 tmp->metadata = metadata;
2814                                 tmp->extent_item_refs = extent_item_refs;
2815                                 INIT_LIST_HEAD(&tmp->list);
2816                                 list_add_tail(&tmp->list, &rec->dups);
2817                                 rec->num_duplicates++;
2818                         } else {
2819                                 rec->nr = nr;
2820                                 rec->found_rec = 1;
2821                         }
2822                 }
2823
2824                 if (extent_item_refs && !dup) {
2825                         if (rec->extent_item_refs) {
2826                                 fprintf(stderr, "block %llu rec "
2827                                         "extent_item_refs %llu, passed %llu\n",
2828                                         (unsigned long long)start,
2829                                         (unsigned long long)
2830                                                         rec->extent_item_refs,
2831                                         (unsigned long long)extent_item_refs);
2832                         }
2833                         rec->extent_item_refs = extent_item_refs;
2834                 }
2835                 if (is_root)
2836                         rec->is_root = 1;
2837                 if (set_checked) {
2838                         rec->content_checked = 1;
2839                         rec->owner_ref_checked = 1;
2840                 }
2841
2842                 if (parent_key)
2843                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2844                 if (parent_gen)
2845                         rec->parent_generation = parent_gen;
2846
2847                 if (rec->max_size < max_size)
2848                         rec->max_size = max_size;
2849
2850                 maybe_free_extent_rec(extent_cache, rec);
2851                 return ret;
2852         }
2853         rec = malloc(sizeof(*rec));
2854         rec->start = start;
2855         rec->max_size = max_size;
2856         rec->nr = max(nr, max_size);
2857         rec->found_rec = !!extent_rec;
2858         rec->content_checked = 0;
2859         rec->owner_ref_checked = 0;
2860         rec->num_duplicates = 0;
2861         rec->metadata = metadata;
2862         INIT_LIST_HEAD(&rec->backrefs);
2863         INIT_LIST_HEAD(&rec->dups);
2864         INIT_LIST_HEAD(&rec->list);
2865
2866         if (is_root)
2867                 rec->is_root = 1;
2868         else
2869                 rec->is_root = 0;
2870
2871         if (inc_ref)
2872                 rec->refs = 1;
2873         else
2874                 rec->refs = 0;
2875
2876         if (extent_item_refs)
2877                 rec->extent_item_refs = extent_item_refs;
2878         else
2879                 rec->extent_item_refs = 0;
2880
2881         if (parent_key)
2882                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2883         else
2884                 memset(&rec->parent_key, 0, sizeof(*parent_key));
2885
2886         if (parent_gen)
2887                 rec->parent_generation = parent_gen;
2888         else
2889                 rec->parent_generation = 0;
2890
2891         rec->cache.start = start;
2892         rec->cache.size = nr;
2893         ret = insert_cache_extent(extent_cache, &rec->cache);
2894         BUG_ON(ret);
2895         bytes_used += nr;
2896         if (set_checked) {
2897                 rec->content_checked = 1;
2898                 rec->owner_ref_checked = 1;
2899         }
2900         return ret;
2901 }
2902
2903 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
2904                             u64 parent, u64 root, int found_ref)
2905 {
2906         struct extent_record *rec;
2907         struct tree_backref *back;
2908         struct cache_extent *cache;
2909
2910         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2911         if (!cache) {
2912                 add_extent_rec(extent_cache, NULL, 0, bytenr,
2913                                1, 0, 0, 0, 0, 1, 0, 0);
2914                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2915                 if (!cache)
2916                         abort();
2917         }
2918
2919         rec = container_of(cache, struct extent_record, cache);
2920         if (rec->start != bytenr) {
2921                 abort();
2922         }
2923
2924         back = find_tree_backref(rec, parent, root);
2925         if (!back)
2926                 back = alloc_tree_backref(rec, parent, root);
2927
2928         if (found_ref) {
2929                 if (back->node.found_ref) {
2930                         fprintf(stderr, "Extent back ref already exists "
2931                                 "for %llu parent %llu root %llu \n",
2932                                 (unsigned long long)bytenr,
2933                                 (unsigned long long)parent,
2934                                 (unsigned long long)root);
2935                 }
2936                 back->node.found_ref = 1;
2937         } else {
2938                 if (back->node.found_extent_tree) {
2939                         fprintf(stderr, "Extent back ref already exists "
2940                                 "for %llu parent %llu root %llu \n",
2941                                 (unsigned long long)bytenr,
2942                                 (unsigned long long)parent,
2943                                 (unsigned long long)root);
2944                 }
2945                 back->node.found_extent_tree = 1;
2946         }
2947         maybe_free_extent_rec(extent_cache, rec);
2948         return 0;
2949 }
2950
2951 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
2952                             u64 parent, u64 root, u64 owner, u64 offset,
2953                             u32 num_refs, int found_ref, u64 max_size)
2954 {
2955         struct extent_record *rec;
2956         struct data_backref *back;
2957         struct cache_extent *cache;
2958
2959         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2960         if (!cache) {
2961                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
2962                                0, 0, max_size);
2963                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2964                 if (!cache)
2965                         abort();
2966         }
2967
2968         rec = container_of(cache, struct extent_record, cache);
2969         if (rec->max_size < max_size)
2970                 rec->max_size = max_size;
2971
2972         /*
2973          * If found_ref is set then max_size is the real size and must match the
2974          * existing refs.  So if we have already found a ref then we need to
2975          * make sure that this ref matches the existing one, otherwise we need
2976          * to add a new backref so we can notice that the backrefs don't match
2977          * and we need to figure out who is telling the truth.  This is to
2978          * account for that awful fsync bug I introduced where we'd end up with
2979          * a btrfs_file_extent_item that would have its length include multiple
2980          * prealloc extents or point inside of a prealloc extent.
2981          */
2982         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
2983                                  bytenr, max_size);
2984         if (!back)
2985                 back = alloc_data_backref(rec, parent, root, owner, offset,
2986                                           max_size);
2987
2988         if (found_ref) {
2989                 BUG_ON(num_refs != 1);
2990                 if (back->node.found_ref)
2991                         BUG_ON(back->bytes != max_size);
2992                 back->node.found_ref = 1;
2993                 back->found_ref += 1;
2994                 back->bytes = max_size;
2995                 back->disk_bytenr = bytenr;
2996                 rec->refs += 1;
2997                 rec->content_checked = 1;
2998                 rec->owner_ref_checked = 1;
2999         } else {
3000                 if (back->node.found_extent_tree) {
3001                         fprintf(stderr, "Extent back ref already exists "
3002                                 "for %llu parent %llu root %llu "
3003                                 "owner %llu offset %llu num_refs %lu\n",
3004                                 (unsigned long long)bytenr,
3005                                 (unsigned long long)parent,
3006                                 (unsigned long long)root,
3007                                 (unsigned long long)owner,
3008                                 (unsigned long long)offset,
3009                                 (unsigned long)num_refs);
3010                 }
3011                 back->num_refs = num_refs;
3012                 back->node.found_extent_tree = 1;
3013         }
3014         maybe_free_extent_rec(extent_cache, rec);
3015         return 0;
3016 }
3017
3018 static int add_pending(struct cache_tree *pending,
3019                        struct cache_tree *seen, u64 bytenr, u32 size)
3020 {
3021         int ret;
3022         ret = add_cache_extent(seen, bytenr, size);
3023         if (ret)
3024                 return ret;
3025         add_cache_extent(pending, bytenr, size);
3026         return 0;
3027 }
3028
3029 static int pick_next_pending(struct cache_tree *pending,
3030                         struct cache_tree *reada,
3031                         struct cache_tree *nodes,
3032                         u64 last, struct block_info *bits, int bits_nr,
3033                         int *reada_bits)
3034 {
3035         unsigned long node_start = last;
3036         struct cache_extent *cache;
3037         int ret;
3038
3039         cache = search_cache_extent(reada, 0);
3040         if (cache) {
3041                 bits[0].start = cache->start;
3042                 bits[0].size = cache->size;
3043                 *reada_bits = 1;
3044                 return 1;
3045         }
3046         *reada_bits = 0;
3047         if (node_start > 32768)
3048                 node_start -= 32768;
3049
3050         cache = search_cache_extent(nodes, node_start);
3051         if (!cache)
3052                 cache = search_cache_extent(nodes, 0);
3053
3054         if (!cache) {
3055                  cache = search_cache_extent(pending, 0);
3056                  if (!cache)
3057                          return 0;
3058                  ret = 0;
3059                  do {
3060                          bits[ret].start = cache->start;
3061                          bits[ret].size = cache->size;
3062                          cache = next_cache_extent(cache);
3063                          ret++;
3064                  } while (cache && ret < bits_nr);
3065                  return ret;
3066         }
3067
3068         ret = 0;
3069         do {
3070                 bits[ret].start = cache->start;
3071                 bits[ret].size = cache->size;
3072                 cache = next_cache_extent(cache);
3073                 ret++;
3074         } while (cache && ret < bits_nr);
3075
3076         if (bits_nr - ret > 8) {
3077                 u64 lookup = bits[0].start + bits[0].size;
3078                 struct cache_extent *next;
3079                 next = search_cache_extent(pending, lookup);
3080                 while(next) {
3081                         if (next->start - lookup > 32768)
3082                                 break;
3083                         bits[ret].start = next->start;
3084                         bits[ret].size = next->size;
3085                         lookup = next->start + next->size;
3086                         ret++;
3087                         if (ret == bits_nr)
3088                                 break;
3089                         next = next_cache_extent(next);
3090                         if (!next)
3091                                 break;
3092                 }
3093         }
3094         return ret;
3095 }
3096
3097 static void free_chunk_record(struct cache_extent *cache)
3098 {
3099         struct chunk_record *rec;
3100
3101         rec = container_of(cache, struct chunk_record, cache);
3102         free(rec);
3103 }
3104
3105 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
3106 {
3107         cache_tree_free_extents(chunk_cache, free_chunk_record);
3108 }
3109
3110 static void free_device_record(struct rb_node *node)
3111 {
3112         struct device_record *rec;
3113
3114         rec = container_of(node, struct device_record, node);
3115         free(rec);
3116 }
3117
3118 FREE_RB_BASED_TREE(device_cache, free_device_record);
3119
3120 int insert_block_group_record(struct block_group_tree *tree,
3121                               struct block_group_record *bg_rec)
3122 {
3123         int ret;
3124
3125         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
3126         if (ret)
3127                 return ret;
3128
3129         list_add_tail(&bg_rec->list, &tree->block_groups);
3130         return 0;
3131 }
3132
3133 static void free_block_group_record(struct cache_extent *cache)
3134 {
3135         struct block_group_record *rec;
3136
3137         rec = container_of(cache, struct block_group_record, cache);
3138         free(rec);
3139 }
3140
3141 void free_block_group_tree(struct block_group_tree *tree)
3142 {
3143         cache_tree_free_extents(&tree->tree, free_block_group_record);
3144 }
3145
3146 int insert_device_extent_record(struct device_extent_tree *tree,
3147                                 struct device_extent_record *de_rec)
3148 {
3149         int ret;
3150
3151         /*
3152          * Device extent is a bit different from the other extents, because
3153          * the extents which belong to the different devices may have the
3154          * same start and size, so we need use the special extent cache
3155          * search/insert functions.
3156          */
3157         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
3158         if (ret)
3159                 return ret;
3160
3161         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
3162         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
3163         return 0;
3164 }
3165
3166 static void free_device_extent_record(struct cache_extent *cache)
3167 {
3168         struct device_extent_record *rec;
3169
3170         rec = container_of(cache, struct device_extent_record, cache);
3171         free(rec);
3172 }
3173
3174 void free_device_extent_tree(struct device_extent_tree *tree)
3175 {
3176         cache_tree_free_extents(&tree->tree, free_device_extent_record);
3177 }
3178
3179 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3180 static int process_extent_ref_v0(struct cache_tree *extent_cache,
3181                                  struct extent_buffer *leaf, int slot)
3182 {
3183         struct btrfs_extent_ref_v0 *ref0;
3184         struct btrfs_key key;
3185
3186         btrfs_item_key_to_cpu(leaf, &key, slot);
3187         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
3188         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
3189                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
3190         } else {
3191                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
3192                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
3193         }
3194         return 0;
3195 }
3196 #endif
3197
3198 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
3199                                             struct btrfs_key *key,
3200                                             int slot)
3201 {
3202         struct btrfs_chunk *ptr;
3203         struct chunk_record *rec;
3204         int num_stripes, i;
3205
3206         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3207         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
3208
3209         rec = malloc(btrfs_chunk_record_size(num_stripes));
3210         if (!rec) {
3211                 fprintf(stderr, "memory allocation failed\n");
3212                 exit(-1);
3213         }
3214
3215         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
3216
3217         INIT_LIST_HEAD(&rec->list);
3218         INIT_LIST_HEAD(&rec->dextents);
3219         rec->bg_rec = NULL;
3220
3221         rec->cache.start = key->offset;
3222         rec->cache.size = btrfs_chunk_length(leaf, ptr);
3223
3224         rec->generation = btrfs_header_generation(leaf);
3225
3226         rec->objectid = key->objectid;
3227         rec->type = key->type;
3228         rec->offset = key->offset;
3229
3230         rec->length = rec->cache.size;
3231         rec->owner = btrfs_chunk_owner(leaf, ptr);
3232         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
3233         rec->type_flags = btrfs_chunk_type(leaf, ptr);
3234         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
3235         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
3236         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
3237         rec->num_stripes = num_stripes;
3238         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
3239
3240         for (i = 0; i < rec->num_stripes; ++i) {
3241                 rec->stripes[i].devid =
3242                         btrfs_stripe_devid_nr(leaf, ptr, i);
3243                 rec->stripes[i].offset =
3244                         btrfs_stripe_offset_nr(leaf, ptr, i);
3245                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
3246                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
3247                                 BTRFS_UUID_SIZE);
3248         }
3249
3250         return rec;
3251 }
3252
3253 static int process_chunk_item(struct cache_tree *chunk_cache,
3254                               struct btrfs_key *key, struct extent_buffer *eb,
3255                               int slot)
3256 {
3257         struct chunk_record *rec;
3258         int ret = 0;
3259
3260         rec = btrfs_new_chunk_record(eb, key, slot);
3261         ret = insert_cache_extent(chunk_cache, &rec->cache);
3262         if (ret) {
3263                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
3264                         rec->offset, rec->length);
3265                 free(rec);
3266         }
3267
3268         return ret;
3269 }
3270
3271 static int process_device_item(struct rb_root *dev_cache,
3272                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
3273 {
3274         struct btrfs_dev_item *ptr;
3275         struct device_record *rec;
3276         int ret = 0;
3277
3278         ptr = btrfs_item_ptr(eb,
3279                 slot, struct btrfs_dev_item);
3280
3281         rec = malloc(sizeof(*rec));
3282         if (!rec) {
3283                 fprintf(stderr, "memory allocation failed\n");
3284                 return -ENOMEM;
3285         }
3286
3287         rec->devid = key->offset;
3288         rec->generation = btrfs_header_generation(eb);
3289
3290         rec->objectid = key->objectid;
3291         rec->type = key->type;
3292         rec->offset = key->offset;
3293
3294         rec->devid = btrfs_device_id(eb, ptr);
3295         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
3296         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
3297
3298         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
3299         if (ret) {
3300                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
3301                 free(rec);
3302         }
3303
3304         return ret;
3305 }
3306
3307 struct block_group_record *
3308 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
3309                              int slot)
3310 {
3311         struct btrfs_block_group_item *ptr;
3312         struct block_group_record *rec;
3313
3314         rec = malloc(sizeof(*rec));
3315         if (!rec) {
3316                 fprintf(stderr, "memory allocation failed\n");
3317                 exit(-1);
3318         }
3319         memset(rec, 0, sizeof(*rec));
3320
3321         rec->cache.start = key->objectid;
3322         rec->cache.size = key->offset;
3323
3324         rec->generation = btrfs_header_generation(leaf);
3325
3326         rec->objectid = key->objectid;
3327         rec->type = key->type;
3328         rec->offset = key->offset;
3329
3330         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
3331         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
3332
3333         INIT_LIST_HEAD(&rec->list);
3334
3335         return rec;
3336 }
3337
3338 static int process_block_group_item(struct block_group_tree *block_group_cache,
3339                                     struct btrfs_key *key,
3340                                     struct extent_buffer *eb, int slot)
3341 {
3342         struct block_group_record *rec;
3343         int ret = 0;
3344
3345         rec = btrfs_new_block_group_record(eb, key, slot);
3346         ret = insert_block_group_record(block_group_cache, rec);
3347         if (ret) {
3348                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
3349                         rec->objectid, rec->offset);
3350                 free(rec);
3351         }
3352
3353         return ret;
3354 }
3355
3356 struct device_extent_record *
3357 btrfs_new_device_extent_record(struct extent_buffer *leaf,
3358                                struct btrfs_key *key, int slot)
3359 {
3360         struct device_extent_record *rec;
3361         struct btrfs_dev_extent *ptr;
3362
3363         rec = malloc(sizeof(*rec));
3364         if (!rec) {
3365                 fprintf(stderr, "memory allocation failed\n");
3366                 exit(-1);
3367         }
3368         memset(rec, 0, sizeof(*rec));
3369
3370         rec->cache.objectid = key->objectid;
3371         rec->cache.start = key->offset;
3372
3373         rec->generation = btrfs_header_generation(leaf);
3374
3375         rec->objectid = key->objectid;
3376         rec->type = key->type;
3377         rec->offset = key->offset;
3378
3379         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
3380         rec->chunk_objecteid =
3381                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
3382         rec->chunk_offset =
3383                 btrfs_dev_extent_chunk_offset(leaf, ptr);
3384         rec->length = btrfs_dev_extent_length(leaf, ptr);
3385         rec->cache.size = rec->length;
3386
3387         INIT_LIST_HEAD(&rec->chunk_list);
3388         INIT_LIST_HEAD(&rec->device_list);
3389
3390         return rec;
3391 }
3392
3393 static int
3394 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
3395                            struct btrfs_key *key, struct extent_buffer *eb,
3396                            int slot)
3397 {
3398         struct device_extent_record *rec;
3399         int ret;
3400
3401         rec = btrfs_new_device_extent_record(eb, key, slot);
3402         ret = insert_device_extent_record(dev_extent_cache, rec);
3403         if (ret) {
3404                 fprintf(stderr,
3405                         "Device extent[%llu, %llu, %llu] existed.\n",
3406                         rec->objectid, rec->offset, rec->length);
3407                 free(rec);
3408         }
3409
3410         return ret;
3411 }
3412
3413 static int process_extent_item(struct btrfs_root *root,
3414                                struct cache_tree *extent_cache,
3415                                struct extent_buffer *eb, int slot)
3416 {
3417         struct btrfs_extent_item *ei;
3418         struct btrfs_extent_inline_ref *iref;
3419         struct btrfs_extent_data_ref *dref;
3420         struct btrfs_shared_data_ref *sref;
3421         struct btrfs_key key;
3422         unsigned long end;
3423         unsigned long ptr;
3424         int type;
3425         u32 item_size = btrfs_item_size_nr(eb, slot);
3426         u64 refs = 0;
3427         u64 offset;
3428         u64 num_bytes;
3429         int metadata = 0;
3430
3431         btrfs_item_key_to_cpu(eb, &key, slot);
3432
3433         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3434                 metadata = 1;
3435                 num_bytes = root->leafsize;
3436         } else {
3437                 num_bytes = key.offset;
3438         }
3439
3440         if (item_size < sizeof(*ei)) {
3441 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3442                 struct btrfs_extent_item_v0 *ei0;
3443                 BUG_ON(item_size != sizeof(*ei0));
3444                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
3445                 refs = btrfs_extent_refs_v0(eb, ei0);
3446 #else
3447                 BUG();
3448 #endif
3449                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
3450                                       num_bytes, refs, 0, 0, 0, metadata, 1,
3451                                       num_bytes);
3452         }
3453
3454         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
3455         refs = btrfs_extent_refs(eb, ei);
3456
3457         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
3458                        refs, 0, 0, 0, metadata, 1, num_bytes);
3459
3460         ptr = (unsigned long)(ei + 1);
3461         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
3462             key.type == BTRFS_EXTENT_ITEM_KEY)
3463                 ptr += sizeof(struct btrfs_tree_block_info);
3464
3465         end = (unsigned long)ei + item_size;
3466         while (ptr < end) {
3467                 iref = (struct btrfs_extent_inline_ref *)ptr;
3468                 type = btrfs_extent_inline_ref_type(eb, iref);
3469                 offset = btrfs_extent_inline_ref_offset(eb, iref);
3470                 switch (type) {
3471                 case BTRFS_TREE_BLOCK_REF_KEY:
3472                         add_tree_backref(extent_cache, key.objectid,
3473                                          0, offset, 0);
3474                         break;
3475                 case BTRFS_SHARED_BLOCK_REF_KEY:
3476                         add_tree_backref(extent_cache, key.objectid,
3477                                          offset, 0, 0);
3478                         break;
3479                 case BTRFS_EXTENT_DATA_REF_KEY:
3480                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3481                         add_data_backref(extent_cache, key.objectid, 0,
3482                                         btrfs_extent_data_ref_root(eb, dref),
3483                                         btrfs_extent_data_ref_objectid(eb,
3484                                                                        dref),
3485                                         btrfs_extent_data_ref_offset(eb, dref),
3486                                         btrfs_extent_data_ref_count(eb, dref),
3487                                         0, num_bytes);
3488                         break;
3489                 case BTRFS_SHARED_DATA_REF_KEY:
3490                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
3491                         add_data_backref(extent_cache, key.objectid, offset,
3492                                         0, 0, 0,
3493                                         btrfs_shared_data_ref_count(eb, sref),
3494                                         0, num_bytes);
3495                         break;
3496                 default:
3497                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
3498                                 key.objectid, key.type, num_bytes);
3499                         goto out;
3500                 }
3501                 ptr += btrfs_extent_inline_ref_size(type);
3502         }
3503         WARN_ON(ptr > end);
3504 out:
3505         return 0;
3506 }
3507
3508 static int check_cache_range(struct btrfs_root *root,
3509                              struct btrfs_block_group_cache *cache,
3510                              u64 offset, u64 bytes)
3511 {
3512         struct btrfs_free_space *entry;
3513         u64 *logical;
3514         u64 bytenr;
3515         int stripe_len;
3516         int i, nr, ret;
3517
3518         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3519                 bytenr = btrfs_sb_offset(i);
3520                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
3521                                        cache->key.objectid, bytenr, 0,
3522                                        &logical, &nr, &stripe_len);
3523                 if (ret)
3524                         return ret;
3525
3526                 while (nr--) {
3527                         if (logical[nr] + stripe_len <= offset)
3528                                 continue;
3529                         if (offset + bytes <= logical[nr])
3530                                 continue;
3531                         if (logical[nr] == offset) {
3532                                 if (stripe_len >= bytes) {
3533                                         kfree(logical);
3534                                         return 0;
3535                                 }
3536                                 bytes -= stripe_len;
3537                                 offset += stripe_len;
3538                         } else if (logical[nr] < offset) {
3539                                 if (logical[nr] + stripe_len >=
3540                                     offset + bytes) {
3541                                         kfree(logical);
3542                                         return 0;
3543                                 }
3544                                 bytes = (offset + bytes) -
3545                                         (logical[nr] + stripe_len);
3546                                 offset = logical[nr] + stripe_len;
3547                         } else {
3548                                 /*
3549                                  * Could be tricky, the super may land in the
3550                                  * middle of the area we're checking.  First
3551                                  * check the easiest case, it's at the end.
3552                                  */
3553                                 if (logical[nr] + stripe_len >=
3554                                     bytes + offset) {
3555                                         bytes = logical[nr] - offset;
3556                                         continue;
3557                                 }
3558
3559                                 /* Check the left side */
3560                                 ret = check_cache_range(root, cache,
3561                                                         offset,
3562                                                         logical[nr] - offset);
3563                                 if (ret) {
3564                                         kfree(logical);
3565                                         return ret;
3566                                 }
3567
3568                                 /* Now we continue with the right side */
3569                                 bytes = (offset + bytes) -
3570                                         (logical[nr] + stripe_len);
3571                                 offset = logical[nr] + stripe_len;
3572                         }
3573                 }
3574
3575                 kfree(logical);
3576         }
3577
3578         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
3579         if (!entry) {
3580                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
3581                         offset, offset+bytes);
3582                 return -EINVAL;
3583         }
3584
3585         if (entry->offset != offset) {
3586                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
3587                         entry->offset);
3588                 return -EINVAL;
3589         }
3590
3591         if (entry->bytes != bytes) {
3592                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
3593                         bytes, entry->bytes, offset);
3594                 return -EINVAL;
3595         }
3596
3597         unlink_free_space(cache->free_space_ctl, entry);
3598         free(entry);
3599         return 0;
3600 }
3601
3602 static int verify_space_cache(struct btrfs_root *root,
3603                               struct btrfs_block_group_cache *cache)
3604 {
3605         struct btrfs_path *path;
3606         struct extent_buffer *leaf;
3607         struct btrfs_key key;
3608         u64 last;
3609         int ret = 0;
3610
3611         path = btrfs_alloc_path();
3612         if (!path)
3613                 return -ENOMEM;
3614
3615         root = root->fs_info->extent_root;
3616
3617         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
3618
3619         key.objectid = last;
3620         key.offset = 0;
3621         key.type = BTRFS_EXTENT_ITEM_KEY;
3622
3623         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3624         if (ret < 0)
3625                 goto out;
3626         ret = 0;
3627         while (1) {
3628                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3629                         ret = btrfs_next_leaf(root, path);
3630                         if (ret < 0)
3631                                 goto out;
3632                         if (ret > 0) {
3633                                 ret = 0;
3634                                 break;
3635                         }
3636                 }
3637                 leaf = path->nodes[0];
3638                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3639                 if (key.objectid >= cache->key.offset + cache->key.objectid)
3640                         break;
3641                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3642                     key.type != BTRFS_METADATA_ITEM_KEY) {
3643                         path->slots[0]++;
3644                         continue;
3645                 }
3646
3647                 if (last == key.objectid) {
3648                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
3649                                 last = key.objectid + key.offset;
3650                         else
3651                                 last = key.objectid + root->leafsize;
3652                         path->slots[0]++;
3653                         continue;
3654                 }
3655
3656                 ret = check_cache_range(root, cache, last,
3657                                         key.objectid - last);
3658                 if (ret)
3659                         break;
3660                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3661                         last = key.objectid + key.offset;
3662                 else
3663                         last = key.objectid + root->leafsize;
3664                 path->slots[0]++;
3665         }
3666
3667         if (last < cache->key.objectid + cache->key.offset)
3668                 ret = check_cache_range(root, cache, last,
3669                                         cache->key.objectid +
3670                                         cache->key.offset - last);
3671
3672 out:
3673         btrfs_free_path(path);
3674
3675         if (!ret &&
3676             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
3677                 fprintf(stderr, "There are still entries left in the space "
3678                         "cache\n");
3679                 ret = -EINVAL;
3680         }
3681
3682         return ret;
3683 }
3684
3685 static int check_space_cache(struct btrfs_root *root)
3686 {
3687         struct btrfs_block_group_cache *cache;
3688         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
3689         int ret;
3690         int error = 0;
3691
3692         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
3693             btrfs_super_generation(root->fs_info->super_copy) !=
3694             btrfs_super_cache_generation(root->fs_info->super_copy)) {
3695                 printf("cache and super generation don't match, space cache "
3696                        "will be invalidated\n");
3697                 return 0;
3698         }
3699
3700         while (1) {
3701                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
3702                 if (!cache)
3703                         break;
3704
3705                 start = cache->key.objectid + cache->key.offset;
3706                 if (!cache->free_space_ctl) {
3707                         if (btrfs_init_free_space_ctl(cache,
3708                                                       root->sectorsize)) {
3709                                 ret = -ENOMEM;
3710                                 break;
3711                         }
3712                 } else {
3713                         btrfs_remove_free_space_cache(cache);
3714                 }
3715
3716                 ret = load_free_space_cache(root->fs_info, cache);
3717                 if (!ret)
3718                         continue;
3719
3720                 ret = verify_space_cache(root, cache);
3721                 if (ret) {
3722                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
3723                                 cache->key.objectid);
3724                         error++;
3725                 }
3726         }
3727
3728         return error ? -EINVAL : 0;
3729 }
3730
3731 static int read_extent_data(struct btrfs_root *root, char *data,
3732                         u64 logical, u64 *len, int mirror)
3733 {
3734         u64 offset = 0;
3735         struct btrfs_multi_bio *multi = NULL;
3736         struct btrfs_fs_info *info = root->fs_info;
3737         struct btrfs_device *device;
3738         int ret = 0;
3739         u64 max_len = *len;
3740
3741         ret = btrfs_map_block(&info->mapping_tree, READ, logical, len,
3742                               &multi, mirror, NULL);
3743         if (ret) {
3744                 fprintf(stderr, "Couldn't map the block %llu\n",
3745                                 logical + offset);
3746                 goto err;
3747         }
3748         device = multi->stripes[0].dev;
3749
3750         if (device->fd == 0)
3751                 goto err;
3752         if (*len > max_len)
3753                 *len = max_len;
3754
3755         ret = pread64(device->fd, data, *len, multi->stripes[0].physical);
3756         if (ret != *len)
3757                 ret = -EIO;
3758         else
3759                 ret = 0;
3760 err:
3761         kfree(multi);
3762         return ret;
3763 }
3764
3765 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
3766                         u64 num_bytes, unsigned long leaf_offset,
3767                         struct extent_buffer *eb) {
3768
3769         u64 offset = 0;
3770         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3771         char *data;
3772         unsigned long csum_offset;
3773         u32 csum;
3774         u32 csum_expected;
3775         u64 read_len;
3776         u64 data_checked = 0;
3777         u64 tmp;
3778         int ret = 0;
3779         int mirror;
3780         int num_copies;
3781
3782         if (num_bytes % root->sectorsize)
3783                 return -EINVAL;
3784
3785         data = malloc(num_bytes);
3786         if (!data)
3787                 return -ENOMEM;
3788
3789         while (offset < num_bytes) {
3790                 mirror = 0;
3791 again:
3792                 read_len = num_bytes - offset;
3793                 /* read as much space once a time */
3794                 ret = read_extent_data(root, data + offset,
3795                                 bytenr + offset, &read_len, mirror);
3796                 if (ret)
3797                         goto out;
3798                 data_checked = 0;
3799                 /* verify every 4k data's checksum */
3800                 while (data_checked < read_len) {
3801                         csum = ~(u32)0;
3802                         tmp = offset + data_checked;
3803
3804                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
3805                                                csum, root->sectorsize);
3806                         btrfs_csum_final(csum, (char *)&csum);
3807
3808                         csum_offset = leaf_offset +
3809                                  tmp / root->sectorsize * csum_size;
3810                         read_extent_buffer(eb, (char *)&csum_expected,
3811                                            csum_offset, csum_size);
3812                         /* try another mirror */
3813                         if (csum != csum_expected) {
3814                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
3815                                                 mirror, bytenr + tmp,
3816                                                 csum, csum_expected);
3817                                 num_copies = btrfs_num_copies(
3818                                                 &root->fs_info->mapping_tree,
3819                                                 bytenr, num_bytes);
3820                                 if (mirror < num_copies - 1) {
3821                                         mirror += 1;
3822                                         goto again;
3823                                 }
3824                         }
3825                         data_checked += root->sectorsize;
3826                 }
3827                 offset += read_len;
3828         }
3829 out:
3830         free(data);
3831         return ret;
3832 }
3833
3834 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
3835                                u64 num_bytes)
3836 {
3837         struct btrfs_path *path;
3838         struct extent_buffer *leaf;
3839         struct btrfs_key key;
3840         int ret;
3841
3842         path = btrfs_alloc_path();
3843         if (!path) {
3844                 fprintf(stderr, "Error allocing path\n");
3845                 return -ENOMEM;
3846         }
3847
3848         key.objectid = bytenr;
3849         key.type = BTRFS_EXTENT_ITEM_KEY;
3850         key.offset = (u64)-1;
3851
3852 again:
3853         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
3854                                 0, 0);
3855         if (ret < 0) {
3856                 fprintf(stderr, "Error looking up extent record %d\n", ret);
3857                 btrfs_free_path(path);
3858                 return ret;
3859         } else if (ret) {
3860                 if (path->slots[0] > 0) {
3861                         path->slots[0]--;
3862                 } else {
3863                         ret = btrfs_prev_leaf(root, path);
3864                         if (ret < 0) {
3865                                 goto out;
3866                         } else if (ret > 0) {
3867                                 ret = 0;
3868                                 goto out;
3869                         }
3870                 }
3871         }
3872
3873         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3874
3875         /*
3876          * Block group items come before extent items if they have the same
3877          * bytenr, so walk back one more just in case.  Dear future traveler,
3878          * first congrats on mastering time travel.  Now if it's not too much
3879          * trouble could you go back to 2006 and tell Chris to make the
3880          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
3881          * EXTENT_ITEM_KEY please?
3882          */
3883         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
3884                 if (path->slots[0] > 0) {
3885                         path->slots[0]--;
3886                 } else {
3887                         ret = btrfs_prev_leaf(root, path);
3888                         if (ret < 0) {
3889                                 goto out;
3890                         } else if (ret > 0) {
3891                                 ret = 0;
3892                                 goto out;
3893                         }
3894                 }
3895                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3896         }
3897
3898         while (num_bytes) {
3899                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3900                         ret = btrfs_next_leaf(root, path);
3901                         if (ret < 0) {
3902                                 fprintf(stderr, "Error going to next leaf "
3903                                         "%d\n", ret);
3904                                 btrfs_free_path(path);
3905                                 return ret;
3906                         } else if (ret) {
3907                                 break;
3908                         }
3909                 }
3910                 leaf = path->nodes[0];
3911                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3912                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
3913                         path->slots[0]++;
3914                         continue;
3915                 }
3916                 if (key.objectid + key.offset < bytenr) {
3917                         path->slots[0]++;
3918                         continue;
3919                 }
3920                 if (key.objectid > bytenr + num_bytes)
3921                         break;
3922
3923                 if (key.objectid == bytenr) {
3924                         if (key.offset >= num_bytes) {
3925                                 num_bytes = 0;
3926                                 break;
3927                         }
3928                         num_bytes -= key.offset;
3929                         bytenr += key.offset;
3930                 } else if (key.objectid < bytenr) {
3931                         if (key.objectid + key.offset >= bytenr + num_bytes) {
3932                                 num_bytes = 0;
3933                                 break;
3934                         }
3935                         num_bytes = (bytenr + num_bytes) -
3936                                 (key.objectid + key.offset);
3937                         bytenr = key.objectid + key.offset;
3938                 } else {
3939                         if (key.objectid + key.offset < bytenr + num_bytes) {
3940                                 u64 new_start = key.objectid + key.offset;
3941                                 u64 new_bytes = bytenr + num_bytes - new_start;
3942
3943                                 /*
3944                                  * Weird case, the extent is in the middle of
3945                                  * our range, we'll have to search one side
3946                                  * and then the other.  Not sure if this happens
3947                                  * in real life, but no harm in coding it up
3948                                  * anyway just in case.
3949                                  */
3950                                 btrfs_release_path(path);
3951                                 ret = check_extent_exists(root, new_start,
3952                                                           new_bytes);
3953                                 if (ret) {
3954                                         fprintf(stderr, "Right section didn't "
3955                                                 "have a record\n");
3956                                         break;
3957                                 }
3958                                 num_bytes = key.objectid - bytenr;
3959                                 goto again;
3960                         }
3961                         num_bytes = key.objectid - bytenr;
3962                 }
3963                 path->slots[0]++;
3964         }
3965         ret = 0;
3966
3967 out:
3968         if (num_bytes && !ret) {
3969                 fprintf(stderr, "There are no extents for csum range "
3970                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
3971                 ret = 1;
3972         }
3973
3974         btrfs_free_path(path);
3975         return ret;
3976 }
3977
3978 static int check_csums(struct btrfs_root *root)
3979 {
3980         struct btrfs_path *path;
3981         struct extent_buffer *leaf;
3982         struct btrfs_key key;
3983         u64 offset = 0, num_bytes = 0;
3984         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3985         int errors = 0;
3986         int ret;
3987         u64 data_len;
3988         unsigned long leaf_offset;
3989
3990         root = root->fs_info->csum_root;
3991         if (!extent_buffer_uptodate(root->node)) {
3992                 fprintf(stderr, "No valid csum tree found\n");
3993                 return -ENOENT;
3994         }
3995
3996         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3997         key.type = BTRFS_EXTENT_CSUM_KEY;
3998         key.offset = 0;
3999
4000         path = btrfs_alloc_path();
4001         if (!path)
4002                 return -ENOMEM;
4003
4004         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4005         if (ret < 0) {
4006                 fprintf(stderr, "Error searching csum tree %d\n", ret);
4007                 btrfs_free_path(path);
4008                 return ret;
4009         }
4010
4011         if (ret > 0 && path->slots[0])
4012                 path->slots[0]--;
4013         ret = 0;
4014
4015         while (1) {
4016                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4017                         ret = btrfs_next_leaf(root, path);
4018                         if (ret < 0) {
4019                                 fprintf(stderr, "Error going to next leaf "
4020                                         "%d\n", ret);
4021                                 break;
4022                         }
4023                         if (ret)
4024                                 break;
4025                 }
4026                 leaf = path->nodes[0];
4027
4028                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4029                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
4030                         path->slots[0]++;
4031                         continue;
4032                 }
4033
4034                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
4035                               csum_size) * root->sectorsize;
4036                 if (!check_data_csum)
4037                         goto skip_csum_check;
4038                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
4039                 ret = check_extent_csums(root, key.offset, data_len,
4040                                          leaf_offset, leaf);
4041                 if (ret)
4042                         break;
4043 skip_csum_check:
4044                 if (!num_bytes) {
4045                         offset = key.offset;
4046                 } else if (key.offset != offset + num_bytes) {
4047                         ret = check_extent_exists(root, offset, num_bytes);
4048                         if (ret) {
4049                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
4050                                         "there is no extent record\n",
4051                                         offset, offset+num_bytes);
4052                                 errors++;
4053                         }
4054                         offset = key.offset;
4055                         num_bytes = 0;
4056                 }
4057                 num_bytes += data_len;
4058                 path->slots[0]++;
4059         }
4060
4061         btrfs_free_path(path);
4062         return errors;
4063 }
4064
4065 static int is_dropped_key(struct btrfs_key *key,
4066                           struct btrfs_key *drop_key) {
4067         if (key->objectid < drop_key->objectid)
4068                 return 1;
4069         else if (key->objectid == drop_key->objectid) {
4070                 if (key->type < drop_key->type)
4071                         return 1;
4072                 else if (key->type == drop_key->type) {
4073                         if (key->offset < drop_key->offset)
4074                                 return 1;
4075                 }
4076         }
4077         return 0;
4078 }
4079
4080 static int run_next_block(struct btrfs_trans_handle *trans,
4081                           struct btrfs_root *root,
4082                           struct block_info *bits,
4083                           int bits_nr,
4084                           u64 *last,
4085                           struct cache_tree *pending,
4086                           struct cache_tree *seen,
4087                           struct cache_tree *reada,
4088                           struct cache_tree *nodes,
4089                           struct cache_tree *extent_cache,
4090                           struct cache_tree *chunk_cache,
4091                           struct rb_root *dev_cache,
4092                           struct block_group_tree *block_group_cache,
4093                           struct device_extent_tree *dev_extent_cache,
4094                           struct btrfs_root_item *ri)
4095 {
4096         struct extent_buffer *buf;
4097         u64 bytenr;
4098         u32 size;
4099         u64 parent;
4100         u64 owner;
4101         u64 flags;
4102         u64 ptr;
4103         u64 gen = 0;
4104         int ret = 0;
4105         int i;
4106         int nritems;
4107         struct btrfs_key key;
4108         struct cache_extent *cache;
4109         int reada_bits;
4110
4111         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
4112                                     bits_nr, &reada_bits);
4113         if (nritems == 0)
4114                 return 1;
4115
4116         if (!reada_bits) {
4117                 for(i = 0; i < nritems; i++) {
4118                         ret = add_cache_extent(reada, bits[i].start,
4119                                                bits[i].size);
4120                         if (ret == -EEXIST)
4121                                 continue;
4122
4123                         /* fixme, get the parent transid */
4124                         readahead_tree_block(root, bits[i].start,
4125                                              bits[i].size, 0);
4126                 }
4127         }
4128         *last = bits[0].start;
4129         bytenr = bits[0].start;
4130         size = bits[0].size;
4131
4132         cache = lookup_cache_extent(pending, bytenr, size);
4133         if (cache) {
4134                 remove_cache_extent(pending, cache);
4135                 free(cache);
4136         }
4137         cache = lookup_cache_extent(reada, bytenr, size);
4138         if (cache) {
4139                 remove_cache_extent(reada, cache);
4140                 free(cache);
4141         }
4142         cache = lookup_cache_extent(nodes, bytenr, size);
4143         if (cache) {
4144                 remove_cache_extent(nodes, cache);
4145                 free(cache);
4146         }
4147         cache = lookup_cache_extent(extent_cache, bytenr, size);
4148         if (cache) {
4149                 struct extent_record *rec;
4150
4151                 rec = container_of(cache, struct extent_record, cache);
4152                 gen = rec->parent_generation;
4153         }
4154
4155         /* fixme, get the real parent transid */
4156         buf = read_tree_block(root, bytenr, size, gen);
4157         if (!extent_buffer_uptodate(buf)) {
4158                 record_bad_block_io(root->fs_info,
4159                                     extent_cache, bytenr, size);
4160                 goto out;
4161         }
4162
4163         nritems = btrfs_header_nritems(buf);
4164
4165         /*
4166          * FIXME, this only works only if we don't have any full
4167          * backref mode.
4168          */
4169         if (!init_extent_tree) {
4170                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
4171                                        btrfs_header_level(buf), 1, NULL,
4172                                        &flags);
4173                 if (ret < 0)
4174                         flags = 0;
4175         } else {
4176                 flags = 0;
4177         }
4178
4179         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4180                 parent = bytenr;
4181                 owner = 0;
4182         } else {
4183                 parent = 0;
4184                 owner = btrfs_header_owner(buf);
4185         }
4186
4187         ret = check_block(trans, root, extent_cache, buf, flags);
4188         if (ret)
4189                 goto out;
4190
4191         if (btrfs_is_leaf(buf)) {
4192                 btree_space_waste += btrfs_leaf_free_space(root, buf);
4193                 for (i = 0; i < nritems; i++) {
4194                         struct btrfs_file_extent_item *fi;
4195                         btrfs_item_key_to_cpu(buf, &key, i);
4196                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
4197                                 process_extent_item(root, extent_cache, buf,
4198                                                     i);
4199                                 continue;
4200                         }
4201                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4202                                 process_extent_item(root, extent_cache, buf,
4203                                                     i);
4204                                 continue;
4205                         }
4206                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
4207                                 total_csum_bytes +=
4208                                         btrfs_item_size_nr(buf, i);
4209                                 continue;
4210                         }
4211                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
4212                                 process_chunk_item(chunk_cache, &key, buf, i);
4213                                 continue;
4214                         }
4215                         if (key.type == BTRFS_DEV_ITEM_KEY) {
4216                                 process_device_item(dev_cache, &key, buf, i);
4217                                 continue;
4218                         }
4219                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
4220                                 process_block_group_item(block_group_cache,
4221                                         &key, buf, i);
4222                                 continue;
4223                         }
4224                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
4225                                 process_device_extent_item(dev_extent_cache,
4226                                         &key, buf, i);
4227                                 continue;
4228
4229                         }
4230                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
4231 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4232                                 process_extent_ref_v0(extent_cache, buf, i);
4233 #else
4234                                 BUG();
4235 #endif
4236                                 continue;
4237                         }
4238
4239                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
4240                                 add_tree_backref(extent_cache, key.objectid, 0,
4241                                                  key.offset, 0);
4242                                 continue;
4243                         }
4244                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
4245                                 add_tree_backref(extent_cache, key.objectid,
4246                                                  key.offset, 0, 0);
4247                                 continue;
4248                         }
4249                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
4250                                 struct btrfs_extent_data_ref *ref;
4251                                 ref = btrfs_item_ptr(buf, i,
4252                                                 struct btrfs_extent_data_ref);
4253                                 add_data_backref(extent_cache,
4254                                         key.objectid, 0,
4255                                         btrfs_extent_data_ref_root(buf, ref),
4256                                         btrfs_extent_data_ref_objectid(buf,
4257                                                                        ref),
4258                                         btrfs_extent_data_ref_offset(buf, ref),
4259                                         btrfs_extent_data_ref_count(buf, ref),
4260                                         0, root->sectorsize);
4261                                 continue;
4262                         }
4263                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
4264                                 struct btrfs_shared_data_ref *ref;
4265                                 ref = btrfs_item_ptr(buf, i,
4266                                                 struct btrfs_shared_data_ref);
4267                                 add_data_backref(extent_cache,
4268                                         key.objectid, key.offset, 0, 0, 0,
4269                                         btrfs_shared_data_ref_count(buf, ref),
4270                                         0, root->sectorsize);
4271                                 continue;
4272                         }
4273                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
4274                                 struct bad_item *bad;
4275
4276                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
4277                                         continue;
4278                                 if (!owner)
4279                                         continue;
4280                                 bad = malloc(sizeof(struct bad_item));
4281                                 if (!bad)
4282                                         continue;
4283                                 INIT_LIST_HEAD(&bad->list);
4284                                 memcpy(&bad->key, &key,
4285                                        sizeof(struct btrfs_key));
4286                                 bad->root_id = owner;
4287                                 list_add_tail(&bad->list, &delete_items);
4288                                 continue;
4289                         }
4290                         if (key.type != BTRFS_EXTENT_DATA_KEY)
4291                                 continue;
4292                         fi = btrfs_item_ptr(buf, i,
4293                                             struct btrfs_file_extent_item);
4294                         if (btrfs_file_extent_type(buf, fi) ==
4295                             BTRFS_FILE_EXTENT_INLINE)
4296                                 continue;
4297                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
4298                                 continue;
4299
4300                         data_bytes_allocated +=
4301                                 btrfs_file_extent_disk_num_bytes(buf, fi);
4302                         if (data_bytes_allocated < root->sectorsize) {
4303                                 abort();
4304                         }
4305                         data_bytes_referenced +=
4306                                 btrfs_file_extent_num_bytes(buf, fi);
4307                         add_data_backref(extent_cache,
4308                                 btrfs_file_extent_disk_bytenr(buf, fi),
4309                                 parent, owner, key.objectid, key.offset -
4310                                 btrfs_file_extent_offset(buf, fi), 1, 1,
4311                                 btrfs_file_extent_disk_num_bytes(buf, fi));
4312                 }
4313         } else {
4314                 int level;
4315                 struct btrfs_key first_key;
4316
4317                 first_key.objectid = 0;
4318
4319                 if (nritems > 0)
4320                         btrfs_item_key_to_cpu(buf, &first_key, 0);
4321                 level = btrfs_header_level(buf);
4322                 for (i = 0; i < nritems; i++) {
4323                         ptr = btrfs_node_blockptr(buf, i);
4324                         size = btrfs_level_size(root, level - 1);
4325                         btrfs_node_key_to_cpu(buf, &key, i);
4326                         if (ri != NULL) {
4327                                 struct btrfs_key drop_key;
4328                                 btrfs_disk_key_to_cpu(&drop_key,
4329                                                       &ri->drop_progress);
4330                                 if ((level == ri->drop_level)
4331                                     && is_dropped_key(&key, &drop_key)) {
4332                                         continue;
4333                                 }
4334                         }
4335                         ret = add_extent_rec(extent_cache, &key,
4336                                              btrfs_node_ptr_generation(buf, i),
4337                                              ptr, size, 0, 0, 1, 0, 1, 0,
4338                                              size);
4339                         BUG_ON(ret);
4340
4341                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
4342
4343                         if (level > 1) {
4344                                 add_pending(nodes, seen, ptr, size);
4345                         } else {
4346                                 add_pending(pending, seen, ptr, size);
4347                         }
4348                 }
4349                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
4350                                       nritems) * sizeof(struct btrfs_key_ptr);
4351         }
4352         total_btree_bytes += buf->len;
4353         if (fs_root_objectid(btrfs_header_owner(buf)))
4354                 total_fs_tree_bytes += buf->len;
4355         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
4356                 total_extent_tree_bytes += buf->len;
4357         if (!found_old_backref &&
4358             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
4359             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
4360             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
4361                 found_old_backref = 1;
4362 out:
4363         free_extent_buffer(buf);
4364         return ret;
4365 }
4366
4367 static int add_root_to_pending(struct extent_buffer *buf,
4368                                struct cache_tree *extent_cache,
4369                                struct cache_tree *pending,
4370                                struct cache_tree *seen,
4371                                struct cache_tree *nodes,
4372                                struct btrfs_key *root_key)
4373 {
4374         if (btrfs_header_level(buf) > 0)
4375                 add_pending(nodes, seen, buf->start, buf->len);
4376         else
4377                 add_pending(pending, seen, buf->start, buf->len);
4378         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
4379                        0, 1, 1, 0, 1, 0, buf->len);
4380
4381         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
4382             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
4383                 add_tree_backref(extent_cache, buf->start, buf->start,
4384                                  0, 1);
4385         else
4386                 add_tree_backref(extent_cache, buf->start, 0,
4387                                  root_key->objectid, 1);
4388         return 0;
4389 }
4390
4391 /* as we fix the tree, we might be deleting blocks that
4392  * we're tracking for repair.  This hook makes sure we
4393  * remove any backrefs for blocks as we are fixing them.
4394  */
4395 static int free_extent_hook(struct btrfs_trans_handle *trans,
4396                             struct btrfs_root *root,
4397                             u64 bytenr, u64 num_bytes, u64 parent,
4398                             u64 root_objectid, u64 owner, u64 offset,
4399                             int refs_to_drop)
4400 {
4401         struct extent_record *rec;
4402         struct cache_extent *cache;
4403         int is_data;
4404         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
4405
4406         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
4407         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
4408         if (!cache)
4409                 return 0;
4410
4411         rec = container_of(cache, struct extent_record, cache);
4412         if (is_data) {
4413                 struct data_backref *back;
4414                 back = find_data_backref(rec, parent, root_objectid, owner,
4415                                          offset, 1, bytenr, num_bytes);
4416                 if (!back)
4417                         goto out;
4418                 if (back->node.found_ref) {
4419                         back->found_ref -= refs_to_drop;
4420                         if (rec->refs)
4421                                 rec->refs -= refs_to_drop;
4422                 }
4423                 if (back->node.found_extent_tree) {
4424                         back->num_refs -= refs_to_drop;
4425                         if (rec->extent_item_refs)
4426                                 rec->extent_item_refs -= refs_to_drop;
4427                 }
4428                 if (back->found_ref == 0)
4429                         back->node.found_ref = 0;
4430                 if (back->num_refs == 0)
4431                         back->node.found_extent_tree = 0;
4432
4433                 if (!back->node.found_extent_tree && back->node.found_ref) {
4434                         list_del(&back->node.list);
4435                         free(back);
4436                 }
4437         } else {
4438                 struct tree_backref *back;
4439                 back = find_tree_backref(rec, parent, root_objectid);
4440                 if (!back)
4441                         goto out;
4442                 if (back->node.found_ref) {
4443                         if (rec->refs)
4444                                 rec->refs--;
4445                         back->node.found_ref = 0;
4446                 }
4447                 if (back->node.found_extent_tree) {
4448                         if (rec->extent_item_refs)
4449                                 rec->extent_item_refs--;
4450                         back->node.found_extent_tree = 0;
4451                 }
4452                 if (!back->node.found_extent_tree && back->node.found_ref) {
4453                         list_del(&back->node.list);
4454                         free(back);
4455                 }
4456         }
4457         maybe_free_extent_rec(extent_cache, rec);
4458 out:
4459         return 0;
4460 }
4461
4462 static int delete_extent_records(struct btrfs_trans_handle *trans,
4463                                  struct btrfs_root *root,
4464                                  struct btrfs_path *path,
4465                                  u64 bytenr, u64 new_len)
4466 {
4467         struct btrfs_key key;
4468         struct btrfs_key found_key;
4469         struct extent_buffer *leaf;
4470         int ret;
4471         int slot;
4472
4473
4474         key.objectid = bytenr;
4475         key.type = (u8)-1;
4476         key.offset = (u64)-1;
4477
4478         while(1) {
4479                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
4480                                         &key, path, 0, 1);
4481                 if (ret < 0)
4482                         break;
4483
4484                 if (ret > 0) {
4485                         ret = 0;
4486                         if (path->slots[0] == 0)
4487                                 break;
4488                         path->slots[0]--;
4489                 }
4490                 ret = 0;
4491
4492                 leaf = path->nodes[0];
4493                 slot = path->slots[0];
4494
4495                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4496                 if (found_key.objectid != bytenr)
4497                         break;
4498
4499                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
4500                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
4501                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4502                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
4503                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
4504                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
4505                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
4506                         btrfs_release_path(path);
4507                         if (found_key.type == 0) {
4508                                 if (found_key.offset == 0)
4509                                         break;
4510                                 key.offset = found_key.offset - 1;
4511                                 key.type = found_key.type;
4512                         }
4513                         key.type = found_key.type - 1;
4514                         key.offset = (u64)-1;
4515                         continue;
4516                 }
4517
4518                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
4519                         found_key.objectid, found_key.type, found_key.offset);
4520
4521                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
4522                 if (ret)
4523                         break;
4524                 btrfs_release_path(path);
4525
4526                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4527                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
4528                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
4529                                 found_key.offset : root->leafsize;
4530
4531                         ret = btrfs_update_block_group(trans, root, bytenr,
4532                                                        bytes, 0, 0);
4533                         if (ret)
4534                                 break;
4535                 }
4536         }
4537
4538         btrfs_release_path(path);
4539         return ret;
4540 }
4541
4542 /*
4543  * for a single backref, this will allocate a new extent
4544  * and add the backref to it.
4545  */
4546 static int record_extent(struct btrfs_trans_handle *trans,
4547                          struct btrfs_fs_info *info,
4548                          struct btrfs_path *path,
4549                          struct extent_record *rec,
4550                          struct extent_backref *back,
4551                          int allocated, u64 flags)
4552 {
4553         int ret;
4554         struct btrfs_root *extent_root = info->extent_root;
4555         struct extent_buffer *leaf;
4556         struct btrfs_key ins_key;
4557         struct btrfs_extent_item *ei;
4558         struct tree_backref *tback;
4559         struct data_backref *dback;
4560         struct btrfs_tree_block_info *bi;
4561
4562         if (!back->is_data)
4563                 rec->max_size = max_t(u64, rec->max_size,
4564                                     info->extent_root->leafsize);
4565
4566         if (!allocated) {
4567                 u32 item_size = sizeof(*ei);
4568
4569                 if (!back->is_data)
4570                         item_size += sizeof(*bi);
4571
4572                 ins_key.objectid = rec->start;
4573                 ins_key.offset = rec->max_size;
4574                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
4575
4576                 ret = btrfs_insert_empty_item(trans, extent_root, path,
4577                                         &ins_key, item_size);
4578                 if (ret)
4579                         goto fail;
4580
4581                 leaf = path->nodes[0];
4582                 ei = btrfs_item_ptr(leaf, path->slots[0],
4583                                     struct btrfs_extent_item);
4584
4585                 btrfs_set_extent_refs(leaf, ei, 0);
4586                 btrfs_set_extent_generation(leaf, ei, rec->generation);
4587
4588                 if (back->is_data) {
4589                         btrfs_set_extent_flags(leaf, ei,
4590                                                BTRFS_EXTENT_FLAG_DATA);
4591                 } else {
4592                         struct btrfs_disk_key copy_key;;
4593
4594                         tback = (struct tree_backref *)back;
4595                         bi = (struct btrfs_tree_block_info *)(ei + 1);
4596                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
4597                                              sizeof(*bi));
4598
4599                         btrfs_set_disk_key_objectid(&copy_key,
4600                                                     rec->info_objectid);
4601                         btrfs_set_disk_key_type(&copy_key, 0);
4602                         btrfs_set_disk_key_offset(&copy_key, 0);
4603
4604                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
4605                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
4606
4607                         btrfs_set_extent_flags(leaf, ei,
4608                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
4609                 }
4610
4611                 btrfs_mark_buffer_dirty(leaf);
4612                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
4613                                                rec->max_size, 1, 0);
4614                 if (ret)
4615                         goto fail;
4616                 btrfs_release_path(path);
4617         }
4618
4619         if (back->is_data) {
4620                 u64 parent;
4621                 int i;
4622
4623                 dback = (struct data_backref *)back;
4624                 if (back->full_backref)
4625                         parent = dback->parent;
4626                 else
4627                         parent = 0;
4628
4629                 for (i = 0; i < dback->found_ref; i++) {
4630                         /* if parent != 0, we're doing a full backref
4631                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
4632                          * just makes the backref allocator create a data
4633                          * backref
4634                          */
4635                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
4636                                                    rec->start, rec->max_size,
4637                                                    parent,
4638                                                    dback->root,
4639                                                    parent ?
4640                                                    BTRFS_FIRST_FREE_OBJECTID :
4641                                                    dback->owner,
4642                                                    dback->offset);
4643                         if (ret)
4644                                 break;
4645                 }
4646                 fprintf(stderr, "adding new data backref"
4647                                 " on %llu %s %llu owner %llu"
4648                                 " offset %llu found %d\n",
4649                                 (unsigned long long)rec->start,
4650                                 back->full_backref ?
4651                                 "parent" : "root",
4652                                 back->full_backref ?
4653                                 (unsigned long long)parent :
4654                                 (unsigned long long)dback->root,
4655                                 (unsigned long long)dback->owner,
4656                                 (unsigned long long)dback->offset,
4657                                 dback->found_ref);
4658         } else {
4659                 u64 parent;
4660
4661                 tback = (struct tree_backref *)back;
4662                 if (back->full_backref)
4663                         parent = tback->parent;
4664                 else
4665                         parent = 0;
4666
4667                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
4668                                            rec->start, rec->max_size,
4669                                            parent, tback->root, 0, 0);
4670                 fprintf(stderr, "adding new tree backref on "
4671                         "start %llu len %llu parent %llu root %llu\n",
4672                         rec->start, rec->max_size, tback->parent, tback->root);
4673         }
4674         if (ret)
4675                 goto fail;
4676 fail:
4677         btrfs_release_path(path);
4678         return ret;
4679 }
4680
4681 struct extent_entry {
4682         u64 bytenr;
4683         u64 bytes;
4684         int count;
4685         int broken;
4686         struct list_head list;
4687 };
4688
4689 static struct extent_entry *find_entry(struct list_head *entries,
4690                                        u64 bytenr, u64 bytes)
4691 {
4692         struct extent_entry *entry = NULL;
4693
4694         list_for_each_entry(entry, entries, list) {
4695                 if (entry->bytenr == bytenr && entry->bytes == bytes)
4696                         return entry;
4697         }
4698
4699         return NULL;
4700 }
4701
4702 static struct extent_entry *find_most_right_entry(struct list_head *entries)
4703 {
4704         struct extent_entry *entry, *best = NULL, *prev = NULL;
4705
4706         list_for_each_entry(entry, entries, list) {
4707                 if (!prev) {
4708                         prev = entry;
4709                         continue;
4710                 }
4711
4712                 /*
4713                  * If there are as many broken entries as entries then we know
4714                  * not to trust this particular entry.
4715                  */
4716                 if (entry->broken == entry->count)
4717                         continue;
4718
4719                 /*
4720                  * If our current entry == best then we can't be sure our best
4721                  * is really the best, so we need to keep searching.
4722                  */
4723                 if (best && best->count == entry->count) {
4724                         prev = entry;
4725                         best = NULL;
4726                         continue;
4727                 }
4728
4729                 /* Prev == entry, not good enough, have to keep searching */
4730                 if (!prev->broken && prev->count == entry->count)
4731                         continue;
4732
4733                 if (!best)
4734                         best = (prev->count > entry->count) ? prev : entry;
4735                 else if (best->count < entry->count)
4736                         best = entry;
4737                 prev = entry;
4738         }
4739
4740         return best;
4741 }
4742
4743 static int repair_ref(struct btrfs_trans_handle *trans,
4744                       struct btrfs_fs_info *info, struct btrfs_path *path,
4745                       struct data_backref *dback, struct extent_entry *entry)
4746 {
4747         struct btrfs_root *root;
4748         struct btrfs_file_extent_item *fi;
4749         struct extent_buffer *leaf;
4750         struct btrfs_key key;
4751         u64 bytenr, bytes;
4752         int ret;
4753
4754         key.objectid = dback->root;
4755         key.type = BTRFS_ROOT_ITEM_KEY;
4756         key.offset = (u64)-1;
4757         root = btrfs_read_fs_root(info, &key);
4758         if (IS_ERR(root)) {
4759                 fprintf(stderr, "Couldn't find root for our ref\n");
4760                 return -EINVAL;
4761         }
4762
4763         /*
4764          * The backref points to the original offset of the extent if it was
4765          * split, so we need to search down to the offset we have and then walk
4766          * forward until we find the backref we're looking for.
4767          */
4768         key.objectid = dback->owner;
4769         key.type = BTRFS_EXTENT_DATA_KEY;
4770         key.offset = dback->offset;
4771         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4772         if (ret < 0) {
4773                 fprintf(stderr, "Error looking up ref %d\n", ret);
4774                 return ret;
4775         }
4776
4777         while (1) {
4778                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4779                         ret = btrfs_next_leaf(root, path);
4780                         if (ret) {
4781                                 fprintf(stderr, "Couldn't find our ref, next\n");
4782                                 return -EINVAL;
4783                         }
4784                 }
4785                 leaf = path->nodes[0];
4786                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4787                 if (key.objectid != dback->owner ||
4788                     key.type != BTRFS_EXTENT_DATA_KEY) {
4789                         fprintf(stderr, "Couldn't find our ref, search\n");
4790                         return -EINVAL;
4791                 }
4792                 fi = btrfs_item_ptr(leaf, path->slots[0],
4793                                     struct btrfs_file_extent_item);
4794                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4795                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4796
4797                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
4798                         break;
4799                 path->slots[0]++;
4800         }
4801
4802         btrfs_release_path(path);
4803
4804         /*
4805          * Have to make sure that this root gets updated when we commit the
4806          * transaction
4807          */
4808         record_root_in_trans(trans, root);
4809
4810         /*
4811          * Ok we have the key of the file extent we want to fix, now we can cow
4812          * down to the thing and fix it.
4813          */
4814         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4815         if (ret < 0) {
4816                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
4817                         key.objectid, key.type, key.offset, ret);
4818                 return ret;
4819         }
4820         if (ret > 0) {
4821                 fprintf(stderr, "Well that's odd, we just found this key "
4822                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
4823                         key.offset);
4824                 return -EINVAL;
4825         }
4826         leaf = path->nodes[0];
4827         fi = btrfs_item_ptr(leaf, path->slots[0],
4828                             struct btrfs_file_extent_item);
4829
4830         if (btrfs_file_extent_compression(leaf, fi) &&
4831             dback->disk_bytenr != entry->bytenr) {
4832                 fprintf(stderr, "Ref doesn't match the record start and is "
4833                         "compressed, please take a btrfs-image of this file "
4834                         "system and send it to a btrfs developer so they can "
4835                         "complete this functionality for bytenr %Lu\n",
4836                         dback->disk_bytenr);
4837                 return -EINVAL;
4838         }
4839
4840         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
4841                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4842         } else if (dback->disk_bytenr > entry->bytenr) {
4843                 u64 off_diff, offset;
4844
4845                 off_diff = dback->disk_bytenr - entry->bytenr;
4846                 offset = btrfs_file_extent_offset(leaf, fi);
4847                 if (dback->disk_bytenr + offset +
4848                     btrfs_file_extent_num_bytes(leaf, fi) >
4849                     entry->bytenr + entry->bytes) {
4850                         fprintf(stderr, "Ref is past the entry end, please "
4851                                 "take a btrfs-image of this file system and "
4852                                 "send it to a btrfs developer, ref %Lu\n",
4853                                 dback->disk_bytenr);
4854                         return -EINVAL;
4855                 }
4856                 offset += off_diff;
4857                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4858                 btrfs_set_file_extent_offset(leaf, fi, offset);
4859         } else if (dback->disk_bytenr < entry->bytenr) {
4860                 u64 offset;
4861
4862                 offset = btrfs_file_extent_offset(leaf, fi);
4863                 if (dback->disk_bytenr + offset < entry->bytenr) {
4864                         fprintf(stderr, "Ref is before the entry start, please"
4865                                 " take a btrfs-image of this file system and "
4866                                 "send it to a btrfs developer, ref %Lu\n",
4867                                 dback->disk_bytenr);
4868                         return -EINVAL;
4869                 }
4870
4871                 offset += dback->disk_bytenr;
4872                 offset -= entry->bytenr;
4873                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4874                 btrfs_set_file_extent_offset(leaf, fi, offset);
4875         }
4876
4877         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
4878
4879         /*
4880          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
4881          * only do this if we aren't using compression, otherwise it's a
4882          * trickier case.
4883          */
4884         if (!btrfs_file_extent_compression(leaf, fi))
4885                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
4886         else
4887                 printf("ram bytes may be wrong?\n");
4888         btrfs_mark_buffer_dirty(leaf);
4889         btrfs_release_path(path);
4890         return 0;
4891 }
4892
4893 static int verify_backrefs(struct btrfs_trans_handle *trans,
4894                            struct btrfs_fs_info *info, struct btrfs_path *path,
4895                            struct extent_record *rec)
4896 {
4897         struct extent_backref *back;
4898         struct data_backref *dback;
4899         struct extent_entry *entry, *best = NULL;
4900         LIST_HEAD(entries);
4901         int nr_entries = 0;
4902         int broken_entries = 0;
4903         int ret = 0;
4904         short mismatch = 0;
4905
4906         /*
4907          * Metadata is easy and the backrefs should always agree on bytenr and
4908          * size, if not we've got bigger issues.
4909          */
4910         if (rec->metadata)
4911                 return 0;
4912
4913         list_for_each_entry(back, &rec->backrefs, list) {
4914                 dback = (struct data_backref *)back;
4915                 /*
4916                  * We only pay attention to backrefs that we found a real
4917                  * backref for.
4918                  */
4919                 if (dback->found_ref == 0)
4920                         continue;
4921                 if (back->full_backref)
4922                         continue;
4923
4924                 /*
4925                  * For now we only catch when the bytes don't match, not the
4926                  * bytenr.  We can easily do this at the same time, but I want
4927                  * to have a fs image to test on before we just add repair
4928                  * functionality willy-nilly so we know we won't screw up the
4929                  * repair.
4930                  */
4931
4932                 entry = find_entry(&entries, dback->disk_bytenr,
4933                                    dback->bytes);
4934                 if (!entry) {
4935                         entry = malloc(sizeof(struct extent_entry));
4936                         if (!entry) {
4937                                 ret = -ENOMEM;
4938                                 goto out;
4939                         }
4940                         memset(entry, 0, sizeof(*entry));
4941                         entry->bytenr = dback->disk_bytenr;
4942                         entry->bytes = dback->bytes;
4943                         list_add_tail(&entry->list, &entries);
4944                         nr_entries++;
4945                 }
4946
4947                 /*
4948                  * If we only have on entry we may think the entries agree when
4949                  * in reality they don't so we have to do some extra checking.
4950                  */
4951                 if (dback->disk_bytenr != rec->start ||
4952                     dback->bytes != rec->nr || back->broken)
4953                         mismatch = 1;
4954
4955                 if (back->broken) {
4956                         entry->broken++;
4957                         broken_entries++;
4958                 }
4959
4960                 entry->count++;
4961         }
4962
4963         /* Yay all the backrefs agree, carry on good sir */
4964         if (nr_entries <= 1 && !mismatch)
4965                 goto out;
4966
4967         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
4968                 "%Lu\n", rec->start);
4969
4970         /*
4971          * First we want to see if the backrefs can agree amongst themselves who
4972          * is right, so figure out which one of the entries has the highest
4973          * count.
4974          */
4975         best = find_most_right_entry(&entries);
4976
4977         /*
4978          * Ok so we may have an even split between what the backrefs think, so
4979          * this is where we use the extent ref to see what it thinks.
4980          */
4981         if (!best) {
4982                 entry = find_entry(&entries, rec->start, rec->nr);
4983                 if (!entry && (!broken_entries || !rec->found_rec)) {
4984                         fprintf(stderr, "Backrefs don't agree with each other "
4985                                 "and extent record doesn't agree with anybody,"
4986                                 " so we can't fix bytenr %Lu bytes %Lu\n",
4987                                 rec->start, rec->nr);
4988                         ret = -EINVAL;
4989                         goto out;
4990                 } else if (!entry) {
4991                         /*
4992                          * Ok our backrefs were broken, we'll assume this is the
4993                          * correct value and add an entry for this range.
4994                          */
4995                         entry = malloc(sizeof(struct extent_entry));
4996                         if (!entry) {
4997                                 ret = -ENOMEM;
4998                                 goto out;
4999                         }
5000                         memset(entry, 0, sizeof(*entry));
5001                         entry->bytenr = rec->start;
5002                         entry->bytes = rec->nr;
5003                         list_add_tail(&entry->list, &entries);
5004                         nr_entries++;
5005                 }
5006                 entry->count++;
5007                 best = find_most_right_entry(&entries);
5008                 if (!best) {
5009                         fprintf(stderr, "Backrefs and extent record evenly "
5010                                 "split on who is right, this is going to "
5011                                 "require user input to fix bytenr %Lu bytes "
5012                                 "%Lu\n", rec->start, rec->nr);
5013                         ret = -EINVAL;
5014                         goto out;
5015                 }
5016         }
5017
5018         /*
5019          * I don't think this can happen currently as we'll abort() if we catch
5020          * this case higher up, but in case somebody removes that we still can't
5021          * deal with it properly here yet, so just bail out of that's the case.
5022          */
5023         if (best->bytenr != rec->start) {
5024                 fprintf(stderr, "Extent start and backref starts don't match, "
5025                         "please use btrfs-image on this file system and send "
5026                         "it to a btrfs developer so they can make fsck fix "
5027                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
5028                         rec->start, rec->nr);
5029                 ret = -EINVAL;
5030                 goto out;
5031         }
5032
5033         /*
5034          * Ok great we all agreed on an extent record, let's go find the real
5035          * references and fix up the ones that don't match.
5036          */
5037         list_for_each_entry(back, &rec->backrefs, list) {
5038                 dback = (struct data_backref *)back;
5039
5040                 /*
5041                  * Still ignoring backrefs that don't have a real ref attached
5042                  * to them.
5043                  */
5044                 if (dback->found_ref == 0)
5045                         continue;
5046                 if (back->full_backref)
5047                         continue;
5048
5049                 if (dback->bytes == best->bytes &&
5050                     dback->disk_bytenr == best->bytenr)
5051                         continue;
5052
5053                 ret = repair_ref(trans, info, path, dback, best);
5054                 if (ret)
5055                         goto out;
5056         }
5057
5058         /*
5059          * Ok we messed with the actual refs, which means we need to drop our
5060          * entire cache and go back and rescan.  I know this is a huge pain and
5061          * adds a lot of extra work, but it's the only way to be safe.  Once all
5062          * the backrefs agree we may not need to do anything to the extent
5063          * record itself.
5064          */
5065         ret = -EAGAIN;
5066 out:
5067         while (!list_empty(&entries)) {
5068                 entry = list_entry(entries.next, struct extent_entry, list);
5069                 list_del_init(&entry->list);
5070                 free(entry);
5071         }
5072         return ret;
5073 }
5074
5075 static int process_duplicates(struct btrfs_root *root,
5076                               struct cache_tree *extent_cache,
5077                               struct extent_record *rec)
5078 {
5079         struct extent_record *good, *tmp;
5080         struct cache_extent *cache;
5081         int ret;
5082
5083         /*
5084          * If we found a extent record for this extent then return, or if we
5085          * have more than one duplicate we are likely going to need to delete
5086          * something.
5087          */
5088         if (rec->found_rec || rec->num_duplicates > 1)
5089                 return 0;
5090
5091         /* Shouldn't happen but just in case */
5092         BUG_ON(!rec->num_duplicates);
5093
5094         /*
5095          * So this happens if we end up with a backref that doesn't match the
5096          * actual extent entry.  So either the backref is bad or the extent
5097          * entry is bad.  Either way we want to have the extent_record actually
5098          * reflect what we found in the extent_tree, so we need to take the
5099          * duplicate out and use that as the extent_record since the only way we
5100          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
5101          */
5102         remove_cache_extent(extent_cache, &rec->cache);
5103
5104         good = list_entry(rec->dups.next, struct extent_record, list);
5105         list_del_init(&good->list);
5106         INIT_LIST_HEAD(&good->backrefs);
5107         INIT_LIST_HEAD(&good->dups);
5108         good->cache.start = good->start;
5109         good->cache.size = good->nr;
5110         good->content_checked = 0;
5111         good->owner_ref_checked = 0;
5112         good->num_duplicates = 0;
5113         good->refs = rec->refs;
5114         list_splice_init(&rec->backrefs, &good->backrefs);
5115         while (1) {
5116                 cache = lookup_cache_extent(extent_cache, good->start,
5117                                             good->nr);
5118                 if (!cache)
5119                         break;
5120                 tmp = container_of(cache, struct extent_record, cache);
5121
5122                 /*
5123                  * If we find another overlapping extent and it's found_rec is
5124                  * set then it's a duplicate and we need to try and delete
5125                  * something.
5126                  */
5127                 if (tmp->found_rec || tmp->num_duplicates > 0) {
5128                         if (list_empty(&good->list))
5129                                 list_add_tail(&good->list,
5130                                               &duplicate_extents);
5131                         good->num_duplicates += tmp->num_duplicates + 1;
5132                         list_splice_init(&tmp->dups, &good->dups);
5133                         list_del_init(&tmp->list);
5134                         list_add_tail(&tmp->list, &good->dups);
5135                         remove_cache_extent(extent_cache, &tmp->cache);
5136                         continue;
5137                 }
5138
5139                 /*
5140                  * Ok we have another non extent item backed extent rec, so lets
5141                  * just add it to this extent and carry on like we did above.
5142                  */
5143                 good->refs += tmp->refs;
5144                 list_splice_init(&tmp->backrefs, &good->backrefs);
5145                 remove_cache_extent(extent_cache, &tmp->cache);
5146                 free(tmp);
5147         }
5148         ret = insert_cache_extent(extent_cache, &good->cache);
5149         BUG_ON(ret);
5150         free(rec);
5151         return good->num_duplicates ? 0 : 1;
5152 }
5153
5154 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
5155                                     struct btrfs_root *root,
5156                                     struct extent_record *rec)
5157 {
5158         LIST_HEAD(delete_list);
5159         struct btrfs_path *path;
5160         struct extent_record *tmp, *good, *n;
5161         int nr_del = 0;
5162         int ret = 0;
5163         struct btrfs_key key;
5164
5165         path = btrfs_alloc_path();
5166         if (!path) {
5167                 ret = -ENOMEM;
5168                 goto out;
5169         }
5170
5171         good = rec;
5172         /* Find the record that covers all of the duplicates. */
5173         list_for_each_entry(tmp, &rec->dups, list) {
5174                 if (good->start < tmp->start)
5175                         continue;
5176                 if (good->nr > tmp->nr)
5177                         continue;
5178
5179                 if (tmp->start + tmp->nr < good->start + good->nr) {
5180                         fprintf(stderr, "Ok we have overlapping extents that "
5181                                 "aren't completely covered by eachother, this "
5182                                 "is going to require more careful thought.  "
5183                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
5184                                 tmp->start, tmp->nr, good->start, good->nr);
5185                         abort();
5186                 }
5187                 good = tmp;
5188         }
5189
5190         if (good != rec)
5191                 list_add_tail(&rec->list, &delete_list);
5192
5193         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
5194                 if (tmp == good)
5195                         continue;
5196                 list_move_tail(&tmp->list, &delete_list);
5197         }
5198
5199         root = root->fs_info->extent_root;
5200         list_for_each_entry(tmp, &delete_list, list) {
5201                 if (tmp->found_rec == 0)
5202                         continue;
5203                 key.objectid = tmp->start;
5204                 key.type = BTRFS_EXTENT_ITEM_KEY;
5205                 key.offset = tmp->nr;
5206
5207                 /* Shouldn't happen but just in case */
5208                 if (tmp->metadata) {
5209                         fprintf(stderr, "Well this shouldn't happen, extent "
5210                                 "record overlaps but is metadata? "
5211                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
5212                         abort();
5213                 }
5214
5215                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5216                 if (ret) {
5217                         if (ret > 0)
5218                                 ret = -EINVAL;
5219                         goto out;
5220                 }
5221                 ret = btrfs_del_item(trans, root, path);
5222                 if (ret)
5223                         goto out;
5224                 btrfs_release_path(path);
5225                 nr_del++;
5226         }
5227
5228 out:
5229         while (!list_empty(&delete_list)) {
5230                 tmp = list_entry(delete_list.next, struct extent_record, list);
5231                 list_del_init(&tmp->list);
5232                 if (tmp == rec)
5233                         continue;
5234                 free(tmp);
5235         }
5236
5237         while (!list_empty(&rec->dups)) {
5238                 tmp = list_entry(rec->dups.next, struct extent_record, list);
5239                 list_del_init(&tmp->list);
5240                 free(tmp);
5241         }
5242
5243         btrfs_free_path(path);
5244
5245         if (!ret && !nr_del)
5246                 rec->num_duplicates = 0;
5247
5248         return ret ? ret : nr_del;
5249 }
5250
5251 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
5252                                   struct btrfs_fs_info *info,
5253                                   struct btrfs_path *path,
5254                                   struct cache_tree *extent_cache,
5255                                   struct extent_record *rec)
5256 {
5257         struct btrfs_root *root;
5258         struct extent_backref *back;
5259         struct data_backref *dback;
5260         struct cache_extent *cache;
5261         struct btrfs_file_extent_item *fi;
5262         struct btrfs_key key;
5263         u64 bytenr, bytes;
5264         int ret;
5265
5266         list_for_each_entry(back, &rec->backrefs, list) {
5267                 dback = (struct data_backref *)back;
5268
5269                 /* We found this one, we don't need to do a lookup */
5270                 if (dback->found_ref)
5271                         continue;
5272                 /* Don't care about full backrefs (poor unloved backrefs) */
5273                 if (back->full_backref)
5274                         continue;
5275                 key.objectid = dback->root;
5276                 key.type = BTRFS_ROOT_ITEM_KEY;
5277                 key.offset = (u64)-1;
5278
5279                 root = btrfs_read_fs_root(info, &key);
5280
5281                 /* No root, definitely a bad ref, skip */
5282                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
5283                         continue;
5284                 /* Other err, exit */
5285                 if (IS_ERR(root))
5286                         return PTR_ERR(root);
5287
5288                 key.objectid = dback->owner;
5289                 key.type = BTRFS_EXTENT_DATA_KEY;
5290                 key.offset = dback->offset;
5291                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5292                 if (ret) {
5293                         btrfs_release_path(path);
5294                         if (ret < 0)
5295                                 return ret;
5296                         /* Didn't find it, we can carry on */
5297                         ret = 0;
5298                         continue;
5299                 }
5300
5301                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5302                                     struct btrfs_file_extent_item);
5303                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
5304                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
5305                 btrfs_release_path(path);
5306                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5307                 if (cache) {
5308                         struct extent_record *tmp;
5309                         tmp = container_of(cache, struct extent_record, cache);
5310
5311                         /*
5312                          * If we found an extent record for the bytenr for this
5313                          * particular backref then we can't add it to our
5314                          * current extent record.  We only want to add backrefs
5315                          * that don't have a corresponding extent item in the
5316                          * extent tree since they likely belong to this record
5317                          * and we need to fix it if it doesn't match bytenrs.
5318                          */
5319                         if  (tmp->found_rec)
5320                                 continue;
5321                 }
5322
5323                 dback->found_ref += 1;
5324                 dback->disk_bytenr = bytenr;
5325                 dback->bytes = bytes;
5326
5327                 /*
5328                  * Set this so the verify backref code knows not to trust the
5329                  * values in this backref.
5330                  */
5331                 back->broken = 1;
5332         }
5333
5334         return 0;
5335 }
5336
5337 /*
5338  * when an incorrect extent item is found, this will delete
5339  * all of the existing entries for it and recreate them
5340  * based on what the tree scan found.
5341  */
5342 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
5343                              struct btrfs_fs_info *info,
5344                              struct cache_tree *extent_cache,
5345                              struct extent_record *rec)
5346 {
5347         int ret;
5348         struct btrfs_path *path;
5349         struct list_head *cur = rec->backrefs.next;
5350         struct cache_extent *cache;
5351         struct extent_backref *back;
5352         int allocated = 0;
5353         u64 flags = 0;
5354
5355         /*
5356          * remember our flags for recreating the extent.
5357          * FIXME, if we have cleared extent tree, we can not
5358          * lookup extent info in extent tree.
5359          */
5360         if (!init_extent_tree) {
5361                 ret = btrfs_lookup_extent_info(NULL, info->extent_root,
5362                                         rec->start, rec->max_size,
5363                                         rec->metadata, NULL, &flags);
5364                 if (ret < 0)
5365                         flags = 0;
5366         } else {
5367                 flags = 0;
5368         }
5369
5370         path = btrfs_alloc_path();
5371         if (!path)
5372                 return -ENOMEM;
5373
5374         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
5375                 /*
5376                  * Sometimes the backrefs themselves are so broken they don't
5377                  * get attached to any meaningful rec, so first go back and
5378                  * check any of our backrefs that we couldn't find and throw
5379                  * them into the list if we find the backref so that
5380                  * verify_backrefs can figure out what to do.
5381                  */
5382                 ret = find_possible_backrefs(trans, info, path, extent_cache,
5383                                              rec);
5384                 if (ret < 0)
5385                         goto out;
5386         }
5387
5388         /* step one, make sure all of the backrefs agree */
5389         ret = verify_backrefs(trans, info, path, rec);
5390         if (ret < 0)
5391                 goto out;
5392
5393         /* step two, delete all the existing records */
5394         ret = delete_extent_records(trans, info->extent_root, path,
5395                                     rec->start, rec->max_size);
5396
5397         if (ret < 0)
5398                 goto out;
5399
5400         /* was this block corrupt?  If so, don't add references to it */
5401         cache = lookup_cache_extent(info->corrupt_blocks,
5402                                     rec->start, rec->max_size);
5403         if (cache) {
5404                 ret = 0;
5405                 goto out;
5406         }
5407
5408         /* step three, recreate all the refs we did find */
5409         while(cur != &rec->backrefs) {
5410                 back = list_entry(cur, struct extent_backref, list);
5411                 cur = cur->next;
5412
5413                 /*
5414                  * if we didn't find any references, don't create a
5415                  * new extent record
5416                  */
5417                 if (!back->found_ref)
5418                         continue;
5419
5420                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
5421                 allocated = 1;
5422
5423                 if (ret)
5424                         goto out;
5425         }
5426 out:
5427         btrfs_free_path(path);
5428         return ret;
5429 }
5430
5431 /* right now we only prune from the extent allocation tree */
5432 static int prune_one_block(struct btrfs_trans_handle *trans,
5433                            struct btrfs_fs_info *info,
5434                            struct btrfs_corrupt_block *corrupt)
5435 {
5436         int ret;
5437         struct btrfs_path path;
5438         struct extent_buffer *eb;
5439         u64 found;
5440         int slot;
5441         int nritems;
5442         int level = corrupt->level + 1;
5443
5444         btrfs_init_path(&path);
5445 again:
5446         /* we want to stop at the parent to our busted block */
5447         path.lowest_level = level;
5448
5449         ret = btrfs_search_slot(trans, info->extent_root,
5450                                 &corrupt->key, &path, -1, 1);
5451
5452         if (ret < 0)
5453                 goto out;
5454
5455         eb = path.nodes[level];
5456         if (!eb) {
5457                 ret = -ENOENT;
5458                 goto out;
5459         }
5460
5461         /*
5462          * hopefully the search gave us the block we want to prune,
5463          * lets try that first
5464          */
5465         slot = path.slots[level];
5466         found =  btrfs_node_blockptr(eb, slot);
5467         if (found == corrupt->cache.start)
5468                 goto del_ptr;
5469
5470         nritems = btrfs_header_nritems(eb);
5471
5472         /* the search failed, lets scan this node and hope we find it */
5473         for (slot = 0; slot < nritems; slot++) {
5474                 found =  btrfs_node_blockptr(eb, slot);
5475                 if (found == corrupt->cache.start)
5476                         goto del_ptr;
5477         }
5478         /*
5479          * we couldn't find the bad block.  TODO, search all the nodes for pointers
5480          * to this block
5481          */
5482         if (eb == info->extent_root->node) {
5483                 ret = -ENOENT;
5484                 goto out;
5485         } else {
5486                 level++;
5487                 btrfs_release_path(&path);
5488                 goto again;
5489         }
5490
5491 del_ptr:
5492         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
5493         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
5494
5495 out:
5496         btrfs_release_path(&path);
5497         return ret;
5498 }
5499
5500 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
5501                                 struct btrfs_fs_info *info)
5502 {
5503         struct cache_extent *cache;
5504         struct btrfs_corrupt_block *corrupt;
5505
5506         cache = search_cache_extent(info->corrupt_blocks, 0);
5507         while (1) {
5508                 if (!cache)
5509                         break;
5510                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5511                 prune_one_block(trans, info, corrupt);
5512                 cache = next_cache_extent(cache);
5513         }
5514         return 0;
5515 }
5516
5517 static void free_corrupt_block(struct cache_extent *cache)
5518 {
5519         struct btrfs_corrupt_block *corrupt;
5520
5521         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5522         free(corrupt);
5523 }
5524
5525 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
5526
5527 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
5528 {
5529         struct btrfs_block_group_cache *cache;
5530         u64 start, end;
5531         int ret;
5532
5533         while (1) {
5534                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
5535                                             &start, &end, EXTENT_DIRTY);
5536                 if (ret)
5537                         break;
5538                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
5539                                    GFP_NOFS);
5540         }
5541
5542         start = 0;
5543         while (1) {
5544                 cache = btrfs_lookup_first_block_group(fs_info, start);
5545                 if (!cache)
5546                         break;
5547                 if (cache->cached)
5548                         cache->cached = 0;
5549                 start = cache->key.objectid + cache->key.offset;
5550         }
5551 }
5552
5553 static int check_extent_refs(struct btrfs_trans_handle *trans,
5554                              struct btrfs_root *root,
5555                              struct cache_tree *extent_cache)
5556 {
5557         struct extent_record *rec;
5558         struct cache_extent *cache;
5559         int err = 0;
5560         int ret = 0;
5561         int fixed = 0;
5562         int had_dups = 0;
5563
5564         if (repair) {
5565                 /*
5566                  * if we're doing a repair, we have to make sure
5567                  * we don't allocate from the problem extents.
5568                  * In the worst case, this will be all the
5569                  * extents in the FS
5570                  */
5571                 cache = search_cache_extent(extent_cache, 0);
5572                 while(cache) {
5573                         rec = container_of(cache, struct extent_record, cache);
5574                         btrfs_pin_extent(root->fs_info,
5575                                          rec->start, rec->max_size);
5576                         cache = next_cache_extent(cache);
5577                 }
5578
5579                 /* pin down all the corrupted blocks too */
5580                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
5581                 while(cache) {
5582                         btrfs_pin_extent(root->fs_info,
5583                                          cache->start, cache->size);
5584                         cache = next_cache_extent(cache);
5585                 }
5586                 prune_corrupt_blocks(trans, root->fs_info);
5587                 reset_cached_block_groups(root->fs_info);
5588         }
5589
5590         /*
5591          * We need to delete any duplicate entries we find first otherwise we
5592          * could mess up the extent tree when we have backrefs that actually
5593          * belong to a different extent item and not the weird duplicate one.
5594          */
5595         while (repair && !list_empty(&duplicate_extents)) {
5596                 rec = list_entry(duplicate_extents.next, struct extent_record,
5597                                  list);
5598                 list_del_init(&rec->list);
5599
5600                 /* Sometimes we can find a backref before we find an actual
5601                  * extent, so we need to process it a little bit to see if there
5602                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
5603                  * if this is a backref screwup.  If we need to delete stuff
5604                  * process_duplicates() will return 0, otherwise it will return
5605                  * 1 and we
5606                  */
5607                 if (process_duplicates(root, extent_cache, rec))
5608                         continue;
5609                 ret = delete_duplicate_records(trans, root, rec);
5610                 if (ret < 0)
5611                         return ret;
5612                 /*
5613                  * delete_duplicate_records will return the number of entries
5614                  * deleted, so if it's greater than 0 then we know we actually
5615                  * did something and we need to remove.
5616                  */
5617                 if (ret)
5618                         had_dups = 1;
5619         }
5620
5621         if (had_dups)
5622                 return -EAGAIN;
5623
5624         while(1) {
5625                 fixed = 0;
5626                 cache = search_cache_extent(extent_cache, 0);
5627                 if (!cache)
5628                         break;
5629                 rec = container_of(cache, struct extent_record, cache);
5630                 if (rec->num_duplicates) {
5631                         fprintf(stderr, "extent item %llu has multiple extent "
5632                                 "items\n", (unsigned long long)rec->start);
5633                         err = 1;
5634                 }
5635
5636                 if (rec->refs != rec->extent_item_refs) {
5637                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
5638                                 (unsigned long long)rec->start,
5639                                 (unsigned long long)rec->nr);
5640                         fprintf(stderr, "extent item %llu, found %llu\n",
5641                                 (unsigned long long)rec->extent_item_refs,
5642                                 (unsigned long long)rec->refs);
5643                         if (!fixed && repair) {
5644                                 ret = fixup_extent_refs(trans, root->fs_info,
5645                                                         extent_cache, rec);
5646                                 if (ret)
5647                                         goto repair_abort;
5648                                 fixed = 1;
5649                         }
5650                         err = 1;
5651
5652                 }
5653                 if (all_backpointers_checked(rec, 1)) {
5654                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
5655                                 (unsigned long long)rec->start,
5656                                 (unsigned long long)rec->nr);
5657
5658                         if (!fixed && repair) {
5659                                 ret = fixup_extent_refs(trans, root->fs_info,
5660                                                         extent_cache, rec);
5661                                 if (ret)
5662                                         goto repair_abort;
5663                                 fixed = 1;
5664                         }
5665
5666                         err = 1;
5667                 }
5668                 if (!rec->owner_ref_checked) {
5669                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
5670                                 (unsigned long long)rec->start,
5671                                 (unsigned long long)rec->nr);
5672                         if (!fixed && repair) {
5673                                 ret = fixup_extent_refs(trans, root->fs_info,
5674                                                         extent_cache, rec);
5675                                 if (ret)
5676                                         goto repair_abort;
5677                                 fixed = 1;
5678                         }
5679                         err = 1;
5680                 }
5681
5682                 remove_cache_extent(extent_cache, cache);
5683                 free_all_extent_backrefs(rec);
5684                 free(rec);
5685         }
5686 repair_abort:
5687         if (repair) {
5688                 if (ret && ret != -EAGAIN) {
5689                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
5690                         exit(1);
5691                 } else if (!ret) {
5692                         btrfs_fix_block_accounting(trans, root);
5693                 }
5694                 if (err)
5695                         fprintf(stderr, "repaired damaged extent references\n");
5696                 return ret;
5697         }
5698         return err;
5699 }
5700
5701 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
5702 {
5703         u64 stripe_size;
5704
5705         if (type & BTRFS_BLOCK_GROUP_RAID0) {
5706                 stripe_size = length;
5707                 stripe_size /= num_stripes;
5708         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
5709                 stripe_size = length * 2;
5710                 stripe_size /= num_stripes;
5711         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
5712                 stripe_size = length;
5713                 stripe_size /= (num_stripes - 1);
5714         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
5715                 stripe_size = length;
5716                 stripe_size /= (num_stripes - 2);
5717         } else {
5718                 stripe_size = length;
5719         }
5720         return stripe_size;
5721 }
5722
5723 static int check_chunk_refs(struct chunk_record *chunk_rec,
5724                             struct block_group_tree *block_group_cache,
5725                             struct device_extent_tree *dev_extent_cache,
5726                             int silent)
5727 {
5728         struct cache_extent *block_group_item;
5729         struct block_group_record *block_group_rec;
5730         struct cache_extent *dev_extent_item;
5731         struct device_extent_record *dev_extent_rec;
5732         u64 devid;
5733         u64 offset;
5734         u64 length;
5735         int i;
5736         int ret = 0;
5737
5738         block_group_item = lookup_cache_extent(&block_group_cache->tree,
5739                                                chunk_rec->offset,
5740                                                chunk_rec->length);
5741         if (block_group_item) {
5742                 block_group_rec = container_of(block_group_item,
5743                                                struct block_group_record,
5744                                                cache);
5745                 if (chunk_rec->length != block_group_rec->offset ||
5746                     chunk_rec->offset != block_group_rec->objectid ||
5747                     chunk_rec->type_flags != block_group_rec->flags) {
5748                         if (!silent)
5749                                 fprintf(stderr,
5750                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
5751                                         chunk_rec->objectid,
5752                                         chunk_rec->type,
5753                                         chunk_rec->offset,
5754                                         chunk_rec->length,
5755                                         chunk_rec->offset,
5756                                         chunk_rec->type_flags,
5757                                         block_group_rec->objectid,
5758                                         block_group_rec->type,
5759                                         block_group_rec->offset,
5760                                         block_group_rec->offset,
5761                                         block_group_rec->objectid,
5762                                         block_group_rec->flags);
5763                         ret = -1;
5764                 } else {
5765                         list_del_init(&block_group_rec->list);
5766                         chunk_rec->bg_rec = block_group_rec;
5767                 }
5768         } else {
5769                 if (!silent)
5770                         fprintf(stderr,
5771                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
5772                                 chunk_rec->objectid,
5773                                 chunk_rec->type,
5774                                 chunk_rec->offset,
5775                                 chunk_rec->length,
5776                                 chunk_rec->offset,
5777                                 chunk_rec->type_flags);
5778                 ret = -1;
5779         }
5780
5781         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
5782                                     chunk_rec->num_stripes);
5783         for (i = 0; i < chunk_rec->num_stripes; ++i) {
5784                 devid = chunk_rec->stripes[i].devid;
5785                 offset = chunk_rec->stripes[i].offset;
5786                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
5787                                                        devid, offset, length);
5788                 if (dev_extent_item) {
5789                         dev_extent_rec = container_of(dev_extent_item,
5790                                                 struct device_extent_record,
5791                                                 cache);
5792                         if (dev_extent_rec->objectid != devid ||
5793                             dev_extent_rec->offset != offset ||
5794                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
5795                             dev_extent_rec->length != length) {
5796                                 if (!silent)
5797                                         fprintf(stderr,
5798                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
5799                                                 chunk_rec->objectid,
5800                                                 chunk_rec->type,
5801                                                 chunk_rec->offset,
5802                                                 chunk_rec->stripes[i].devid,
5803                                                 chunk_rec->stripes[i].offset,
5804                                                 dev_extent_rec->objectid,
5805                                                 dev_extent_rec->offset,
5806                                                 dev_extent_rec->length);
5807                                 ret = -1;
5808                         } else {
5809                                 list_move(&dev_extent_rec->chunk_list,
5810                                           &chunk_rec->dextents);
5811                         }
5812                 } else {
5813                         if (!silent)
5814                                 fprintf(stderr,
5815                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
5816                                         chunk_rec->objectid,
5817                                         chunk_rec->type,
5818                                         chunk_rec->offset,
5819                                         chunk_rec->stripes[i].devid,
5820                                         chunk_rec->stripes[i].offset);
5821                         ret = -1;
5822                 }
5823         }
5824         return ret;
5825 }
5826
5827 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
5828 int check_chunks(struct cache_tree *chunk_cache,
5829                  struct block_group_tree *block_group_cache,
5830                  struct device_extent_tree *dev_extent_cache,
5831                  struct list_head *good, struct list_head *bad, int silent)
5832 {
5833         struct cache_extent *chunk_item;
5834         struct chunk_record *chunk_rec;
5835         struct block_group_record *bg_rec;
5836         struct device_extent_record *dext_rec;
5837         int err;
5838         int ret = 0;
5839
5840         chunk_item = first_cache_extent(chunk_cache);
5841         while (chunk_item) {
5842                 chunk_rec = container_of(chunk_item, struct chunk_record,
5843                                          cache);
5844                 err = check_chunk_refs(chunk_rec, block_group_cache,
5845                                        dev_extent_cache, silent);
5846                 if (err) {
5847                         ret = err;
5848                         if (bad)
5849                                 list_add_tail(&chunk_rec->list, bad);
5850                 } else {
5851                         if (good)
5852                                 list_add_tail(&chunk_rec->list, good);
5853                 }
5854
5855                 chunk_item = next_cache_extent(chunk_item);
5856         }
5857
5858         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
5859                 if (!silent)
5860                         fprintf(stderr,
5861                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
5862                                 bg_rec->objectid,
5863                                 bg_rec->offset,
5864                                 bg_rec->flags);
5865                 if (!ret)
5866                         ret = 1;
5867         }
5868
5869         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
5870                             chunk_list) {
5871                 if (!silent)
5872                         fprintf(stderr,
5873                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
5874                                 dext_rec->objectid,
5875                                 dext_rec->offset,
5876                                 dext_rec->length);
5877                 if (!ret)
5878                         ret = 1;
5879         }
5880         return ret;
5881 }
5882
5883
5884 static int check_device_used(struct device_record *dev_rec,
5885                              struct device_extent_tree *dext_cache)
5886 {
5887         struct cache_extent *cache;
5888         struct device_extent_record *dev_extent_rec;
5889         u64 total_byte = 0;
5890
5891         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
5892         while (cache) {
5893                 dev_extent_rec = container_of(cache,
5894                                               struct device_extent_record,
5895                                               cache);
5896                 if (dev_extent_rec->objectid != dev_rec->devid)
5897                         break;
5898
5899                 list_del(&dev_extent_rec->device_list);
5900                 total_byte += dev_extent_rec->length;
5901                 cache = next_cache_extent(cache);
5902         }
5903
5904         if (total_byte != dev_rec->byte_used) {
5905                 fprintf(stderr,
5906                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
5907                         total_byte, dev_rec->byte_used, dev_rec->objectid,
5908                         dev_rec->type, dev_rec->offset);
5909                 return -1;
5910         } else {
5911                 return 0;
5912         }
5913 }
5914
5915 /* check btrfs_dev_item -> btrfs_dev_extent */
5916 static int check_devices(struct rb_root *dev_cache,
5917                          struct device_extent_tree *dev_extent_cache)
5918 {
5919         struct rb_node *dev_node;
5920         struct device_record *dev_rec;
5921         struct device_extent_record *dext_rec;
5922         int err;
5923         int ret = 0;
5924
5925         dev_node = rb_first(dev_cache);
5926         while (dev_node) {
5927                 dev_rec = container_of(dev_node, struct device_record, node);
5928                 err = check_device_used(dev_rec, dev_extent_cache);
5929                 if (err)
5930                         ret = err;
5931
5932                 dev_node = rb_next(dev_node);
5933         }
5934         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
5935                             device_list) {
5936                 fprintf(stderr,
5937                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
5938                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
5939                 if (!ret)
5940                         ret = 1;
5941         }
5942         return ret;
5943 }
5944
5945 static int check_chunks_and_extents(struct btrfs_root *root)
5946 {
5947         struct rb_root dev_cache;
5948         struct cache_tree chunk_cache;
5949         struct block_group_tree block_group_cache;
5950         struct device_extent_tree dev_extent_cache;
5951         struct cache_tree extent_cache;
5952         struct cache_tree seen;
5953         struct cache_tree pending;
5954         struct cache_tree reada;
5955         struct cache_tree nodes;
5956         struct cache_tree corrupt_blocks;
5957         struct btrfs_path path;
5958         struct btrfs_key key;
5959         struct btrfs_key found_key;
5960         int ret, err = 0;
5961         u64 last = 0;
5962         struct block_info *bits;
5963         int bits_nr;
5964         struct extent_buffer *leaf;
5965         struct btrfs_trans_handle *trans = NULL;
5966         int slot;
5967         struct btrfs_root_item ri;
5968         struct list_head dropping_trees;
5969
5970         dev_cache = RB_ROOT;
5971         cache_tree_init(&chunk_cache);
5972         block_group_tree_init(&block_group_cache);
5973         device_extent_tree_init(&dev_extent_cache);
5974
5975         cache_tree_init(&extent_cache);
5976         cache_tree_init(&seen);
5977         cache_tree_init(&pending);
5978         cache_tree_init(&nodes);
5979         cache_tree_init(&reada);
5980         cache_tree_init(&corrupt_blocks);
5981         INIT_LIST_HEAD(&dropping_trees);
5982
5983         if (repair) {
5984                 trans = btrfs_start_transaction(root, 1);
5985                 if (IS_ERR(trans)) {
5986                         fprintf(stderr, "Error starting transaction\n");
5987                         return PTR_ERR(trans);
5988                 }
5989                 root->fs_info->fsck_extent_cache = &extent_cache;
5990                 root->fs_info->free_extent_hook = free_extent_hook;
5991                 root->fs_info->corrupt_blocks = &corrupt_blocks;
5992         }
5993
5994         bits_nr = 1024;
5995         bits = malloc(bits_nr * sizeof(struct block_info));
5996         if (!bits) {
5997                 perror("malloc");
5998                 exit(1);
5999         }
6000
6001 again:
6002         add_root_to_pending(root->fs_info->tree_root->node,
6003                             &extent_cache, &pending, &seen, &nodes,
6004                             &root->fs_info->tree_root->root_key);
6005
6006         add_root_to_pending(root->fs_info->chunk_root->node,
6007                             &extent_cache, &pending, &seen, &nodes,
6008                             &root->fs_info->chunk_root->root_key);
6009
6010         btrfs_init_path(&path);
6011         key.offset = 0;
6012         key.objectid = 0;
6013         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
6014         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
6015                                         &key, &path, 0, 0);
6016         if (ret < 0)
6017                 goto out;
6018         while(1) {
6019                 leaf = path.nodes[0];
6020                 slot = path.slots[0];
6021                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
6022                         ret = btrfs_next_leaf(root, &path);
6023                         if (ret != 0)
6024                                 break;
6025                         leaf = path.nodes[0];
6026                         slot = path.slots[0];
6027                 }
6028                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
6029                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
6030                         unsigned long offset;
6031                         struct extent_buffer *buf;
6032
6033                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6034                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
6035                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
6036                                 buf = read_tree_block(root->fs_info->tree_root,
6037                                                       btrfs_root_bytenr(&ri),
6038                                                       btrfs_level_size(root,
6039                                                       btrfs_root_level(&ri)),
6040                                                       0);
6041                                 if (!buf) {
6042                                         ret = -EIO;
6043                                         goto out;
6044                                 }
6045                                 add_root_to_pending(buf, &extent_cache,
6046                                                     &pending, &seen, &nodes,
6047                                                     &found_key);
6048                                 free_extent_buffer(buf);
6049                         } else {
6050                                 struct dropping_root_item_record *dri_rec;
6051                                 dri_rec = malloc(sizeof(*dri_rec));
6052                                 if (!dri_rec) {
6053                                         perror("malloc");
6054                                         exit(1);
6055                                 }
6056                                 memcpy(&dri_rec->ri, &ri, sizeof(ri));
6057                                 memcpy(&dri_rec->found_key, &found_key,
6058                                        sizeof(found_key));
6059                                 list_add_tail(&dri_rec->list, &dropping_trees);
6060                         }
6061                 }
6062                 path.slots[0]++;
6063         }
6064         btrfs_release_path(&path);
6065         while (1) {
6066                 ret = run_next_block(trans, root, bits, bits_nr, &last,
6067                                      &pending, &seen, &reada, &nodes,
6068                                      &extent_cache, &chunk_cache, &dev_cache,
6069                                      &block_group_cache, &dev_extent_cache,
6070                                      NULL);
6071                 if (ret != 0)
6072                         break;
6073         }
6074
6075         while (!list_empty(&dropping_trees)) {
6076                 struct dropping_root_item_record *rec;
6077                 struct extent_buffer *buf;
6078                 rec = list_entry(dropping_trees.next,
6079                                  struct dropping_root_item_record, list);
6080                 last = 0;
6081                 if (!bits) {
6082                         perror("realloc");
6083                         exit(1);
6084                 }
6085                 buf = read_tree_block(root->fs_info->tree_root,
6086                                       btrfs_root_bytenr(&rec->ri),
6087                                       btrfs_level_size(root,
6088                                       btrfs_root_level(&rec->ri)), 0);
6089                 if (!buf) {
6090                         ret = -EIO;
6091                         goto out;
6092                 }
6093                 add_root_to_pending(buf, &extent_cache, &pending,
6094                                     &seen, &nodes, &rec->found_key);
6095                 while (1) {
6096                         ret = run_next_block(trans, root, bits, bits_nr, &last,
6097                                              &pending, &seen, &reada,
6098                                              &nodes, &extent_cache,
6099                                              &chunk_cache, &dev_cache,
6100                                              &block_group_cache,
6101                                              &dev_extent_cache,
6102                                              &rec->ri);
6103                         if (ret != 0)
6104                                 break;
6105                 }
6106                 free_extent_buffer(buf);
6107                 list_del(&rec->list);
6108                 free(rec);
6109         }
6110
6111         if (ret >= 0)
6112                 ret = check_extent_refs(trans, root, &extent_cache);
6113         if (ret == -EAGAIN) {
6114                 ret = btrfs_commit_transaction(trans, root);
6115                 if (ret)
6116                         goto out;
6117
6118                 trans = btrfs_start_transaction(root, 1);
6119                 if (IS_ERR(trans)) {
6120                         ret = PTR_ERR(trans);
6121                         goto out;
6122                 }
6123
6124                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6125                 free_extent_cache_tree(&seen);
6126                 free_extent_cache_tree(&pending);
6127                 free_extent_cache_tree(&reada);
6128                 free_extent_cache_tree(&nodes);
6129                 free_extent_record_cache(root->fs_info, &extent_cache);
6130                 goto again;
6131         }
6132
6133         err = check_chunks(&chunk_cache, &block_group_cache,
6134                            &dev_extent_cache, NULL, NULL, 0);
6135         if (err && !ret)
6136                 ret = err;
6137
6138         err = check_devices(&dev_cache, &dev_extent_cache);
6139         if (err && !ret)
6140                 ret = err;
6141
6142 out:
6143         if (trans) {
6144                 err = btrfs_commit_transaction(trans, root);
6145                 if (!ret)
6146                         ret = err;
6147         }
6148         if (repair) {
6149                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6150                 root->fs_info->fsck_extent_cache = NULL;
6151                 root->fs_info->free_extent_hook = NULL;
6152                 root->fs_info->corrupt_blocks = NULL;
6153         }
6154         free(bits);
6155         free_chunk_cache_tree(&chunk_cache);
6156         free_device_cache_tree(&dev_cache);
6157         free_block_group_tree(&block_group_cache);
6158         free_device_extent_tree(&dev_extent_cache);
6159         free_extent_cache_tree(&seen);
6160         free_extent_cache_tree(&pending);
6161         free_extent_cache_tree(&reada);
6162         free_extent_cache_tree(&nodes);
6163         return ret;
6164 }
6165
6166 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
6167                            struct btrfs_root *root, int overwrite)
6168 {
6169         struct extent_buffer *c;
6170         struct extent_buffer *old = root->node;
6171         int level;
6172         int ret;
6173         struct btrfs_disk_key disk_key = {0,0,0};
6174
6175         level = 0;
6176
6177         if (overwrite) {
6178                 c = old;
6179                 extent_buffer_get(c);
6180                 goto init;
6181         }
6182         c = btrfs_alloc_free_block(trans, root,
6183                                    btrfs_level_size(root, 0),
6184                                    root->root_key.objectid,
6185                                    &disk_key, level, 0, 0);
6186         if (IS_ERR(c)) {
6187                 c = old;
6188                 extent_buffer_get(c);
6189                 overwrite = 1;
6190         }
6191 init:
6192         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
6193         btrfs_set_header_level(c, level);
6194         btrfs_set_header_bytenr(c, c->start);
6195         btrfs_set_header_generation(c, trans->transid);
6196         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
6197         btrfs_set_header_owner(c, root->root_key.objectid);
6198
6199         write_extent_buffer(c, root->fs_info->fsid,
6200                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
6201
6202         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
6203                             btrfs_header_chunk_tree_uuid(c),
6204                             BTRFS_UUID_SIZE);
6205
6206         btrfs_mark_buffer_dirty(c);
6207         /*
6208          * this case can happen in the following case:
6209          *
6210          * 1.overwrite previous root.
6211          *
6212          * 2.reinit reloc data root, this is because we skip pin
6213          * down reloc data tree before which means we can allocate
6214          * same block bytenr here.
6215          */
6216         if (old->start == c->start) {
6217                 btrfs_set_root_generation(&root->root_item,
6218                                           trans->transid);
6219                 root->root_item.level = btrfs_header_level(root->node);
6220                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
6221                                         &root->root_key, &root->root_item);
6222                 if (ret) {
6223                         free_extent_buffer(c);
6224                         return ret;
6225                 }
6226         }
6227         free_extent_buffer(old);
6228         root->node = c;
6229         add_root_to_dirty_list(root);
6230         return 0;
6231 }
6232
6233 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
6234                                 struct extent_buffer *eb, int tree_root)
6235 {
6236         struct extent_buffer *tmp;
6237         struct btrfs_root_item *ri;
6238         struct btrfs_key key;
6239         u64 bytenr;
6240         u32 leafsize;
6241         int level = btrfs_header_level(eb);
6242         int nritems;
6243         int ret;
6244         int i;
6245
6246         /*
6247          * If we have pinned this block before, don't pin it again.
6248          * This can not only avoid forever loop with broken filesystem
6249          * but also give us some speedups.
6250          */
6251         if (test_range_bit(&fs_info->pinned_extents, eb->start,
6252                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
6253                 return 0;
6254
6255         btrfs_pin_extent(fs_info, eb->start, eb->len);
6256
6257         leafsize = btrfs_super_leafsize(fs_info->super_copy);
6258         nritems = btrfs_header_nritems(eb);
6259         for (i = 0; i < nritems; i++) {
6260                 if (level == 0) {
6261                         btrfs_item_key_to_cpu(eb, &key, i);
6262                         if (key.type != BTRFS_ROOT_ITEM_KEY)
6263                                 continue;
6264                         /* Skip the extent root and reloc roots */
6265                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6266                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
6267                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
6268                                 continue;
6269                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
6270                         bytenr = btrfs_disk_root_bytenr(eb, ri);
6271
6272                         /*
6273                          * If at any point we start needing the real root we
6274                          * will have to build a stump root for the root we are
6275                          * in, but for now this doesn't actually use the root so
6276                          * just pass in extent_root.
6277                          */
6278                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6279                                               leafsize, 0);
6280                         if (!tmp) {
6281                                 fprintf(stderr, "Error reading root block\n");
6282                                 return -EIO;
6283                         }
6284                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
6285                         free_extent_buffer(tmp);
6286                         if (ret)
6287                                 return ret;
6288                 } else {
6289                         bytenr = btrfs_node_blockptr(eb, i);
6290
6291                         /* If we aren't the tree root don't read the block */
6292                         if (level == 1 && !tree_root) {
6293                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
6294                                 continue;
6295                         }
6296
6297                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6298                                               leafsize, 0);
6299                         if (!tmp) {
6300                                 fprintf(stderr, "Error reading tree block\n");
6301                                 return -EIO;
6302                         }
6303                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
6304                         free_extent_buffer(tmp);
6305                         if (ret)
6306                                 return ret;
6307                 }
6308         }
6309
6310         return 0;
6311 }
6312
6313 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
6314 {
6315         int ret;
6316
6317         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
6318         if (ret)
6319                 return ret;
6320
6321         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
6322 }
6323
6324 static int reset_block_groups(struct btrfs_fs_info *fs_info)
6325 {
6326         struct btrfs_block_group_cache *cache;
6327         struct btrfs_path *path;
6328         struct extent_buffer *leaf;
6329         struct btrfs_chunk *chunk;
6330         struct btrfs_key key;
6331         int ret;
6332         u64 start;
6333
6334         path = btrfs_alloc_path();
6335         if (!path)
6336                 return -ENOMEM;
6337
6338         key.objectid = 0;
6339         key.type = BTRFS_CHUNK_ITEM_KEY;
6340         key.offset = 0;
6341
6342         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
6343         if (ret < 0) {
6344                 btrfs_free_path(path);
6345                 return ret;
6346         }
6347
6348         /*
6349          * We do this in case the block groups were screwed up and had alloc
6350          * bits that aren't actually set on the chunks.  This happens with
6351          * restored images every time and could happen in real life I guess.
6352          */
6353         fs_info->avail_data_alloc_bits = 0;
6354         fs_info->avail_metadata_alloc_bits = 0;
6355         fs_info->avail_system_alloc_bits = 0;
6356
6357         /* First we need to create the in-memory block groups */
6358         while (1) {
6359                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6360                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
6361                         if (ret < 0) {
6362                                 btrfs_free_path(path);
6363                                 return ret;
6364                         }
6365                         if (ret) {
6366                                 ret = 0;
6367                                 break;
6368                         }
6369                 }
6370                 leaf = path->nodes[0];
6371                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6372                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
6373                         path->slots[0]++;
6374                         continue;
6375                 }
6376
6377                 chunk = btrfs_item_ptr(leaf, path->slots[0],
6378                                        struct btrfs_chunk);
6379                 btrfs_add_block_group(fs_info, 0,
6380                                       btrfs_chunk_type(leaf, chunk),
6381                                       key.objectid, key.offset,
6382                                       btrfs_chunk_length(leaf, chunk));
6383                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
6384                                  key.offset + btrfs_chunk_length(leaf, chunk),
6385                                  GFP_NOFS);
6386                 path->slots[0]++;
6387         }
6388         start = 0;
6389         while (1) {
6390                 cache = btrfs_lookup_first_block_group(fs_info, start);
6391                 if (!cache)
6392                         break;
6393                 cache->cached = 1;
6394                 start = cache->key.objectid + cache->key.offset;
6395         }
6396
6397         btrfs_free_path(path);
6398         return 0;
6399 }
6400
6401 static int reset_balance(struct btrfs_trans_handle *trans,
6402                          struct btrfs_fs_info *fs_info)
6403 {
6404         struct btrfs_root *root = fs_info->tree_root;
6405         struct btrfs_path *path;
6406         struct extent_buffer *leaf;
6407         struct btrfs_key key;
6408         int del_slot, del_nr = 0;
6409         int ret;
6410         int found = 0;
6411
6412         path = btrfs_alloc_path();
6413         if (!path)
6414                 return -ENOMEM;
6415
6416         key.objectid = BTRFS_BALANCE_OBJECTID;
6417         key.type = BTRFS_BALANCE_ITEM_KEY;
6418         key.offset = 0;
6419
6420         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6421         if (ret) {
6422                 if (ret > 0)
6423                         ret = 0;
6424                 if (!ret)
6425                         goto reinit_data_reloc;
6426                 else
6427                         goto out;
6428         }
6429
6430         ret = btrfs_del_item(trans, root, path);
6431         if (ret)
6432                 goto out;
6433         btrfs_release_path(path);
6434
6435         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
6436         key.type = BTRFS_ROOT_ITEM_KEY;
6437         key.offset = 0;
6438
6439         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6440         if (ret < 0)
6441                 goto out;
6442         while (1) {
6443                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6444                         if (!found)
6445                                 break;
6446
6447                         if (del_nr) {
6448                                 ret = btrfs_del_items(trans, root, path,
6449                                                       del_slot, del_nr);
6450                                 del_nr = 0;
6451                                 if (ret)
6452                                         goto out;
6453                         }
6454                         key.offset++;
6455                         btrfs_release_path(path);
6456
6457                         found = 0;
6458                         ret = btrfs_search_slot(trans, root, &key, path,
6459                                                 -1, 1);
6460                         if (ret < 0)
6461                                 goto out;
6462                         continue;
6463                 }
6464                 found = 1;
6465                 leaf = path->nodes[0];
6466                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6467                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
6468                         break;
6469                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6470                         path->slots[0]++;
6471                         continue;
6472                 }
6473                 if (!del_nr) {
6474                         del_slot = path->slots[0];
6475                         del_nr = 1;
6476                 } else {
6477                         del_nr++;
6478                 }
6479                 path->slots[0]++;
6480         }
6481
6482         if (del_nr) {
6483                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
6484                 if (ret)
6485                         goto out;
6486         }
6487         btrfs_release_path(path);
6488
6489 reinit_data_reloc:
6490         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
6491         key.type = BTRFS_ROOT_ITEM_KEY;
6492         key.offset = (u64)-1;
6493         root = btrfs_read_fs_root(fs_info, &key);
6494         if (IS_ERR(root)) {
6495                 fprintf(stderr, "Error reading data reloc tree\n");
6496                 return PTR_ERR(root);
6497         }
6498         record_root_in_trans(trans, root);
6499         ret = btrfs_fsck_reinit_root(trans, root, 0);
6500         if (ret)
6501                 goto out;
6502         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
6503 out:
6504         btrfs_free_path(path);
6505         return ret;
6506 }
6507
6508 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
6509                               struct btrfs_fs_info *fs_info)
6510 {
6511         u64 start = 0;
6512         int ret;
6513
6514         /*
6515          * The only reason we don't do this is because right now we're just
6516          * walking the trees we find and pinning down their bytes, we don't look
6517          * at any of the leaves.  In order to do mixed groups we'd have to check
6518          * the leaves of any fs roots and pin down the bytes for any file
6519          * extents we find.  Not hard but why do it if we don't have to?
6520          */
6521         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
6522                 fprintf(stderr, "We don't support re-initing the extent tree "
6523                         "for mixed block groups yet, please notify a btrfs "
6524                         "developer you want to do this so they can add this "
6525                         "functionality.\n");
6526                 return -EINVAL;
6527         }
6528
6529         /*
6530          * first we need to walk all of the trees except the extent tree and pin
6531          * down the bytes that are in use so we don't overwrite any existing
6532          * metadata.
6533          */
6534         ret = pin_metadata_blocks(fs_info);
6535         if (ret) {
6536                 fprintf(stderr, "error pinning down used bytes\n");
6537                 return ret;
6538         }
6539
6540         /*
6541          * Need to drop all the block groups since we're going to recreate all
6542          * of them again.
6543          */
6544         btrfs_free_block_groups(fs_info);
6545         ret = reset_block_groups(fs_info);
6546         if (ret) {
6547                 fprintf(stderr, "error resetting the block groups\n");
6548                 return ret;
6549         }
6550
6551         /* Ok we can allocate now, reinit the extent root */
6552         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
6553         if (ret) {
6554                 fprintf(stderr, "extent root initialization failed\n");
6555                 /*
6556                  * When the transaction code is updated we should end the
6557                  * transaction, but for now progs only knows about commit so
6558                  * just return an error.
6559                  */
6560                 return ret;
6561         }
6562
6563         /*
6564          * Now we have all the in-memory block groups setup so we can make
6565          * allocations properly, and the metadata we care about is safe since we
6566          * pinned all of it above.
6567          */
6568         while (1) {
6569                 struct btrfs_block_group_cache *cache;
6570
6571                 cache = btrfs_lookup_first_block_group(fs_info, start);
6572                 if (!cache)
6573                         break;
6574                 start = cache->key.objectid + cache->key.offset;
6575                 ret = btrfs_insert_item(trans, fs_info->extent_root,
6576                                         &cache->key, &cache->item,
6577                                         sizeof(cache->item));
6578                 if (ret) {
6579                         fprintf(stderr, "Error adding block group\n");
6580                         return ret;
6581                 }
6582                 btrfs_extent_post_op(trans, fs_info->extent_root);
6583         }
6584
6585         ret = reset_balance(trans, fs_info);
6586         if (ret)
6587                 fprintf(stderr, "error reseting the pending balance\n");
6588
6589         return ret;
6590 }
6591
6592 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
6593 {
6594         struct btrfs_path *path;
6595         struct btrfs_trans_handle *trans;
6596         struct btrfs_key key;
6597         int ret;
6598
6599         printf("Recowing metadata block %llu\n", eb->start);
6600         key.objectid = btrfs_header_owner(eb);
6601         key.type = BTRFS_ROOT_ITEM_KEY;
6602         key.offset = (u64)-1;
6603
6604         root = btrfs_read_fs_root(root->fs_info, &key);
6605         if (IS_ERR(root)) {
6606                 fprintf(stderr, "Couldn't find owner root %llu\n",
6607                         key.objectid);
6608                 return PTR_ERR(root);
6609         }
6610
6611         path = btrfs_alloc_path();
6612         if (!path)
6613                 return -ENOMEM;
6614
6615         trans = btrfs_start_transaction(root, 1);
6616         if (IS_ERR(trans)) {
6617                 btrfs_free_path(path);
6618                 return PTR_ERR(trans);
6619         }
6620
6621         path->lowest_level = btrfs_header_level(eb);
6622         if (path->lowest_level)
6623                 btrfs_node_key_to_cpu(eb, &key, 0);
6624         else
6625                 btrfs_item_key_to_cpu(eb, &key, 0);
6626
6627         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6628         btrfs_commit_transaction(trans, root);
6629         btrfs_free_path(path);
6630         return ret;
6631 }
6632
6633 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
6634 {
6635         struct btrfs_path *path;
6636         struct btrfs_trans_handle *trans;
6637         struct btrfs_key key;
6638         int ret;
6639
6640         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
6641                bad->key.type, bad->key.offset);
6642         key.objectid = bad->root_id;
6643         key.type = BTRFS_ROOT_ITEM_KEY;
6644         key.offset = (u64)-1;
6645
6646         root = btrfs_read_fs_root(root->fs_info, &key);
6647         if (IS_ERR(root)) {
6648                 fprintf(stderr, "Couldn't find owner root %llu\n",
6649                         key.objectid);
6650                 return PTR_ERR(root);
6651         }
6652
6653         path = btrfs_alloc_path();
6654         if (!path)
6655                 return -ENOMEM;
6656
6657         trans = btrfs_start_transaction(root, 1);
6658         if (IS_ERR(trans)) {
6659                 btrfs_free_path(path);
6660                 return PTR_ERR(trans);
6661         }
6662
6663         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
6664         if (ret) {
6665                 if (ret > 0)
6666                         ret = 0;
6667                 goto out;
6668         }
6669         ret = btrfs_del_item(trans, root, path);
6670 out:
6671         btrfs_commit_transaction(trans, root);
6672         btrfs_free_path(path);
6673         return ret;
6674 }
6675
6676 static int zero_log_tree(struct btrfs_root *root)
6677 {
6678         struct btrfs_trans_handle *trans;
6679         int ret;
6680
6681         trans = btrfs_start_transaction(root, 1);
6682         if (IS_ERR(trans)) {
6683                 ret = PTR_ERR(trans);
6684                 return ret;
6685         }
6686         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
6687         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
6688         ret = btrfs_commit_transaction(trans, root);
6689         return ret;
6690 }
6691
6692 static int populate_csum(struct btrfs_trans_handle *trans,
6693                          struct btrfs_root *csum_root, char *buf, u64 start,
6694                          u64 len)
6695 {
6696         u64 offset = 0;
6697         u64 sectorsize;
6698         int ret = 0;
6699
6700         while (offset < len) {
6701                 sectorsize = csum_root->sectorsize;
6702                 ret = read_extent_data(csum_root, buf, start + offset,
6703                                        &sectorsize, 0);
6704                 if (ret)
6705                         break;
6706                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
6707                                             start + offset, buf, sectorsize);
6708                 if (ret)
6709                         break;
6710                 offset += sectorsize;
6711         }
6712         return ret;
6713 }
6714
6715 static int fill_csum_tree(struct btrfs_trans_handle *trans,
6716                           struct btrfs_root *csum_root)
6717 {
6718         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
6719         struct btrfs_path *path;
6720         struct btrfs_extent_item *ei;
6721         struct extent_buffer *leaf;
6722         char *buf;
6723         struct btrfs_key key;
6724         int ret;
6725
6726         path = btrfs_alloc_path();
6727         if (!path)
6728                 return -ENOMEM;
6729
6730         key.objectid = 0;
6731         key.type = BTRFS_EXTENT_ITEM_KEY;
6732         key.offset = 0;
6733
6734         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
6735         if (ret < 0) {
6736                 btrfs_free_path(path);
6737                 return ret;
6738         }
6739
6740         buf = malloc(csum_root->sectorsize);
6741         if (!buf) {
6742                 btrfs_free_path(path);
6743                 return -ENOMEM;
6744         }
6745
6746         while (1) {
6747                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6748                         ret = btrfs_next_leaf(extent_root, path);
6749                         if (ret < 0)
6750                                 break;
6751                         if (ret) {
6752                                 ret = 0;
6753                                 break;
6754                         }
6755                 }
6756                 leaf = path->nodes[0];
6757
6758                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6759                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6760                         path->slots[0]++;
6761                         continue;
6762                 }
6763
6764                 ei = btrfs_item_ptr(leaf, path->slots[0],
6765                                     struct btrfs_extent_item);
6766                 if (!(btrfs_extent_flags(leaf, ei) &
6767                       BTRFS_EXTENT_FLAG_DATA)) {
6768                         path->slots[0]++;
6769                         continue;
6770                 }
6771
6772                 ret = populate_csum(trans, csum_root, buf, key.objectid,
6773                                     key.offset);
6774                 if (ret)
6775                         break;
6776                 path->slots[0]++;
6777         }
6778
6779         btrfs_free_path(path);
6780         free(buf);
6781         return ret;
6782 }
6783
6784 static struct option long_options[] = {
6785         { "super", 1, NULL, 's' },
6786         { "repair", 0, NULL, 0 },
6787         { "init-csum-tree", 0, NULL, 0 },
6788         { "init-extent-tree", 0, NULL, 0 },
6789         { "check-data-csum", 0, NULL, 0 },
6790         { "backup", 0, NULL, 0 },
6791         { "subvol-extents", no_argument, NULL, 'E' },
6792         { "qgroup-report", 0, NULL, 'Q' },
6793         { NULL, 0, NULL, 0}
6794 };
6795
6796 const char * const cmd_check_usage[] = {
6797         "btrfs check [options] <device>",
6798         "Check an unmounted btrfs filesystem.",
6799         "",
6800         "-s|--super <superblock>     use this superblock copy",
6801         "-b|--backup                 use the backup root copy",
6802         "--repair                    try to repair the filesystem",
6803         "--init-csum-tree            create a new CRC tree",
6804         "--init-extent-tree          create a new extent tree",
6805         "--check-data-csum           verify checkums of data blocks",
6806         "--qgroup-report             print a report on qgroup consistency",
6807         "--subvol-extents            print subvolume extents and sharing state",
6808         NULL
6809 };
6810
6811 int cmd_check(int argc, char **argv)
6812 {
6813         struct cache_tree root_cache;
6814         struct btrfs_root *root;
6815         struct btrfs_fs_info *info;
6816         u64 bytenr = 0;
6817         u64 subvolid = 0;
6818         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
6819         int ret;
6820         u64 num;
6821         int option_index = 0;
6822         int init_csum_tree = 0;
6823         int qgroup_report = 0;
6824         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
6825
6826         while(1) {
6827                 int c;
6828                 c = getopt_long(argc, argv, "as:b", long_options,
6829                                 &option_index);
6830                 if (c < 0)
6831                         break;
6832                 switch(c) {
6833                         case 'a': /* ignored */ break;
6834                         case 'b':
6835                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
6836                                 break;
6837                         case 's':
6838                                 num = arg_strtou64(optarg);
6839                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
6840                                         fprintf(stderr,
6841                                                 "ERROR: super mirror should be less than: %d\n",
6842                                                 BTRFS_SUPER_MIRROR_MAX);
6843                                         exit(1);
6844                                 }
6845                                 bytenr = btrfs_sb_offset(((int)num));
6846                                 printf("using SB copy %llu, bytenr %llu\n", num,
6847                                        (unsigned long long)bytenr);
6848                                 break;
6849                         case 'Q':
6850                                 qgroup_report = 1;
6851                                 break;
6852                         case 'E':
6853                                 subvolid = arg_strtou64(optarg);
6854                                 break;
6855                         case '?':
6856                         case 'h':
6857                                 usage(cmd_check_usage);
6858                 }
6859                 if (option_index == 1) {
6860                         printf("enabling repair mode\n");
6861                         repair = 1;
6862                         ctree_flags |= OPEN_CTREE_WRITES;
6863                 } else if (option_index == 2) {
6864                         printf("Creating a new CRC tree\n");
6865                         init_csum_tree = 1;
6866                         repair = 1;
6867                         ctree_flags |= OPEN_CTREE_WRITES;
6868                 } else if (option_index == 3) {
6869                         init_extent_tree = 1;
6870                         ctree_flags |= (OPEN_CTREE_WRITES |
6871                                         OPEN_CTREE_NO_BLOCK_GROUPS);
6872                         repair = 1;
6873                 } else if (option_index == 4) {
6874                         check_data_csum = 1;
6875                 }
6876         }
6877         argc = argc - optind;
6878
6879         if (check_argc_exact(argc, 1))
6880                 usage(cmd_check_usage);
6881
6882         radix_tree_init();
6883         cache_tree_init(&root_cache);
6884
6885         if((ret = check_mounted(argv[optind])) < 0) {
6886                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
6887                 goto err_out;
6888         } else if(ret) {
6889                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
6890                 ret = -EBUSY;
6891                 goto err_out;
6892         }
6893
6894         /* only allow partial opening under repair mode */
6895         if (repair)
6896                 ctree_flags |= OPEN_CTREE_PARTIAL;
6897
6898         info = open_ctree_fs_info(argv[optind], bytenr, 0, ctree_flags);
6899         if (!info) {
6900                 fprintf(stderr, "Couldn't open file system\n");
6901                 ret = -EIO;
6902                 goto err_out;
6903         }
6904
6905         root = info->fs_root;
6906         /*
6907          * repair mode will force us to commit transaction which
6908          * will make us fail to load log tree when mounting.
6909          */
6910         if (repair && btrfs_super_log_root(info->super_copy)) {
6911                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
6912                 if (!ret) {
6913                         ret = 1;
6914                         goto close_out;
6915                 }
6916                 ret = zero_log_tree(root);
6917                 if (ret) {
6918                         fprintf(stderr, "fail to zero log tree\n");
6919                         goto close_out;
6920                 }
6921         }
6922
6923         uuid_unparse(info->super_copy->fsid, uuidbuf);
6924         if (qgroup_report) {
6925                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
6926                        uuidbuf);
6927                 ret = qgroup_verify_all(info);
6928                 if (ret == 0)
6929                         print_qgroup_report(1);
6930                 goto close_out;
6931         }
6932         if (subvolid) {
6933                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
6934                        subvolid, argv[optind], uuidbuf);
6935                 ret = print_extent_state(info, subvolid);
6936                 goto close_out;
6937         }
6938         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
6939
6940         if (!extent_buffer_uptodate(info->tree_root->node) ||
6941             !extent_buffer_uptodate(info->dev_root->node) ||
6942             !extent_buffer_uptodate(info->chunk_root->node)) {
6943                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6944                 ret = -EIO;
6945                 goto close_out;
6946         }
6947
6948         if (init_extent_tree || init_csum_tree) {
6949                 struct btrfs_trans_handle *trans;
6950
6951                 trans = btrfs_start_transaction(info->extent_root, 0);
6952                 if (IS_ERR(trans)) {
6953                         fprintf(stderr, "Error starting transaction\n");
6954                         ret = PTR_ERR(trans);
6955                         goto close_out;
6956                 }
6957
6958                 if (init_extent_tree) {
6959                         printf("Creating a new extent tree\n");
6960                         ret = reinit_extent_tree(trans, info);
6961                         if (ret)
6962                                 goto close_out;
6963                 }
6964
6965                 if (init_csum_tree) {
6966                         fprintf(stderr, "Reinit crc root\n");
6967                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
6968                         if (ret) {
6969                                 fprintf(stderr, "crc root initialization failed\n");
6970                                 ret = -EIO;
6971                                 goto close_out;
6972                         }
6973
6974                         ret = fill_csum_tree(trans, info->csum_root);
6975                         if (ret) {
6976                                 fprintf(stderr, "crc refilling failed\n");
6977                                 return -EIO;
6978                         }
6979                 }
6980                 /*
6981                  * Ok now we commit and run the normal fsck, which will add
6982                  * extent entries for all of the items it finds.
6983                  */
6984                 ret = btrfs_commit_transaction(trans, info->extent_root);
6985                 if (ret)
6986                         goto close_out;
6987         }
6988         if (!extent_buffer_uptodate(info->extent_root->node)) {
6989                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6990                 ret = -EIO;
6991                 goto close_out;
6992         }
6993         if (!extent_buffer_uptodate(info->csum_root->node)) {
6994                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
6995                 ret = -EIO;
6996                 goto close_out;
6997         }
6998
6999         fprintf(stderr, "checking extents\n");
7000         ret = check_chunks_and_extents(root);
7001         if (ret)
7002                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
7003
7004         fprintf(stderr, "checking free space cache\n");
7005         ret = check_space_cache(root);
7006         if (ret)
7007                 goto out;
7008
7009         /*
7010          * We used to have to have these hole extents in between our real
7011          * extents so if we don't have this flag set we need to make sure there
7012          * are no gaps in the file extents for inodes, otherwise we can just
7013          * ignore it when this happens.
7014          */
7015         no_holes = btrfs_fs_incompat(root->fs_info,
7016                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
7017         fprintf(stderr, "checking fs roots\n");
7018         ret = check_fs_roots(root, &root_cache);
7019         if (ret)
7020                 goto out;
7021
7022         fprintf(stderr, "checking csums\n");
7023         ret = check_csums(root);
7024         if (ret)
7025                 goto out;
7026
7027         fprintf(stderr, "checking root refs\n");
7028         ret = check_root_refs(root, &root_cache);
7029         if (ret)
7030                 goto out;
7031
7032         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
7033                 struct extent_buffer *eb;
7034
7035                 eb = list_first_entry(&root->fs_info->recow_ebs,
7036                                       struct extent_buffer, recow);
7037                 ret = recow_extent_buffer(root, eb);
7038                 if (ret)
7039                         break;
7040         }
7041
7042         while (!list_empty(&delete_items)) {
7043                 struct bad_item *bad;
7044
7045                 bad = list_first_entry(&delete_items, struct bad_item, list);
7046                 list_del_init(&bad->list);
7047                 if (repair)
7048                         ret = delete_bad_item(root, bad);
7049                 free(bad);
7050         }
7051
7052         if (info->quota_enabled) {
7053                 int err;
7054                 fprintf(stderr, "checking quota groups\n");
7055                 err = qgroup_verify_all(info);
7056                 if (err)
7057                         goto out;
7058         }
7059
7060         if (!list_empty(&root->fs_info->recow_ebs)) {
7061                 fprintf(stderr, "Transid errors in file system\n");
7062                 ret = 1;
7063         }
7064 out:
7065         print_qgroup_report(0);
7066         if (found_old_backref) { /*
7067                  * there was a disk format change when mixed
7068                  * backref was in testing tree. The old format
7069                  * existed about one week.
7070                  */
7071                 printf("\n * Found old mixed backref format. "
7072                        "The old format is not supported! *"
7073                        "\n * Please mount the FS in readonly mode, "
7074                        "backup data and re-format the FS. *\n\n");
7075                 ret = 1;
7076         }
7077         printf("found %llu bytes used err is %d\n",
7078                (unsigned long long)bytes_used, ret);
7079         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
7080         printf("total tree bytes: %llu\n",
7081                (unsigned long long)total_btree_bytes);
7082         printf("total fs tree bytes: %llu\n",
7083                (unsigned long long)total_fs_tree_bytes);
7084         printf("total extent tree bytes: %llu\n",
7085                (unsigned long long)total_extent_tree_bytes);
7086         printf("btree space waste bytes: %llu\n",
7087                (unsigned long long)btree_space_waste);
7088         printf("file data blocks allocated: %llu\n referenced %llu\n",
7089                 (unsigned long long)data_bytes_allocated,
7090                 (unsigned long long)data_bytes_referenced);
7091         printf("%s\n", BTRFS_BUILD_VERSION);
7092
7093         free_root_recs_tree(&root_cache);
7094 close_out:
7095         close_ctree(root);
7096 err_out:
7097         return ret;
7098 }