btrfs-progs: print B for bytes
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #define _GNU_SOURCE 1
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include <getopt.h>
29 #include <uuid/uuid.h>
30 #include "ctree.h"
31 #include "volumes.h"
32 #include "repair.h"
33 #include "disk-io.h"
34 #include "print-tree.h"
35 #include "transaction.h"
36 #include "version.h"
37 #include "utils.h"
38 #include "commands.h"
39 #include "free-space-cache.h"
40 #include "btrfsck.h"
41 #include "qgroup-verify.h"
42
43 static u64 bytes_used = 0;
44 static u64 total_csum_bytes = 0;
45 static u64 total_btree_bytes = 0;
46 static u64 total_fs_tree_bytes = 0;
47 static u64 total_extent_tree_bytes = 0;
48 static u64 btree_space_waste = 0;
49 static u64 data_bytes_allocated = 0;
50 static u64 data_bytes_referenced = 0;
51 static int found_old_backref = 0;
52 static LIST_HEAD(duplicate_extents);
53 static LIST_HEAD(delete_items);
54 static int repair = 0;
55 static int no_holes = 0;
56 static int init_extent_tree = 0;
57 static int check_data_csum = 0;
58
59 struct extent_backref {
60         struct list_head list;
61         unsigned int is_data:1;
62         unsigned int found_extent_tree:1;
63         unsigned int full_backref:1;
64         unsigned int found_ref:1;
65         unsigned int broken:1;
66 };
67
68 struct data_backref {
69         struct extent_backref node;
70         union {
71                 u64 parent;
72                 u64 root;
73         };
74         u64 owner;
75         u64 offset;
76         u64 disk_bytenr;
77         u64 bytes;
78         u64 ram_bytes;
79         u32 num_refs;
80         u32 found_ref;
81 };
82
83 struct tree_backref {
84         struct extent_backref node;
85         union {
86                 u64 parent;
87                 u64 root;
88         };
89 };
90
91 struct extent_record {
92         struct list_head backrefs;
93         struct list_head dups;
94         struct list_head list;
95         struct cache_extent cache;
96         struct btrfs_disk_key parent_key;
97         u64 start;
98         u64 max_size;
99         u64 nr;
100         u64 refs;
101         u64 extent_item_refs;
102         u64 generation;
103         u64 parent_generation;
104         u64 info_objectid;
105         u32 num_duplicates;
106         u8 info_level;
107         unsigned int found_rec:1;
108         unsigned int content_checked:1;
109         unsigned int owner_ref_checked:1;
110         unsigned int is_root:1;
111         unsigned int metadata:1;
112 };
113
114 struct inode_backref {
115         struct list_head list;
116         unsigned int found_dir_item:1;
117         unsigned int found_dir_index:1;
118         unsigned int found_inode_ref:1;
119         unsigned int filetype:8;
120         int errors;
121         unsigned int ref_type;
122         u64 dir;
123         u64 index;
124         u16 namelen;
125         char name[0];
126 };
127
128 struct dropping_root_item_record {
129         struct list_head list;
130         struct btrfs_root_item ri;
131         struct btrfs_key found_key;
132 };
133
134 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
135 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
136 #define REF_ERR_NO_INODE_REF            (1 << 2)
137 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
138 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
139 #define REF_ERR_DUP_INODE_REF           (1 << 5)
140 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
141 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
142 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
143 #define REF_ERR_NO_ROOT_REF             (1 << 9)
144 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
145 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
146 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
147
148 struct inode_record {
149         struct list_head backrefs;
150         unsigned int checked:1;
151         unsigned int merging:1;
152         unsigned int found_inode_item:1;
153         unsigned int found_dir_item:1;
154         unsigned int found_file_extent:1;
155         unsigned int found_csum_item:1;
156         unsigned int some_csum_missing:1;
157         unsigned int nodatasum:1;
158         int errors;
159
160         u64 ino;
161         u32 nlink;
162         u32 imode;
163         u64 isize;
164         u64 nbytes;
165
166         u32 found_link;
167         u64 found_size;
168         u64 extent_start;
169         u64 extent_end;
170         u64 first_extent_gap;
171
172         u32 refs;
173 };
174
175 #define I_ERR_NO_INODE_ITEM             (1 << 0)
176 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
177 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
178 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
179 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
180 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
181 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
182 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
183 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
184 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
185 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
186 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
187 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
188 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
189
190 struct root_backref {
191         struct list_head list;
192         unsigned int found_dir_item:1;
193         unsigned int found_dir_index:1;
194         unsigned int found_back_ref:1;
195         unsigned int found_forward_ref:1;
196         unsigned int reachable:1;
197         int errors;
198         u64 ref_root;
199         u64 dir;
200         u64 index;
201         u16 namelen;
202         char name[0];
203 };
204
205 struct root_record {
206         struct list_head backrefs;
207         struct cache_extent cache;
208         unsigned int found_root_item:1;
209         u64 objectid;
210         u32 found_ref;
211 };
212
213 struct ptr_node {
214         struct cache_extent cache;
215         void *data;
216 };
217
218 struct shared_node {
219         struct cache_extent cache;
220         struct cache_tree root_cache;
221         struct cache_tree inode_cache;
222         struct inode_record *current;
223         u32 refs;
224 };
225
226 struct block_info {
227         u64 start;
228         u32 size;
229 };
230
231 struct walk_control {
232         struct cache_tree shared;
233         struct shared_node *nodes[BTRFS_MAX_LEVEL];
234         int active_node;
235         int root_level;
236 };
237
238 struct bad_item {
239         struct btrfs_key key;
240         u64 root_id;
241         struct list_head list;
242 };
243
244 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
245
246 static void record_root_in_trans(struct btrfs_trans_handle *trans,
247                                  struct btrfs_root *root)
248 {
249         if (root->last_trans != trans->transid) {
250                 root->track_dirty = 1;
251                 root->last_trans = trans->transid;
252                 root->commit_root = root->node;
253                 extent_buffer_get(root->node);
254         }
255 }
256
257 static u8 imode_to_type(u32 imode)
258 {
259 #define S_SHIFT 12
260         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
261                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
262                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
263                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
264                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
265                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
266                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
267                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
268         };
269
270         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
271 #undef S_SHIFT
272 }
273
274 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
275 {
276         struct device_record *rec1;
277         struct device_record *rec2;
278
279         rec1 = rb_entry(node1, struct device_record, node);
280         rec2 = rb_entry(node2, struct device_record, node);
281         if (rec1->devid > rec2->devid)
282                 return -1;
283         else if (rec1->devid < rec2->devid)
284                 return 1;
285         else
286                 return 0;
287 }
288
289 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
290 {
291         struct inode_record *rec;
292         struct inode_backref *backref;
293         struct inode_backref *orig;
294         size_t size;
295
296         rec = malloc(sizeof(*rec));
297         memcpy(rec, orig_rec, sizeof(*rec));
298         rec->refs = 1;
299         INIT_LIST_HEAD(&rec->backrefs);
300
301         list_for_each_entry(orig, &orig_rec->backrefs, list) {
302                 size = sizeof(*orig) + orig->namelen + 1;
303                 backref = malloc(size);
304                 memcpy(backref, orig, size);
305                 list_add_tail(&backref->list, &rec->backrefs);
306         }
307         return rec;
308 }
309
310 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
311 {
312         u64 root_objectid = root->root_key.objectid;
313         int errors = rec->errors;
314
315         if (!errors)
316                 return;
317         /* reloc root errors, we print its corresponding fs root objectid*/
318         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
319                 root_objectid = root->root_key.offset;
320                 fprintf(stderr, "reloc");
321         }
322         fprintf(stderr, "root %llu inode %llu errors %x",
323                 (unsigned long long) root_objectid,
324                 (unsigned long long) rec->ino, rec->errors);
325
326         if (errors & I_ERR_NO_INODE_ITEM)
327                 fprintf(stderr, ", no inode item");
328         if (errors & I_ERR_NO_ORPHAN_ITEM)
329                 fprintf(stderr, ", no orphan item");
330         if (errors & I_ERR_DUP_INODE_ITEM)
331                 fprintf(stderr, ", dup inode item");
332         if (errors & I_ERR_DUP_DIR_INDEX)
333                 fprintf(stderr, ", dup dir index");
334         if (errors & I_ERR_ODD_DIR_ITEM)
335                 fprintf(stderr, ", odd dir item");
336         if (errors & I_ERR_ODD_FILE_EXTENT)
337                 fprintf(stderr, ", odd file extent");
338         if (errors & I_ERR_BAD_FILE_EXTENT)
339                 fprintf(stderr, ", bad file extent");
340         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
341                 fprintf(stderr, ", file extent overlap");
342         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
343                 fprintf(stderr, ", file extent discount");
344         if (errors & I_ERR_DIR_ISIZE_WRONG)
345                 fprintf(stderr, ", dir isize wrong");
346         if (errors & I_ERR_FILE_NBYTES_WRONG)
347                 fprintf(stderr, ", nbytes wrong");
348         if (errors & I_ERR_ODD_CSUM_ITEM)
349                 fprintf(stderr, ", odd csum item");
350         if (errors & I_ERR_SOME_CSUM_MISSING)
351                 fprintf(stderr, ", some csum missing");
352         if (errors & I_ERR_LINK_COUNT_WRONG)
353                 fprintf(stderr, ", link count wrong");
354         fprintf(stderr, "\n");
355 }
356
357 static void print_ref_error(int errors)
358 {
359         if (errors & REF_ERR_NO_DIR_ITEM)
360                 fprintf(stderr, ", no dir item");
361         if (errors & REF_ERR_NO_DIR_INDEX)
362                 fprintf(stderr, ", no dir index");
363         if (errors & REF_ERR_NO_INODE_REF)
364                 fprintf(stderr, ", no inode ref");
365         if (errors & REF_ERR_DUP_DIR_ITEM)
366                 fprintf(stderr, ", dup dir item");
367         if (errors & REF_ERR_DUP_DIR_INDEX)
368                 fprintf(stderr, ", dup dir index");
369         if (errors & REF_ERR_DUP_INODE_REF)
370                 fprintf(stderr, ", dup inode ref");
371         if (errors & REF_ERR_INDEX_UNMATCH)
372                 fprintf(stderr, ", index unmatch");
373         if (errors & REF_ERR_FILETYPE_UNMATCH)
374                 fprintf(stderr, ", filetype unmatch");
375         if (errors & REF_ERR_NAME_TOO_LONG)
376                 fprintf(stderr, ", name too long");
377         if (errors & REF_ERR_NO_ROOT_REF)
378                 fprintf(stderr, ", no root ref");
379         if (errors & REF_ERR_NO_ROOT_BACKREF)
380                 fprintf(stderr, ", no root backref");
381         if (errors & REF_ERR_DUP_ROOT_REF)
382                 fprintf(stderr, ", dup root ref");
383         if (errors & REF_ERR_DUP_ROOT_BACKREF)
384                 fprintf(stderr, ", dup root backref");
385         fprintf(stderr, "\n");
386 }
387
388 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
389                                           u64 ino, int mod)
390 {
391         struct ptr_node *node;
392         struct cache_extent *cache;
393         struct inode_record *rec = NULL;
394         int ret;
395
396         cache = lookup_cache_extent(inode_cache, ino, 1);
397         if (cache) {
398                 node = container_of(cache, struct ptr_node, cache);
399                 rec = node->data;
400                 if (mod && rec->refs > 1) {
401                         node->data = clone_inode_rec(rec);
402                         rec->refs--;
403                         rec = node->data;
404                 }
405         } else if (mod) {
406                 rec = calloc(1, sizeof(*rec));
407                 rec->ino = ino;
408                 rec->extent_start = (u64)-1;
409                 rec->first_extent_gap = (u64)-1;
410                 rec->refs = 1;
411                 INIT_LIST_HEAD(&rec->backrefs);
412
413                 node = malloc(sizeof(*node));
414                 node->cache.start = ino;
415                 node->cache.size = 1;
416                 node->data = rec;
417
418                 if (ino == BTRFS_FREE_INO_OBJECTID)
419                         rec->found_link = 1;
420
421                 ret = insert_cache_extent(inode_cache, &node->cache);
422                 BUG_ON(ret);
423         }
424         return rec;
425 }
426
427 static void free_inode_rec(struct inode_record *rec)
428 {
429         struct inode_backref *backref;
430
431         if (--rec->refs > 0)
432                 return;
433
434         while (!list_empty(&rec->backrefs)) {
435                 backref = list_entry(rec->backrefs.next,
436                                      struct inode_backref, list);
437                 list_del(&backref->list);
438                 free(backref);
439         }
440         free(rec);
441 }
442
443 static int can_free_inode_rec(struct inode_record *rec)
444 {
445         if (!rec->errors && rec->checked && rec->found_inode_item &&
446             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
447                 return 1;
448         return 0;
449 }
450
451 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
452                                  struct inode_record *rec)
453 {
454         struct cache_extent *cache;
455         struct inode_backref *tmp, *backref;
456         struct ptr_node *node;
457         unsigned char filetype;
458
459         if (!rec->found_inode_item)
460                 return;
461
462         filetype = imode_to_type(rec->imode);
463         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
464                 if (backref->found_dir_item && backref->found_dir_index) {
465                         if (backref->filetype != filetype)
466                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
467                         if (!backref->errors && backref->found_inode_ref) {
468                                 list_del(&backref->list);
469                                 free(backref);
470                         }
471                 }
472         }
473
474         if (!rec->checked || rec->merging)
475                 return;
476
477         if (S_ISDIR(rec->imode)) {
478                 if (rec->found_size != rec->isize)
479                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
480                 if (rec->found_file_extent)
481                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
482         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
483                 if (rec->found_dir_item)
484                         rec->errors |= I_ERR_ODD_DIR_ITEM;
485                 if (rec->found_size != rec->nbytes)
486                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
487                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
488                         rec->first_extent_gap = 0;
489                 if (rec->nlink > 0 && !no_holes &&
490                     (rec->extent_end < rec->isize ||
491                      rec->first_extent_gap < rec->isize))
492                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
493         }
494
495         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
496                 if (rec->found_csum_item && rec->nodatasum)
497                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
498                 if (rec->some_csum_missing && !rec->nodatasum)
499                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
500         }
501
502         BUG_ON(rec->refs != 1);
503         if (can_free_inode_rec(rec)) {
504                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
505                 node = container_of(cache, struct ptr_node, cache);
506                 BUG_ON(node->data != rec);
507                 remove_cache_extent(inode_cache, &node->cache);
508                 free(node);
509                 free_inode_rec(rec);
510         }
511 }
512
513 static int check_orphan_item(struct btrfs_root *root, u64 ino)
514 {
515         struct btrfs_path path;
516         struct btrfs_key key;
517         int ret;
518
519         key.objectid = BTRFS_ORPHAN_OBJECTID;
520         key.type = BTRFS_ORPHAN_ITEM_KEY;
521         key.offset = ino;
522
523         btrfs_init_path(&path);
524         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
525         btrfs_release_path(&path);
526         if (ret > 0)
527                 ret = -ENOENT;
528         return ret;
529 }
530
531 static int process_inode_item(struct extent_buffer *eb,
532                               int slot, struct btrfs_key *key,
533                               struct shared_node *active_node)
534 {
535         struct inode_record *rec;
536         struct btrfs_inode_item *item;
537
538         rec = active_node->current;
539         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
540         if (rec->found_inode_item) {
541                 rec->errors |= I_ERR_DUP_INODE_ITEM;
542                 return 1;
543         }
544         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
545         rec->nlink = btrfs_inode_nlink(eb, item);
546         rec->isize = btrfs_inode_size(eb, item);
547         rec->nbytes = btrfs_inode_nbytes(eb, item);
548         rec->imode = btrfs_inode_mode(eb, item);
549         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
550                 rec->nodatasum = 1;
551         rec->found_inode_item = 1;
552         if (rec->nlink == 0)
553                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
554         maybe_free_inode_rec(&active_node->inode_cache, rec);
555         return 0;
556 }
557
558 static struct inode_backref *get_inode_backref(struct inode_record *rec,
559                                                 const char *name,
560                                                 int namelen, u64 dir)
561 {
562         struct inode_backref *backref;
563
564         list_for_each_entry(backref, &rec->backrefs, list) {
565                 if (backref->dir != dir || backref->namelen != namelen)
566                         continue;
567                 if (memcmp(name, backref->name, namelen))
568                         continue;
569                 return backref;
570         }
571
572         backref = malloc(sizeof(*backref) + namelen + 1);
573         memset(backref, 0, sizeof(*backref));
574         backref->dir = dir;
575         backref->namelen = namelen;
576         memcpy(backref->name, name, namelen);
577         backref->name[namelen] = '\0';
578         list_add_tail(&backref->list, &rec->backrefs);
579         return backref;
580 }
581
582 static int add_inode_backref(struct cache_tree *inode_cache,
583                              u64 ino, u64 dir, u64 index,
584                              const char *name, int namelen,
585                              int filetype, int itemtype, int errors)
586 {
587         struct inode_record *rec;
588         struct inode_backref *backref;
589
590         rec = get_inode_rec(inode_cache, ino, 1);
591         backref = get_inode_backref(rec, name, namelen, dir);
592         if (errors)
593                 backref->errors |= errors;
594         if (itemtype == BTRFS_DIR_INDEX_KEY) {
595                 if (backref->found_dir_index)
596                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
597                 if (backref->found_inode_ref && backref->index != index)
598                         backref->errors |= REF_ERR_INDEX_UNMATCH;
599                 if (backref->found_dir_item && backref->filetype != filetype)
600                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
601
602                 backref->index = index;
603                 backref->filetype = filetype;
604                 backref->found_dir_index = 1;
605         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
606                 rec->found_link++;
607                 if (backref->found_dir_item)
608                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
609                 if (backref->found_dir_index && backref->filetype != filetype)
610                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
611
612                 backref->filetype = filetype;
613                 backref->found_dir_item = 1;
614         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
615                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
616                 if (backref->found_inode_ref)
617                         backref->errors |= REF_ERR_DUP_INODE_REF;
618                 if (backref->found_dir_index && backref->index != index)
619                         backref->errors |= REF_ERR_INDEX_UNMATCH;
620
621                 backref->ref_type = itemtype;
622                 backref->index = index;
623                 backref->found_inode_ref = 1;
624         } else {
625                 BUG_ON(1);
626         }
627
628         maybe_free_inode_rec(inode_cache, rec);
629         return 0;
630 }
631
632 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
633                             struct cache_tree *dst_cache)
634 {
635         struct inode_backref *backref;
636         u32 dir_count = 0;
637
638         dst->merging = 1;
639         list_for_each_entry(backref, &src->backrefs, list) {
640                 if (backref->found_dir_index) {
641                         add_inode_backref(dst_cache, dst->ino, backref->dir,
642                                         backref->index, backref->name,
643                                         backref->namelen, backref->filetype,
644                                         BTRFS_DIR_INDEX_KEY, backref->errors);
645                 }
646                 if (backref->found_dir_item) {
647                         dir_count++;
648                         add_inode_backref(dst_cache, dst->ino,
649                                         backref->dir, 0, backref->name,
650                                         backref->namelen, backref->filetype,
651                                         BTRFS_DIR_ITEM_KEY, backref->errors);
652                 }
653                 if (backref->found_inode_ref) {
654                         add_inode_backref(dst_cache, dst->ino,
655                                         backref->dir, backref->index,
656                                         backref->name, backref->namelen, 0,
657                                         backref->ref_type, backref->errors);
658                 }
659         }
660
661         if (src->found_dir_item)
662                 dst->found_dir_item = 1;
663         if (src->found_file_extent)
664                 dst->found_file_extent = 1;
665         if (src->found_csum_item)
666                 dst->found_csum_item = 1;
667         if (src->some_csum_missing)
668                 dst->some_csum_missing = 1;
669         if (dst->first_extent_gap > src->first_extent_gap)
670                 dst->first_extent_gap = src->first_extent_gap;
671
672         BUG_ON(src->found_link < dir_count);
673         dst->found_link += src->found_link - dir_count;
674         dst->found_size += src->found_size;
675         if (src->extent_start != (u64)-1) {
676                 if (dst->extent_start == (u64)-1) {
677                         dst->extent_start = src->extent_start;
678                         dst->extent_end = src->extent_end;
679                 } else {
680                         if (dst->extent_end > src->extent_start)
681                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
682                         else if (dst->extent_end < src->extent_start &&
683                                  dst->extent_end < dst->first_extent_gap)
684                                 dst->first_extent_gap = dst->extent_end;
685                         if (dst->extent_end < src->extent_end)
686                                 dst->extent_end = src->extent_end;
687                 }
688         }
689
690         dst->errors |= src->errors;
691         if (src->found_inode_item) {
692                 if (!dst->found_inode_item) {
693                         dst->nlink = src->nlink;
694                         dst->isize = src->isize;
695                         dst->nbytes = src->nbytes;
696                         dst->imode = src->imode;
697                         dst->nodatasum = src->nodatasum;
698                         dst->found_inode_item = 1;
699                 } else {
700                         dst->errors |= I_ERR_DUP_INODE_ITEM;
701                 }
702         }
703         dst->merging = 0;
704
705         return 0;
706 }
707
708 static int splice_shared_node(struct shared_node *src_node,
709                               struct shared_node *dst_node)
710 {
711         struct cache_extent *cache;
712         struct ptr_node *node, *ins;
713         struct cache_tree *src, *dst;
714         struct inode_record *rec, *conflict;
715         u64 current_ino = 0;
716         int splice = 0;
717         int ret;
718
719         if (--src_node->refs == 0)
720                 splice = 1;
721         if (src_node->current)
722                 current_ino = src_node->current->ino;
723
724         src = &src_node->root_cache;
725         dst = &dst_node->root_cache;
726 again:
727         cache = search_cache_extent(src, 0);
728         while (cache) {
729                 node = container_of(cache, struct ptr_node, cache);
730                 rec = node->data;
731                 cache = next_cache_extent(cache);
732
733                 if (splice) {
734                         remove_cache_extent(src, &node->cache);
735                         ins = node;
736                 } else {
737                         ins = malloc(sizeof(*ins));
738                         ins->cache.start = node->cache.start;
739                         ins->cache.size = node->cache.size;
740                         ins->data = rec;
741                         rec->refs++;
742                 }
743                 ret = insert_cache_extent(dst, &ins->cache);
744                 if (ret == -EEXIST) {
745                         conflict = get_inode_rec(dst, rec->ino, 1);
746                         merge_inode_recs(rec, conflict, dst);
747                         if (rec->checked) {
748                                 conflict->checked = 1;
749                                 if (dst_node->current == conflict)
750                                         dst_node->current = NULL;
751                         }
752                         maybe_free_inode_rec(dst, conflict);
753                         free_inode_rec(rec);
754                         free(ins);
755                 } else {
756                         BUG_ON(ret);
757                 }
758         }
759
760         if (src == &src_node->root_cache) {
761                 src = &src_node->inode_cache;
762                 dst = &dst_node->inode_cache;
763                 goto again;
764         }
765
766         if (current_ino > 0 && (!dst_node->current ||
767             current_ino > dst_node->current->ino)) {
768                 if (dst_node->current) {
769                         dst_node->current->checked = 1;
770                         maybe_free_inode_rec(dst, dst_node->current);
771                 }
772                 dst_node->current = get_inode_rec(dst, current_ino, 1);
773         }
774         return 0;
775 }
776
777 static void free_inode_ptr(struct cache_extent *cache)
778 {
779         struct ptr_node *node;
780         struct inode_record *rec;
781
782         node = container_of(cache, struct ptr_node, cache);
783         rec = node->data;
784         free_inode_rec(rec);
785         free(node);
786 }
787
788 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
789
790 static struct shared_node *find_shared_node(struct cache_tree *shared,
791                                             u64 bytenr)
792 {
793         struct cache_extent *cache;
794         struct shared_node *node;
795
796         cache = lookup_cache_extent(shared, bytenr, 1);
797         if (cache) {
798                 node = container_of(cache, struct shared_node, cache);
799                 return node;
800         }
801         return NULL;
802 }
803
804 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
805 {
806         int ret;
807         struct shared_node *node;
808
809         node = calloc(1, sizeof(*node));
810         node->cache.start = bytenr;
811         node->cache.size = 1;
812         cache_tree_init(&node->root_cache);
813         cache_tree_init(&node->inode_cache);
814         node->refs = refs;
815
816         ret = insert_cache_extent(shared, &node->cache);
817         BUG_ON(ret);
818         return 0;
819 }
820
821 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
822                              struct walk_control *wc, int level)
823 {
824         struct shared_node *node;
825         struct shared_node *dest;
826
827         if (level == wc->active_node)
828                 return 0;
829
830         BUG_ON(wc->active_node <= level);
831         node = find_shared_node(&wc->shared, bytenr);
832         if (!node) {
833                 add_shared_node(&wc->shared, bytenr, refs);
834                 node = find_shared_node(&wc->shared, bytenr);
835                 wc->nodes[level] = node;
836                 wc->active_node = level;
837                 return 0;
838         }
839
840         if (wc->root_level == wc->active_node &&
841             btrfs_root_refs(&root->root_item) == 0) {
842                 if (--node->refs == 0) {
843                         free_inode_recs_tree(&node->root_cache);
844                         free_inode_recs_tree(&node->inode_cache);
845                         remove_cache_extent(&wc->shared, &node->cache);
846                         free(node);
847                 }
848                 return 1;
849         }
850
851         dest = wc->nodes[wc->active_node];
852         splice_shared_node(node, dest);
853         if (node->refs == 0) {
854                 remove_cache_extent(&wc->shared, &node->cache);
855                 free(node);
856         }
857         return 1;
858 }
859
860 static int leave_shared_node(struct btrfs_root *root,
861                              struct walk_control *wc, int level)
862 {
863         struct shared_node *node;
864         struct shared_node *dest;
865         int i;
866
867         if (level == wc->root_level)
868                 return 0;
869
870         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
871                 if (wc->nodes[i])
872                         break;
873         }
874         BUG_ON(i >= BTRFS_MAX_LEVEL);
875
876         node = wc->nodes[wc->active_node];
877         wc->nodes[wc->active_node] = NULL;
878         wc->active_node = i;
879
880         dest = wc->nodes[wc->active_node];
881         if (wc->active_node < wc->root_level ||
882             btrfs_root_refs(&root->root_item) > 0) {
883                 BUG_ON(node->refs <= 1);
884                 splice_shared_node(node, dest);
885         } else {
886                 BUG_ON(node->refs < 2);
887                 node->refs--;
888         }
889         return 0;
890 }
891
892 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
893                          u64 child_root_id)
894 {
895         struct btrfs_path path;
896         struct btrfs_key key;
897         struct extent_buffer *leaf;
898         int has_parent = 0;
899         int ret;
900
901         btrfs_init_path(&path);
902
903         key.objectid = parent_root_id;
904         key.type = BTRFS_ROOT_REF_KEY;
905         key.offset = child_root_id;
906         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
907                                 0, 0);
908         if (ret < 0)
909                 return ret;
910         btrfs_release_path(&path);
911         if (!ret)
912                 return 1;
913
914         key.objectid = child_root_id;
915         key.type = BTRFS_ROOT_BACKREF_KEY;
916         key.offset = 0;
917         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
918                                 0, 0);
919         if (ret < 0)
920                 goto out;
921
922         while (1) {
923                 leaf = path.nodes[0];
924                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
925                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
926                         if (ret)
927                                 break;
928                         leaf = path.nodes[0];
929                 }
930
931                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
932                 if (key.objectid != child_root_id ||
933                     key.type != BTRFS_ROOT_BACKREF_KEY)
934                         break;
935
936                 has_parent = 1;
937
938                 if (key.offset == parent_root_id) {
939                         btrfs_release_path(&path);
940                         return 1;
941                 }
942
943                 path.slots[0]++;
944         }
945 out:
946         btrfs_release_path(&path);
947         if (ret < 0)
948                 return ret;
949         return has_parent? 0 : -1;
950 }
951
952 static int process_dir_item(struct btrfs_root *root,
953                             struct extent_buffer *eb,
954                             int slot, struct btrfs_key *key,
955                             struct shared_node *active_node)
956 {
957         u32 total;
958         u32 cur = 0;
959         u32 len;
960         u32 name_len;
961         u32 data_len;
962         int error;
963         int nritems = 0;
964         int filetype;
965         struct btrfs_dir_item *di;
966         struct inode_record *rec;
967         struct cache_tree *root_cache;
968         struct cache_tree *inode_cache;
969         struct btrfs_key location;
970         char namebuf[BTRFS_NAME_LEN];
971
972         root_cache = &active_node->root_cache;
973         inode_cache = &active_node->inode_cache;
974         rec = active_node->current;
975         rec->found_dir_item = 1;
976
977         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
978         total = btrfs_item_size_nr(eb, slot);
979         while (cur < total) {
980                 nritems++;
981                 btrfs_dir_item_key_to_cpu(eb, di, &location);
982                 name_len = btrfs_dir_name_len(eb, di);
983                 data_len = btrfs_dir_data_len(eb, di);
984                 filetype = btrfs_dir_type(eb, di);
985
986                 rec->found_size += name_len;
987                 if (name_len <= BTRFS_NAME_LEN) {
988                         len = name_len;
989                         error = 0;
990                 } else {
991                         len = BTRFS_NAME_LEN;
992                         error = REF_ERR_NAME_TOO_LONG;
993                 }
994                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
995
996                 if (location.type == BTRFS_INODE_ITEM_KEY) {
997                         add_inode_backref(inode_cache, location.objectid,
998                                           key->objectid, key->offset, namebuf,
999                                           len, filetype, key->type, error);
1000                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1001                         add_inode_backref(root_cache, location.objectid,
1002                                           key->objectid, key->offset,
1003                                           namebuf, len, filetype,
1004                                           key->type, error);
1005                 } else {
1006                         fprintf(stderr, "warning line %d\n", __LINE__);
1007                 }
1008
1009                 len = sizeof(*di) + name_len + data_len;
1010                 di = (struct btrfs_dir_item *)((char *)di + len);
1011                 cur += len;
1012         }
1013         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1014                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1015
1016         return 0;
1017 }
1018
1019 static int process_inode_ref(struct extent_buffer *eb,
1020                              int slot, struct btrfs_key *key,
1021                              struct shared_node *active_node)
1022 {
1023         u32 total;
1024         u32 cur = 0;
1025         u32 len;
1026         u32 name_len;
1027         u64 index;
1028         int error;
1029         struct cache_tree *inode_cache;
1030         struct btrfs_inode_ref *ref;
1031         char namebuf[BTRFS_NAME_LEN];
1032
1033         inode_cache = &active_node->inode_cache;
1034
1035         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1036         total = btrfs_item_size_nr(eb, slot);
1037         while (cur < total) {
1038                 name_len = btrfs_inode_ref_name_len(eb, ref);
1039                 index = btrfs_inode_ref_index(eb, ref);
1040                 if (name_len <= BTRFS_NAME_LEN) {
1041                         len = name_len;
1042                         error = 0;
1043                 } else {
1044                         len = BTRFS_NAME_LEN;
1045                         error = REF_ERR_NAME_TOO_LONG;
1046                 }
1047                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1048                 add_inode_backref(inode_cache, key->objectid, key->offset,
1049                                   index, namebuf, len, 0, key->type, error);
1050
1051                 len = sizeof(*ref) + name_len;
1052                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1053                 cur += len;
1054         }
1055         return 0;
1056 }
1057
1058 static int process_inode_extref(struct extent_buffer *eb,
1059                                 int slot, struct btrfs_key *key,
1060                                 struct shared_node *active_node)
1061 {
1062         u32 total;
1063         u32 cur = 0;
1064         u32 len;
1065         u32 name_len;
1066         u64 index;
1067         u64 parent;
1068         int error;
1069         struct cache_tree *inode_cache;
1070         struct btrfs_inode_extref *extref;
1071         char namebuf[BTRFS_NAME_LEN];
1072
1073         inode_cache = &active_node->inode_cache;
1074
1075         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1076         total = btrfs_item_size_nr(eb, slot);
1077         while (cur < total) {
1078                 name_len = btrfs_inode_extref_name_len(eb, extref);
1079                 index = btrfs_inode_extref_index(eb, extref);
1080                 parent = btrfs_inode_extref_parent(eb, extref);
1081                 if (name_len <= BTRFS_NAME_LEN) {
1082                         len = name_len;
1083                         error = 0;
1084                 } else {
1085                         len = BTRFS_NAME_LEN;
1086                         error = REF_ERR_NAME_TOO_LONG;
1087                 }
1088                 read_extent_buffer(eb, namebuf,
1089                                    (unsigned long)(extref + 1), len);
1090                 add_inode_backref(inode_cache, key->objectid, parent,
1091                                   index, namebuf, len, 0, key->type, error);
1092
1093                 len = sizeof(*extref) + name_len;
1094                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1095                 cur += len;
1096         }
1097         return 0;
1098
1099 }
1100
1101 static int count_csum_range(struct btrfs_root *root, u64 start,
1102                             u64 len, u64 *found)
1103 {
1104         struct btrfs_key key;
1105         struct btrfs_path path;
1106         struct extent_buffer *leaf;
1107         int ret;
1108         size_t size;
1109         *found = 0;
1110         u64 csum_end;
1111         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1112
1113         btrfs_init_path(&path);
1114
1115         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1116         key.offset = start;
1117         key.type = BTRFS_EXTENT_CSUM_KEY;
1118
1119         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1120                                 &key, &path, 0, 0);
1121         if (ret < 0)
1122                 goto out;
1123         if (ret > 0 && path.slots[0] > 0) {
1124                 leaf = path.nodes[0];
1125                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1126                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1127                     key.type == BTRFS_EXTENT_CSUM_KEY)
1128                         path.slots[0]--;
1129         }
1130
1131         while (len > 0) {
1132                 leaf = path.nodes[0];
1133                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1134                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1135                         if (ret > 0)
1136                                 break;
1137                         else if (ret < 0)
1138                                 goto out;
1139                         leaf = path.nodes[0];
1140                 }
1141
1142                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1143                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1144                     key.type != BTRFS_EXTENT_CSUM_KEY)
1145                         break;
1146
1147                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1148                 if (key.offset >= start + len)
1149                         break;
1150
1151                 if (key.offset > start)
1152                         start = key.offset;
1153
1154                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1155                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1156                 if (csum_end > start) {
1157                         size = min(csum_end - start, len);
1158                         len -= size;
1159                         start += size;
1160                         *found += size;
1161                 }
1162
1163                 path.slots[0]++;
1164         }
1165 out:
1166         if (ret < 0)
1167                 return ret;
1168         btrfs_release_path(&path);
1169         return 0;
1170 }
1171
1172 static int process_file_extent(struct btrfs_root *root,
1173                                 struct extent_buffer *eb,
1174                                 int slot, struct btrfs_key *key,
1175                                 struct shared_node *active_node)
1176 {
1177         struct inode_record *rec;
1178         struct btrfs_file_extent_item *fi;
1179         u64 num_bytes = 0;
1180         u64 disk_bytenr = 0;
1181         u64 extent_offset = 0;
1182         u64 mask = root->sectorsize - 1;
1183         int extent_type;
1184         int ret;
1185
1186         rec = active_node->current;
1187         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1188         rec->found_file_extent = 1;
1189
1190         if (rec->extent_start == (u64)-1) {
1191                 rec->extent_start = key->offset;
1192                 rec->extent_end = key->offset;
1193         }
1194
1195         if (rec->extent_end > key->offset)
1196                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1197         else if (rec->extent_end < key->offset &&
1198                  rec->extent_end < rec->first_extent_gap)
1199                 rec->first_extent_gap = rec->extent_end;
1200
1201         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1202         extent_type = btrfs_file_extent_type(eb, fi);
1203
1204         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1205                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1206                 if (num_bytes == 0)
1207                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1208                 rec->found_size += num_bytes;
1209                 num_bytes = (num_bytes + mask) & ~mask;
1210         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1211                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1212                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1213                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1214                 extent_offset = btrfs_file_extent_offset(eb, fi);
1215                 if (num_bytes == 0 || (num_bytes & mask))
1216                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1217                 if (num_bytes + extent_offset >
1218                     btrfs_file_extent_ram_bytes(eb, fi))
1219                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1220                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1221                     (btrfs_file_extent_compression(eb, fi) ||
1222                      btrfs_file_extent_encryption(eb, fi) ||
1223                      btrfs_file_extent_other_encoding(eb, fi)))
1224                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1225                 if (disk_bytenr > 0)
1226                         rec->found_size += num_bytes;
1227         } else {
1228                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1229         }
1230         rec->extent_end = key->offset + num_bytes;
1231
1232         if (disk_bytenr > 0) {
1233                 u64 found;
1234                 if (btrfs_file_extent_compression(eb, fi))
1235                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1236                 else
1237                         disk_bytenr += extent_offset;
1238
1239                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1240                 if (ret < 0)
1241                         return ret;
1242                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1243                         if (found > 0)
1244                                 rec->found_csum_item = 1;
1245                         if (found < num_bytes)
1246                                 rec->some_csum_missing = 1;
1247                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1248                         if (found > 0)
1249                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1250                 }
1251         }
1252         return 0;
1253 }
1254
1255 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1256                             struct walk_control *wc)
1257 {
1258         struct btrfs_key key;
1259         u32 nritems;
1260         int i;
1261         int ret = 0;
1262         struct cache_tree *inode_cache;
1263         struct shared_node *active_node;
1264
1265         if (wc->root_level == wc->active_node &&
1266             btrfs_root_refs(&root->root_item) == 0)
1267                 return 0;
1268
1269         active_node = wc->nodes[wc->active_node];
1270         inode_cache = &active_node->inode_cache;
1271         nritems = btrfs_header_nritems(eb);
1272         for (i = 0; i < nritems; i++) {
1273                 btrfs_item_key_to_cpu(eb, &key, i);
1274
1275                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1276                         continue;
1277                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1278                         continue;
1279
1280                 if (active_node->current == NULL ||
1281                     active_node->current->ino < key.objectid) {
1282                         if (active_node->current) {
1283                                 active_node->current->checked = 1;
1284                                 maybe_free_inode_rec(inode_cache,
1285                                                      active_node->current);
1286                         }
1287                         active_node->current = get_inode_rec(inode_cache,
1288                                                              key.objectid, 1);
1289                 }
1290                 switch (key.type) {
1291                 case BTRFS_DIR_ITEM_KEY:
1292                 case BTRFS_DIR_INDEX_KEY:
1293                         ret = process_dir_item(root, eb, i, &key, active_node);
1294                         break;
1295                 case BTRFS_INODE_REF_KEY:
1296                         ret = process_inode_ref(eb, i, &key, active_node);
1297                         break;
1298                 case BTRFS_INODE_EXTREF_KEY:
1299                         ret = process_inode_extref(eb, i, &key, active_node);
1300                         break;
1301                 case BTRFS_INODE_ITEM_KEY:
1302                         ret = process_inode_item(eb, i, &key, active_node);
1303                         break;
1304                 case BTRFS_EXTENT_DATA_KEY:
1305                         ret = process_file_extent(root, eb, i, &key,
1306                                                   active_node);
1307                         break;
1308                 default:
1309                         break;
1310                 };
1311         }
1312         return ret;
1313 }
1314
1315 static void reada_walk_down(struct btrfs_root *root,
1316                             struct extent_buffer *node, int slot)
1317 {
1318         u64 bytenr;
1319         u64 ptr_gen;
1320         u32 nritems;
1321         u32 blocksize;
1322         int i;
1323         int level;
1324
1325         level = btrfs_header_level(node);
1326         if (level != 1)
1327                 return;
1328
1329         nritems = btrfs_header_nritems(node);
1330         blocksize = btrfs_level_size(root, level - 1);
1331         for (i = slot; i < nritems; i++) {
1332                 bytenr = btrfs_node_blockptr(node, i);
1333                 ptr_gen = btrfs_node_ptr_generation(node, i);
1334                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1335         }
1336 }
1337
1338 /*
1339  * Check the child node/leaf by the following condition:
1340  * 1. the first item key of the node/leaf should be the same with the one
1341  *    in parent.
1342  * 2. block in parent node should match the child node/leaf.
1343  * 3. generation of parent node and child's header should be consistent.
1344  *
1345  * Or the child node/leaf pointed by the key in parent is not valid.
1346  *
1347  * We hope to check leaf owner too, but since subvol may share leaves,
1348  * which makes leaf owner check not so strong, key check should be
1349  * sufficient enough for that case.
1350  */
1351 static int check_child_node(struct btrfs_root *root,
1352                             struct extent_buffer *parent, int slot,
1353                             struct extent_buffer *child)
1354 {
1355         struct btrfs_key parent_key;
1356         struct btrfs_key child_key;
1357         int ret = 0;
1358
1359         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1360         if (btrfs_header_level(child) == 0)
1361                 btrfs_item_key_to_cpu(child, &child_key, 0);
1362         else
1363                 btrfs_node_key_to_cpu(child, &child_key, 0);
1364
1365         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1366                 ret = -EINVAL;
1367                 fprintf(stderr,
1368                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1369                         parent_key.objectid, parent_key.type, parent_key.offset,
1370                         child_key.objectid, child_key.type, child_key.offset);
1371         }
1372         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1373                 ret = -EINVAL;
1374                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1375                         btrfs_node_blockptr(parent, slot),
1376                         btrfs_header_bytenr(child));
1377         }
1378         if (btrfs_node_ptr_generation(parent, slot) !=
1379             btrfs_header_generation(child)) {
1380                 ret = -EINVAL;
1381                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1382                         btrfs_header_generation(child),
1383                         btrfs_node_ptr_generation(parent, slot));
1384         }
1385         return ret;
1386 }
1387
1388 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1389                           struct walk_control *wc, int *level)
1390 {
1391         u64 bytenr;
1392         u64 ptr_gen;
1393         struct extent_buffer *next;
1394         struct extent_buffer *cur;
1395         u32 blocksize;
1396         int ret, err = 0;
1397         u64 refs;
1398
1399         WARN_ON(*level < 0);
1400         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1401         ret = btrfs_lookup_extent_info(NULL, root,
1402                                        path->nodes[*level]->start,
1403                                        *level, 1, &refs, NULL);
1404         if (ret < 0) {
1405                 err = ret;
1406                 goto out;
1407         }
1408
1409         if (refs > 1) {
1410                 ret = enter_shared_node(root, path->nodes[*level]->start,
1411                                         refs, wc, *level);
1412                 if (ret > 0) {
1413                         err = ret;
1414                         goto out;
1415                 }
1416         }
1417
1418         while (*level >= 0) {
1419                 WARN_ON(*level < 0);
1420                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1421                 cur = path->nodes[*level];
1422
1423                 if (btrfs_header_level(cur) != *level)
1424                         WARN_ON(1);
1425
1426                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1427                         break;
1428                 if (*level == 0) {
1429                         ret = process_one_leaf(root, cur, wc);
1430                         if (ret < 0)
1431                                 err = ret;
1432                         break;
1433                 }
1434                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1435                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1436                 blocksize = btrfs_level_size(root, *level - 1);
1437                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1438                                                1, &refs, NULL);
1439                 if (ret < 0)
1440                         refs = 0;
1441
1442                 if (refs > 1) {
1443                         ret = enter_shared_node(root, bytenr, refs,
1444                                                 wc, *level - 1);
1445                         if (ret > 0) {
1446                                 path->slots[*level]++;
1447                                 continue;
1448                         }
1449                 }
1450
1451                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1452                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1453                         free_extent_buffer(next);
1454                         reada_walk_down(root, cur, path->slots[*level]);
1455                         next = read_tree_block(root, bytenr, blocksize,
1456                                                ptr_gen);
1457                         if (!next) {
1458                                 err = -EIO;
1459                                 goto out;
1460                         }
1461                 }
1462
1463                 ret = check_child_node(root, cur, path->slots[*level], next);
1464                 if (ret) {
1465                         err = ret;
1466                         goto out;
1467                 }
1468                 *level = *level - 1;
1469                 free_extent_buffer(path->nodes[*level]);
1470                 path->nodes[*level] = next;
1471                 path->slots[*level] = 0;
1472         }
1473 out:
1474         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1475         return err;
1476 }
1477
1478 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1479                         struct walk_control *wc, int *level)
1480 {
1481         int i;
1482         struct extent_buffer *leaf;
1483
1484         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1485                 leaf = path->nodes[i];
1486                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1487                         path->slots[i]++;
1488                         *level = i;
1489                         return 0;
1490                 } else {
1491                         free_extent_buffer(path->nodes[*level]);
1492                         path->nodes[*level] = NULL;
1493                         BUG_ON(*level > wc->active_node);
1494                         if (*level == wc->active_node)
1495                                 leave_shared_node(root, wc, *level);
1496                         *level = i + 1;
1497                 }
1498         }
1499         return 1;
1500 }
1501
1502 static int check_root_dir(struct inode_record *rec)
1503 {
1504         struct inode_backref *backref;
1505         int ret = -1;
1506
1507         if (!rec->found_inode_item || rec->errors)
1508                 goto out;
1509         if (rec->nlink != 1 || rec->found_link != 0)
1510                 goto out;
1511         if (list_empty(&rec->backrefs))
1512                 goto out;
1513         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1514         if (!backref->found_inode_ref)
1515                 goto out;
1516         if (backref->index != 0 || backref->namelen != 2 ||
1517             memcmp(backref->name, "..", 2))
1518                 goto out;
1519         if (backref->found_dir_index || backref->found_dir_item)
1520                 goto out;
1521         ret = 0;
1522 out:
1523         return ret;
1524 }
1525
1526 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1527                               struct btrfs_root *root, struct btrfs_path *path,
1528                               struct inode_record *rec)
1529 {
1530         struct btrfs_inode_item *ei;
1531         struct btrfs_key key;
1532         int ret;
1533
1534         key.objectid = rec->ino;
1535         key.type = BTRFS_INODE_ITEM_KEY;
1536         key.offset = (u64)-1;
1537
1538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1539         if (ret < 0)
1540                 goto out;
1541         if (ret) {
1542                 if (!path->slots[0]) {
1543                         ret = -ENOENT;
1544                         goto out;
1545                 }
1546                 path->slots[0]--;
1547                 ret = 0;
1548         }
1549         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1550         if (key.objectid != rec->ino) {
1551                 ret = -ENOENT;
1552                 goto out;
1553         }
1554
1555         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1556                             struct btrfs_inode_item);
1557         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1558         btrfs_mark_buffer_dirty(path->nodes[0]);
1559         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1560         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1561                root->root_key.objectid);
1562 out:
1563         btrfs_release_path(path);
1564         return ret;
1565 }
1566
1567 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1568                                     struct btrfs_root *root,
1569                                     struct btrfs_path *path,
1570                                     struct inode_record *rec)
1571 {
1572         struct btrfs_key key;
1573         int ret;
1574
1575         key.objectid = BTRFS_ORPHAN_OBJECTID;
1576         key.type = BTRFS_ORPHAN_ITEM_KEY;
1577         key.offset = rec->ino;
1578
1579         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1580         btrfs_release_path(path);
1581         if (!ret)
1582                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1583         return ret;
1584 }
1585
1586 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
1587 {
1588         struct btrfs_trans_handle *trans;
1589         struct btrfs_path *path;
1590         int ret = 0;
1591
1592         /* So far we just fix dir isize wrong */
1593         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG | I_ERR_NO_ORPHAN_ITEM)))
1594                 return 1;
1595
1596         path = btrfs_alloc_path();
1597         if (!path)
1598                 return -ENOMEM;
1599
1600         trans = btrfs_start_transaction(root, 1);
1601         if (IS_ERR(trans)) {
1602                 btrfs_free_path(path);
1603                 return PTR_ERR(trans);
1604         }
1605
1606         if (rec->errors & I_ERR_DIR_ISIZE_WRONG)
1607                 ret = repair_inode_isize(trans, root, path, rec);
1608         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
1609                 ret = repair_inode_orphan_item(trans, root, path, rec);
1610         btrfs_commit_transaction(trans, root);
1611         btrfs_free_path(path);
1612         return ret;
1613 }
1614
1615 static int check_inode_recs(struct btrfs_root *root,
1616                             struct cache_tree *inode_cache)
1617 {
1618         struct cache_extent *cache;
1619         struct ptr_node *node;
1620         struct inode_record *rec;
1621         struct inode_backref *backref;
1622         int ret;
1623         u64 error = 0;
1624         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1625
1626         if (btrfs_root_refs(&root->root_item) == 0) {
1627                 if (!cache_tree_empty(inode_cache))
1628                         fprintf(stderr, "warning line %d\n", __LINE__);
1629                 return 0;
1630         }
1631
1632         rec = get_inode_rec(inode_cache, root_dirid, 0);
1633         if (rec) {
1634                 ret = check_root_dir(rec);
1635                 if (ret) {
1636                         fprintf(stderr, "root %llu root dir %llu error\n",
1637                                 (unsigned long long)root->root_key.objectid,
1638                                 (unsigned long long)root_dirid);
1639                         error++;
1640                 }
1641         } else {
1642                 fprintf(stderr, "root %llu root dir %llu not found\n",
1643                         (unsigned long long)root->root_key.objectid,
1644                         (unsigned long long)root_dirid);
1645         }
1646
1647         while (1) {
1648                 cache = search_cache_extent(inode_cache, 0);
1649                 if (!cache)
1650                         break;
1651                 node = container_of(cache, struct ptr_node, cache);
1652                 rec = node->data;
1653                 remove_cache_extent(inode_cache, &node->cache);
1654                 free(node);
1655                 if (rec->ino == root_dirid ||
1656                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
1657                         free_inode_rec(rec);
1658                         continue;
1659                 }
1660
1661                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
1662                         ret = check_orphan_item(root, rec->ino);
1663                         if (ret == 0)
1664                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1665                         if (can_free_inode_rec(rec)) {
1666                                 free_inode_rec(rec);
1667                                 continue;
1668                         }
1669                 }
1670
1671                 if (repair) {
1672                         ret = try_repair_inode(root, rec);
1673                         if (ret == 0 && can_free_inode_rec(rec)) {
1674                                 free_inode_rec(rec);
1675                                 continue;
1676                         }
1677                         ret = 0;
1678                 }
1679
1680                 error++;
1681                 if (!rec->found_inode_item)
1682                         rec->errors |= I_ERR_NO_INODE_ITEM;
1683                 if (rec->found_link != rec->nlink)
1684                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
1685                 print_inode_error(root, rec);
1686                 list_for_each_entry(backref, &rec->backrefs, list) {
1687                         if (!backref->found_dir_item)
1688                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1689                         if (!backref->found_dir_index)
1690                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1691                         if (!backref->found_inode_ref)
1692                                 backref->errors |= REF_ERR_NO_INODE_REF;
1693                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
1694                                 " namelen %u name %s filetype %d errors %x",
1695                                 (unsigned long long)backref->dir,
1696                                 (unsigned long long)backref->index,
1697                                 backref->namelen, backref->name,
1698                                 backref->filetype, backref->errors);
1699                         print_ref_error(backref->errors);
1700                 }
1701                 free_inode_rec(rec);
1702         }
1703         return (error > 0) ? -1 : 0;
1704 }
1705
1706 static struct root_record *get_root_rec(struct cache_tree *root_cache,
1707                                         u64 objectid)
1708 {
1709         struct cache_extent *cache;
1710         struct root_record *rec = NULL;
1711         int ret;
1712
1713         cache = lookup_cache_extent(root_cache, objectid, 1);
1714         if (cache) {
1715                 rec = container_of(cache, struct root_record, cache);
1716         } else {
1717                 rec = calloc(1, sizeof(*rec));
1718                 rec->objectid = objectid;
1719                 INIT_LIST_HEAD(&rec->backrefs);
1720                 rec->cache.start = objectid;
1721                 rec->cache.size = 1;
1722
1723                 ret = insert_cache_extent(root_cache, &rec->cache);
1724                 BUG_ON(ret);
1725         }
1726         return rec;
1727 }
1728
1729 static struct root_backref *get_root_backref(struct root_record *rec,
1730                                              u64 ref_root, u64 dir, u64 index,
1731                                              const char *name, int namelen)
1732 {
1733         struct root_backref *backref;
1734
1735         list_for_each_entry(backref, &rec->backrefs, list) {
1736                 if (backref->ref_root != ref_root || backref->dir != dir ||
1737                     backref->namelen != namelen)
1738                         continue;
1739                 if (memcmp(name, backref->name, namelen))
1740                         continue;
1741                 return backref;
1742         }
1743
1744         backref = malloc(sizeof(*backref) + namelen + 1);
1745         memset(backref, 0, sizeof(*backref));
1746         backref->ref_root = ref_root;
1747         backref->dir = dir;
1748         backref->index = index;
1749         backref->namelen = namelen;
1750         memcpy(backref->name, name, namelen);
1751         backref->name[namelen] = '\0';
1752         list_add_tail(&backref->list, &rec->backrefs);
1753         return backref;
1754 }
1755
1756 static void free_root_record(struct cache_extent *cache)
1757 {
1758         struct root_record *rec;
1759         struct root_backref *backref;
1760
1761         rec = container_of(cache, struct root_record, cache);
1762         while (!list_empty(&rec->backrefs)) {
1763                 backref = list_entry(rec->backrefs.next,
1764                                      struct root_backref, list);
1765                 list_del(&backref->list);
1766                 free(backref);
1767         }
1768
1769         kfree(rec);
1770 }
1771
1772 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
1773
1774 static int add_root_backref(struct cache_tree *root_cache,
1775                             u64 root_id, u64 ref_root, u64 dir, u64 index,
1776                             const char *name, int namelen,
1777                             int item_type, int errors)
1778 {
1779         struct root_record *rec;
1780         struct root_backref *backref;
1781
1782         rec = get_root_rec(root_cache, root_id);
1783         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
1784
1785         backref->errors |= errors;
1786
1787         if (item_type != BTRFS_DIR_ITEM_KEY) {
1788                 if (backref->found_dir_index || backref->found_back_ref ||
1789                     backref->found_forward_ref) {
1790                         if (backref->index != index)
1791                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
1792                 } else {
1793                         backref->index = index;
1794                 }
1795         }
1796
1797         if (item_type == BTRFS_DIR_ITEM_KEY) {
1798                 if (backref->found_forward_ref)
1799                         rec->found_ref++;
1800                 backref->found_dir_item = 1;
1801         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
1802                 backref->found_dir_index = 1;
1803         } else if (item_type == BTRFS_ROOT_REF_KEY) {
1804                 if (backref->found_forward_ref)
1805                         backref->errors |= REF_ERR_DUP_ROOT_REF;
1806                 else if (backref->found_dir_item)
1807                         rec->found_ref++;
1808                 backref->found_forward_ref = 1;
1809         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
1810                 if (backref->found_back_ref)
1811                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
1812                 backref->found_back_ref = 1;
1813         } else {
1814                 BUG_ON(1);
1815         }
1816
1817         if (backref->found_forward_ref && backref->found_dir_item)
1818                 backref->reachable = 1;
1819         return 0;
1820 }
1821
1822 static int merge_root_recs(struct btrfs_root *root,
1823                            struct cache_tree *src_cache,
1824                            struct cache_tree *dst_cache)
1825 {
1826         struct cache_extent *cache;
1827         struct ptr_node *node;
1828         struct inode_record *rec;
1829         struct inode_backref *backref;
1830         int ret = 0;
1831
1832         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1833                 free_inode_recs_tree(src_cache);
1834                 return 0;
1835         }
1836
1837         while (1) {
1838                 cache = search_cache_extent(src_cache, 0);
1839                 if (!cache)
1840                         break;
1841                 node = container_of(cache, struct ptr_node, cache);
1842                 rec = node->data;
1843                 remove_cache_extent(src_cache, &node->cache);
1844                 free(node);
1845
1846                 ret = is_child_root(root, root->objectid, rec->ino);
1847                 if (ret < 0)
1848                         break;
1849                 else if (ret == 0)
1850                         goto skip;
1851
1852                 list_for_each_entry(backref, &rec->backrefs, list) {
1853                         BUG_ON(backref->found_inode_ref);
1854                         if (backref->found_dir_item)
1855                                 add_root_backref(dst_cache, rec->ino,
1856                                         root->root_key.objectid, backref->dir,
1857                                         backref->index, backref->name,
1858                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
1859                                         backref->errors);
1860                         if (backref->found_dir_index)
1861                                 add_root_backref(dst_cache, rec->ino,
1862                                         root->root_key.objectid, backref->dir,
1863                                         backref->index, backref->name,
1864                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
1865                                         backref->errors);
1866                 }
1867 skip:
1868                 free_inode_rec(rec);
1869         }
1870         if (ret < 0)
1871                 return ret;
1872         return 0;
1873 }
1874
1875 static int check_root_refs(struct btrfs_root *root,
1876                            struct cache_tree *root_cache)
1877 {
1878         struct root_record *rec;
1879         struct root_record *ref_root;
1880         struct root_backref *backref;
1881         struct cache_extent *cache;
1882         int loop = 1;
1883         int ret;
1884         int error;
1885         int errors = 0;
1886
1887         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
1888         rec->found_ref = 1;
1889
1890         /* fixme: this can not detect circular references */
1891         while (loop) {
1892                 loop = 0;
1893                 cache = search_cache_extent(root_cache, 0);
1894                 while (1) {
1895                         if (!cache)
1896                                 break;
1897                         rec = container_of(cache, struct root_record, cache);
1898                         cache = next_cache_extent(cache);
1899
1900                         if (rec->found_ref == 0)
1901                                 continue;
1902
1903                         list_for_each_entry(backref, &rec->backrefs, list) {
1904                                 if (!backref->reachable)
1905                                         continue;
1906
1907                                 ref_root = get_root_rec(root_cache,
1908                                                         backref->ref_root);
1909                                 if (ref_root->found_ref > 0)
1910                                         continue;
1911
1912                                 backref->reachable = 0;
1913                                 rec->found_ref--;
1914                                 if (rec->found_ref == 0)
1915                                         loop = 1;
1916                         }
1917                 }
1918         }
1919
1920         cache = search_cache_extent(root_cache, 0);
1921         while (1) {
1922                 if (!cache)
1923                         break;
1924                 rec = container_of(cache, struct root_record, cache);
1925                 cache = next_cache_extent(cache);
1926
1927                 if (rec->found_ref == 0 &&
1928                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
1929                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
1930                         ret = check_orphan_item(root->fs_info->tree_root,
1931                                                 rec->objectid);
1932                         if (ret == 0)
1933                                 continue;
1934
1935                         /*
1936                          * If we don't have a root item then we likely just have
1937                          * a dir item in a snapshot for this root but no actual
1938                          * ref key or anything so it's meaningless.
1939                          */
1940                         if (!rec->found_root_item)
1941                                 continue;
1942                         errors++;
1943                         fprintf(stderr, "fs tree %llu not referenced\n",
1944                                 (unsigned long long)rec->objectid);
1945                 }
1946
1947                 error = 0;
1948                 if (rec->found_ref > 0 && !rec->found_root_item)
1949                         error = 1;
1950                 list_for_each_entry(backref, &rec->backrefs, list) {
1951                         if (!backref->found_dir_item)
1952                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
1953                         if (!backref->found_dir_index)
1954                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
1955                         if (!backref->found_back_ref)
1956                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
1957                         if (!backref->found_forward_ref)
1958                                 backref->errors |= REF_ERR_NO_ROOT_REF;
1959                         if (backref->reachable && backref->errors)
1960                                 error = 1;
1961                 }
1962                 if (!error)
1963                         continue;
1964
1965                 errors++;
1966                 fprintf(stderr, "fs tree %llu refs %u %s\n",
1967                         (unsigned long long)rec->objectid, rec->found_ref,
1968                          rec->found_root_item ? "" : "not found");
1969
1970                 list_for_each_entry(backref, &rec->backrefs, list) {
1971                         if (!backref->reachable)
1972                                 continue;
1973                         if (!backref->errors && rec->found_root_item)
1974                                 continue;
1975                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
1976                                 " index %llu namelen %u name %s errors %x\n",
1977                                 (unsigned long long)backref->ref_root,
1978                                 (unsigned long long)backref->dir,
1979                                 (unsigned long long)backref->index,
1980                                 backref->namelen, backref->name,
1981                                 backref->errors);
1982                         print_ref_error(backref->errors);
1983                 }
1984         }
1985         return errors > 0 ? 1 : 0;
1986 }
1987
1988 static int process_root_ref(struct extent_buffer *eb, int slot,
1989                             struct btrfs_key *key,
1990                             struct cache_tree *root_cache)
1991 {
1992         u64 dirid;
1993         u64 index;
1994         u32 len;
1995         u32 name_len;
1996         struct btrfs_root_ref *ref;
1997         char namebuf[BTRFS_NAME_LEN];
1998         int error;
1999
2000         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
2001
2002         dirid = btrfs_root_ref_dirid(eb, ref);
2003         index = btrfs_root_ref_sequence(eb, ref);
2004         name_len = btrfs_root_ref_name_len(eb, ref);
2005
2006         if (name_len <= BTRFS_NAME_LEN) {
2007                 len = name_len;
2008                 error = 0;
2009         } else {
2010                 len = BTRFS_NAME_LEN;
2011                 error = REF_ERR_NAME_TOO_LONG;
2012         }
2013         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
2014
2015         if (key->type == BTRFS_ROOT_REF_KEY) {
2016                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
2017                                  index, namebuf, len, key->type, error);
2018         } else {
2019                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
2020                                  index, namebuf, len, key->type, error);
2021         }
2022         return 0;
2023 }
2024
2025 static int check_fs_root(struct btrfs_root *root,
2026                          struct cache_tree *root_cache,
2027                          struct walk_control *wc)
2028 {
2029         int ret = 0;
2030         int err = 0;
2031         int wret;
2032         int level;
2033         struct btrfs_path path;
2034         struct shared_node root_node;
2035         struct root_record *rec;
2036         struct btrfs_root_item *root_item = &root->root_item;
2037
2038         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2039                 rec = get_root_rec(root_cache, root->root_key.objectid);
2040                 if (btrfs_root_refs(root_item) > 0)
2041                         rec->found_root_item = 1;
2042         }
2043
2044         btrfs_init_path(&path);
2045         memset(&root_node, 0, sizeof(root_node));
2046         cache_tree_init(&root_node.root_cache);
2047         cache_tree_init(&root_node.inode_cache);
2048
2049         level = btrfs_header_level(root->node);
2050         memset(wc->nodes, 0, sizeof(wc->nodes));
2051         wc->nodes[level] = &root_node;
2052         wc->active_node = level;
2053         wc->root_level = level;
2054
2055         if (btrfs_root_refs(root_item) > 0 ||
2056             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2057                 path.nodes[level] = root->node;
2058                 extent_buffer_get(root->node);
2059                 path.slots[level] = 0;
2060         } else {
2061                 struct btrfs_key key;
2062                 struct btrfs_disk_key found_key;
2063
2064                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2065                 level = root_item->drop_level;
2066                 path.lowest_level = level;
2067                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2068                 if (wret < 0)
2069                         goto skip_walking;
2070                 btrfs_node_key(path.nodes[level], &found_key,
2071                                 path.slots[level]);
2072                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2073                                         sizeof(found_key)));
2074         }
2075
2076         while (1) {
2077                 wret = walk_down_tree(root, &path, wc, &level);
2078                 if (wret < 0)
2079                         ret = wret;
2080                 if (wret != 0)
2081                         break;
2082
2083                 wret = walk_up_tree(root, &path, wc, &level);
2084                 if (wret < 0)
2085                         ret = wret;
2086                 if (wret != 0)
2087                         break;
2088         }
2089 skip_walking:
2090         btrfs_release_path(&path);
2091
2092         err = merge_root_recs(root, &root_node.root_cache, root_cache);
2093         if (err < 0)
2094                 ret = err;
2095
2096         if (root_node.current) {
2097                 root_node.current->checked = 1;
2098                 maybe_free_inode_rec(&root_node.inode_cache,
2099                                 root_node.current);
2100         }
2101
2102         err = check_inode_recs(root, &root_node.inode_cache);
2103         if (!ret)
2104                 ret = err;
2105         return ret;
2106 }
2107
2108 static int fs_root_objectid(u64 objectid)
2109 {
2110         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
2111             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2112                 return 1;
2113         return is_fstree(objectid);
2114 }
2115
2116 static int check_fs_roots(struct btrfs_root *root,
2117                           struct cache_tree *root_cache)
2118 {
2119         struct btrfs_path path;
2120         struct btrfs_key key;
2121         struct walk_control wc;
2122         struct extent_buffer *leaf;
2123         struct btrfs_root *tmp_root;
2124         struct btrfs_root *tree_root = root->fs_info->tree_root;
2125         int ret;
2126         int err = 0;
2127
2128         /*
2129          * Just in case we made any changes to the extent tree that weren't
2130          * reflected into the free space cache yet.
2131          */
2132         if (repair)
2133                 reset_cached_block_groups(root->fs_info);
2134         memset(&wc, 0, sizeof(wc));
2135         cache_tree_init(&wc.shared);
2136         btrfs_init_path(&path);
2137
2138         key.offset = 0;
2139         key.objectid = 0;
2140         key.type = BTRFS_ROOT_ITEM_KEY;
2141         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
2142         if (ret < 0) {
2143                 err = 1;
2144                 goto out;
2145         }
2146         while (1) {
2147                 leaf = path.nodes[0];
2148                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
2149                         ret = btrfs_next_leaf(tree_root, &path);
2150                         if (ret) {
2151                                 if (ret < 0)
2152                                         err = 1;
2153                                 break;
2154                         }
2155                         leaf = path.nodes[0];
2156                 }
2157                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
2158                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
2159                     fs_root_objectid(key.objectid)) {
2160                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2161                                 tmp_root = btrfs_read_fs_root_no_cache(
2162                                                 root->fs_info, &key);
2163                         } else {
2164                                 key.offset = (u64)-1;
2165                                 tmp_root = btrfs_read_fs_root(
2166                                                 root->fs_info, &key);
2167                         }
2168                         if (IS_ERR(tmp_root)) {
2169                                 err = 1;
2170                                 goto next;
2171                         }
2172                         ret = check_fs_root(tmp_root, root_cache, &wc);
2173                         if (ret)
2174                                 err = 1;
2175                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
2176                                 btrfs_free_fs_root(tmp_root);
2177                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
2178                            key.type == BTRFS_ROOT_BACKREF_KEY) {
2179                         process_root_ref(leaf, path.slots[0], &key,
2180                                          root_cache);
2181                 }
2182 next:
2183                 path.slots[0]++;
2184         }
2185 out:
2186         btrfs_release_path(&path);
2187         if (err)
2188                 free_extent_cache_tree(&wc.shared);
2189         if (!cache_tree_empty(&wc.shared))
2190                 fprintf(stderr, "warning line %d\n", __LINE__);
2191
2192         return err;
2193 }
2194
2195 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
2196 {
2197         struct list_head *cur = rec->backrefs.next;
2198         struct extent_backref *back;
2199         struct tree_backref *tback;
2200         struct data_backref *dback;
2201         u64 found = 0;
2202         int err = 0;
2203
2204         while(cur != &rec->backrefs) {
2205                 back = list_entry(cur, struct extent_backref, list);
2206                 cur = cur->next;
2207                 if (!back->found_extent_tree) {
2208                         err = 1;
2209                         if (!print_errs)
2210                                 goto out;
2211                         if (back->is_data) {
2212                                 dback = (struct data_backref *)back;
2213                                 fprintf(stderr, "Backref %llu %s %llu"
2214                                         " owner %llu offset %llu num_refs %lu"
2215                                         " not found in extent tree\n",
2216                                         (unsigned long long)rec->start,
2217                                         back->full_backref ?
2218                                         "parent" : "root",
2219                                         back->full_backref ?
2220                                         (unsigned long long)dback->parent:
2221                                         (unsigned long long)dback->root,
2222                                         (unsigned long long)dback->owner,
2223                                         (unsigned long long)dback->offset,
2224                                         (unsigned long)dback->num_refs);
2225                         } else {
2226                                 tback = (struct tree_backref *)back;
2227                                 fprintf(stderr, "Backref %llu parent %llu"
2228                                         " root %llu not found in extent tree\n",
2229                                         (unsigned long long)rec->start,
2230                                         (unsigned long long)tback->parent,
2231                                         (unsigned long long)tback->root);
2232                         }
2233                 }
2234                 if (!back->is_data && !back->found_ref) {
2235                         err = 1;
2236                         if (!print_errs)
2237                                 goto out;
2238                         tback = (struct tree_backref *)back;
2239                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
2240                                 (unsigned long long)rec->start,
2241                                 back->full_backref ? "parent" : "root",
2242                                 back->full_backref ?
2243                                 (unsigned long long)tback->parent :
2244                                 (unsigned long long)tback->root, back);
2245                 }
2246                 if (back->is_data) {
2247                         dback = (struct data_backref *)back;
2248                         if (dback->found_ref != dback->num_refs) {
2249                                 err = 1;
2250                                 if (!print_errs)
2251                                         goto out;
2252                                 fprintf(stderr, "Incorrect local backref count"
2253                                         " on %llu %s %llu owner %llu"
2254                                         " offset %llu found %u wanted %u back %p\n",
2255                                         (unsigned long long)rec->start,
2256                                         back->full_backref ?
2257                                         "parent" : "root",
2258                                         back->full_backref ?
2259                                         (unsigned long long)dback->parent:
2260                                         (unsigned long long)dback->root,
2261                                         (unsigned long long)dback->owner,
2262                                         (unsigned long long)dback->offset,
2263                                         dback->found_ref, dback->num_refs, back);
2264                         }
2265                         if (dback->disk_bytenr != rec->start) {
2266                                 err = 1;
2267                                 if (!print_errs)
2268                                         goto out;
2269                                 fprintf(stderr, "Backref disk bytenr does not"
2270                                         " match extent record, bytenr=%llu, "
2271                                         "ref bytenr=%llu\n",
2272                                         (unsigned long long)rec->start,
2273                                         (unsigned long long)dback->disk_bytenr);
2274                         }
2275
2276                         if (dback->bytes != rec->nr) {
2277                                 err = 1;
2278                                 if (!print_errs)
2279                                         goto out;
2280                                 fprintf(stderr, "Backref bytes do not match "
2281                                         "extent backref, bytenr=%llu, ref "
2282                                         "bytes=%llu, backref bytes=%llu\n",
2283                                         (unsigned long long)rec->start,
2284                                         (unsigned long long)rec->nr,
2285                                         (unsigned long long)dback->bytes);
2286                         }
2287                 }
2288                 if (!back->is_data) {
2289                         found += 1;
2290                 } else {
2291                         dback = (struct data_backref *)back;
2292                         found += dback->found_ref;
2293                 }
2294         }
2295         if (found != rec->refs) {
2296                 err = 1;
2297                 if (!print_errs)
2298                         goto out;
2299                 fprintf(stderr, "Incorrect global backref count "
2300                         "on %llu found %llu wanted %llu\n",
2301                         (unsigned long long)rec->start,
2302                         (unsigned long long)found,
2303                         (unsigned long long)rec->refs);
2304         }
2305 out:
2306         return err;
2307 }
2308
2309 static int free_all_extent_backrefs(struct extent_record *rec)
2310 {
2311         struct extent_backref *back;
2312         struct list_head *cur;
2313         while (!list_empty(&rec->backrefs)) {
2314                 cur = rec->backrefs.next;
2315                 back = list_entry(cur, struct extent_backref, list);
2316                 list_del(cur);
2317                 free(back);
2318         }
2319         return 0;
2320 }
2321
2322 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
2323                                      struct cache_tree *extent_cache)
2324 {
2325         struct cache_extent *cache;
2326         struct extent_record *rec;
2327
2328         while (1) {
2329                 cache = first_cache_extent(extent_cache);
2330                 if (!cache)
2331                         break;
2332                 rec = container_of(cache, struct extent_record, cache);
2333                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
2334                 remove_cache_extent(extent_cache, cache);
2335                 free_all_extent_backrefs(rec);
2336                 free(rec);
2337         }
2338 }
2339
2340 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
2341                                  struct extent_record *rec)
2342 {
2343         if (rec->content_checked && rec->owner_ref_checked &&
2344             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
2345             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
2346                 remove_cache_extent(extent_cache, &rec->cache);
2347                 free_all_extent_backrefs(rec);
2348                 list_del_init(&rec->list);
2349                 free(rec);
2350         }
2351         return 0;
2352 }
2353
2354 static int check_owner_ref(struct btrfs_root *root,
2355                             struct extent_record *rec,
2356                             struct extent_buffer *buf)
2357 {
2358         struct extent_backref *node;
2359         struct tree_backref *back;
2360         struct btrfs_root *ref_root;
2361         struct btrfs_key key;
2362         struct btrfs_path path;
2363         struct extent_buffer *parent;
2364         int level;
2365         int found = 0;
2366         int ret;
2367
2368         list_for_each_entry(node, &rec->backrefs, list) {
2369                 if (node->is_data)
2370                         continue;
2371                 if (!node->found_ref)
2372                         continue;
2373                 if (node->full_backref)
2374                         continue;
2375                 back = (struct tree_backref *)node;
2376                 if (btrfs_header_owner(buf) == back->root)
2377                         return 0;
2378         }
2379         BUG_ON(rec->is_root);
2380
2381         /* try to find the block by search corresponding fs tree */
2382         key.objectid = btrfs_header_owner(buf);
2383         key.type = BTRFS_ROOT_ITEM_KEY;
2384         key.offset = (u64)-1;
2385
2386         ref_root = btrfs_read_fs_root(root->fs_info, &key);
2387         if (IS_ERR(ref_root))
2388                 return 1;
2389
2390         level = btrfs_header_level(buf);
2391         if (level == 0)
2392                 btrfs_item_key_to_cpu(buf, &key, 0);
2393         else
2394                 btrfs_node_key_to_cpu(buf, &key, 0);
2395
2396         btrfs_init_path(&path);
2397         path.lowest_level = level + 1;
2398         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
2399         if (ret < 0)
2400                 return 0;
2401
2402         parent = path.nodes[level + 1];
2403         if (parent && buf->start == btrfs_node_blockptr(parent,
2404                                                         path.slots[level + 1]))
2405                 found = 1;
2406
2407         btrfs_release_path(&path);
2408         return found ? 0 : 1;
2409 }
2410
2411 static int is_extent_tree_record(struct extent_record *rec)
2412 {
2413         struct list_head *cur = rec->backrefs.next;
2414         struct extent_backref *node;
2415         struct tree_backref *back;
2416         int is_extent = 0;
2417
2418         while(cur != &rec->backrefs) {
2419                 node = list_entry(cur, struct extent_backref, list);
2420                 cur = cur->next;
2421                 if (node->is_data)
2422                         return 0;
2423                 back = (struct tree_backref *)node;
2424                 if (node->full_backref)
2425                         return 0;
2426                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
2427                         is_extent = 1;
2428         }
2429         return is_extent;
2430 }
2431
2432
2433 static int record_bad_block_io(struct btrfs_fs_info *info,
2434                                struct cache_tree *extent_cache,
2435                                u64 start, u64 len)
2436 {
2437         struct extent_record *rec;
2438         struct cache_extent *cache;
2439         struct btrfs_key key;
2440
2441         cache = lookup_cache_extent(extent_cache, start, len);
2442         if (!cache)
2443                 return 0;
2444
2445         rec = container_of(cache, struct extent_record, cache);
2446         if (!is_extent_tree_record(rec))
2447                 return 0;
2448
2449         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
2450         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
2451 }
2452
2453 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
2454                        struct extent_buffer *buf, int slot)
2455 {
2456         if (btrfs_header_level(buf)) {
2457                 struct btrfs_key_ptr ptr1, ptr2;
2458
2459                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
2460                                    sizeof(struct btrfs_key_ptr));
2461                 read_extent_buffer(buf, &ptr2,
2462                                    btrfs_node_key_ptr_offset(slot + 1),
2463                                    sizeof(struct btrfs_key_ptr));
2464                 write_extent_buffer(buf, &ptr1,
2465                                     btrfs_node_key_ptr_offset(slot + 1),
2466                                     sizeof(struct btrfs_key_ptr));
2467                 write_extent_buffer(buf, &ptr2,
2468                                     btrfs_node_key_ptr_offset(slot),
2469                                     sizeof(struct btrfs_key_ptr));
2470                 if (slot == 0) {
2471                         struct btrfs_disk_key key;
2472                         btrfs_node_key(buf, &key, 0);
2473                         btrfs_fixup_low_keys(root, path, &key,
2474                                              btrfs_header_level(buf) + 1);
2475                 }
2476         } else {
2477                 struct btrfs_item *item1, *item2;
2478                 struct btrfs_key k1, k2;
2479                 char *item1_data, *item2_data;
2480                 u32 item1_offset, item2_offset, item1_size, item2_size;
2481
2482                 item1 = btrfs_item_nr(slot);
2483                 item2 = btrfs_item_nr(slot + 1);
2484                 btrfs_item_key_to_cpu(buf, &k1, slot);
2485                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
2486                 item1_offset = btrfs_item_offset(buf, item1);
2487                 item2_offset = btrfs_item_offset(buf, item2);
2488                 item1_size = btrfs_item_size(buf, item1);
2489                 item2_size = btrfs_item_size(buf, item2);
2490
2491                 item1_data = malloc(item1_size);
2492                 if (!item1_data)
2493                         return -ENOMEM;
2494                 item2_data = malloc(item2_size);
2495                 if (!item2_data) {
2496                         free(item1_data);
2497                         return -ENOMEM;
2498                 }
2499
2500                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
2501                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
2502
2503                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
2504                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
2505                 free(item1_data);
2506                 free(item2_data);
2507
2508                 btrfs_set_item_offset(buf, item1, item2_offset);
2509                 btrfs_set_item_offset(buf, item2, item1_offset);
2510                 btrfs_set_item_size(buf, item1, item2_size);
2511                 btrfs_set_item_size(buf, item2, item1_size);
2512
2513                 path->slots[0] = slot;
2514                 btrfs_set_item_key_unsafe(root, path, &k2);
2515                 path->slots[0] = slot + 1;
2516                 btrfs_set_item_key_unsafe(root, path, &k1);
2517         }
2518         return 0;
2519 }
2520
2521 /*
2522  * Attempt to fix basic block failures.  Currently we only handle bad key
2523  * orders, we will cycle through the keys and swap them if necessary.
2524  */
2525 static int try_to_fix_bad_block(struct btrfs_trans_handle *trans,
2526                                 struct btrfs_root *root,
2527                                 struct extent_buffer *buf,
2528                                 struct btrfs_disk_key *parent_key,
2529                                 enum btrfs_tree_block_status status)
2530 {
2531         struct btrfs_path *path;
2532         struct btrfs_key k1, k2;
2533         int i;
2534         int level;
2535         int ret;
2536
2537         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
2538                 return -EIO;
2539
2540         k1.objectid = btrfs_header_owner(buf);
2541         k1.type = BTRFS_ROOT_ITEM_KEY;
2542         k1.offset = (u64)-1;
2543
2544         root = btrfs_read_fs_root(root->fs_info, &k1);
2545         if (IS_ERR(root))
2546                 return -EIO;
2547
2548         record_root_in_trans(trans, root);
2549
2550         path = btrfs_alloc_path();
2551         if (!path)
2552                 return -EIO;
2553
2554         level = btrfs_header_level(buf);
2555         path->lowest_level = level;
2556         path->skip_check_block = 1;
2557         if (level)
2558                 btrfs_node_key_to_cpu(buf, &k1, 0);
2559         else
2560                 btrfs_item_key_to_cpu(buf, &k1, 0);
2561
2562         ret = btrfs_search_slot(trans, root, &k1, path, 0, 1);
2563         if (ret) {
2564                 btrfs_free_path(path);
2565                 return -EIO;
2566         }
2567
2568         buf = path->nodes[level];
2569         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
2570                 if (level) {
2571                         btrfs_node_key_to_cpu(buf, &k1, i);
2572                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
2573                 } else {
2574                         btrfs_item_key_to_cpu(buf, &k1, i);
2575                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
2576                 }
2577                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
2578                         continue;
2579                 ret = swap_values(root, path, buf, i);
2580                 if (ret)
2581                         break;
2582                 btrfs_mark_buffer_dirty(buf);
2583                 i = 0;
2584         }
2585
2586         btrfs_free_path(path);
2587         return ret;
2588 }
2589
2590 static int check_block(struct btrfs_trans_handle *trans,
2591                        struct btrfs_root *root,
2592                        struct cache_tree *extent_cache,
2593                        struct extent_buffer *buf, u64 flags)
2594 {
2595         struct extent_record *rec;
2596         struct cache_extent *cache;
2597         struct btrfs_key key;
2598         enum btrfs_tree_block_status status;
2599         int ret = 0;
2600         int level;
2601
2602         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
2603         if (!cache)
2604                 return 1;
2605         rec = container_of(cache, struct extent_record, cache);
2606         rec->generation = btrfs_header_generation(buf);
2607
2608         level = btrfs_header_level(buf);
2609         if (btrfs_header_nritems(buf) > 0) {
2610
2611                 if (level == 0)
2612                         btrfs_item_key_to_cpu(buf, &key, 0);
2613                 else
2614                         btrfs_node_key_to_cpu(buf, &key, 0);
2615
2616                 rec->info_objectid = key.objectid;
2617         }
2618         rec->info_level = level;
2619
2620         if (btrfs_is_leaf(buf))
2621                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
2622         else
2623                 status = btrfs_check_node(root, &rec->parent_key, buf);
2624
2625         if (status != BTRFS_TREE_BLOCK_CLEAN) {
2626                 if (repair)
2627                         status = try_to_fix_bad_block(trans, root, buf,
2628                                                       &rec->parent_key,
2629                                                       status);
2630                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2631                         ret = -EIO;
2632                         fprintf(stderr, "bad block %llu\n",
2633                                 (unsigned long long)buf->start);
2634                 } else {
2635                         /*
2636                          * Signal to callers we need to start the scan over
2637                          * again since we'll have cow'ed blocks.
2638                          */
2639                         ret = -EAGAIN;
2640                 }
2641         } else {
2642                 rec->content_checked = 1;
2643                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2644                         rec->owner_ref_checked = 1;
2645                 else {
2646                         ret = check_owner_ref(root, rec, buf);
2647                         if (!ret)
2648                                 rec->owner_ref_checked = 1;
2649                 }
2650         }
2651         if (!ret)
2652                 maybe_free_extent_rec(extent_cache, rec);
2653         return ret;
2654 }
2655
2656 static struct tree_backref *find_tree_backref(struct extent_record *rec,
2657                                                 u64 parent, u64 root)
2658 {
2659         struct list_head *cur = rec->backrefs.next;
2660         struct extent_backref *node;
2661         struct tree_backref *back;
2662
2663         while(cur != &rec->backrefs) {
2664                 node = list_entry(cur, struct extent_backref, list);
2665                 cur = cur->next;
2666                 if (node->is_data)
2667                         continue;
2668                 back = (struct tree_backref *)node;
2669                 if (parent > 0) {
2670                         if (!node->full_backref)
2671                                 continue;
2672                         if (parent == back->parent)
2673                                 return back;
2674                 } else {
2675                         if (node->full_backref)
2676                                 continue;
2677                         if (back->root == root)
2678                                 return back;
2679                 }
2680         }
2681         return NULL;
2682 }
2683
2684 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
2685                                                 u64 parent, u64 root)
2686 {
2687         struct tree_backref *ref = malloc(sizeof(*ref));
2688         memset(&ref->node, 0, sizeof(ref->node));
2689         if (parent > 0) {
2690                 ref->parent = parent;
2691                 ref->node.full_backref = 1;
2692         } else {
2693                 ref->root = root;
2694                 ref->node.full_backref = 0;
2695         }
2696         list_add_tail(&ref->node.list, &rec->backrefs);
2697
2698         return ref;
2699 }
2700
2701 static struct data_backref *find_data_backref(struct extent_record *rec,
2702                                                 u64 parent, u64 root,
2703                                                 u64 owner, u64 offset,
2704                                                 int found_ref,
2705                                                 u64 disk_bytenr, u64 bytes)
2706 {
2707         struct list_head *cur = rec->backrefs.next;
2708         struct extent_backref *node;
2709         struct data_backref *back;
2710
2711         while(cur != &rec->backrefs) {
2712                 node = list_entry(cur, struct extent_backref, list);
2713                 cur = cur->next;
2714                 if (!node->is_data)
2715                         continue;
2716                 back = (struct data_backref *)node;
2717                 if (parent > 0) {
2718                         if (!node->full_backref)
2719                                 continue;
2720                         if (parent == back->parent)
2721                                 return back;
2722                 } else {
2723                         if (node->full_backref)
2724                                 continue;
2725                         if (back->root == root && back->owner == owner &&
2726                             back->offset == offset) {
2727                                 if (found_ref && node->found_ref &&
2728                                     (back->bytes != bytes ||
2729                                     back->disk_bytenr != disk_bytenr))
2730                                         continue;
2731                                 return back;
2732                         }
2733                 }
2734         }
2735         return NULL;
2736 }
2737
2738 static struct data_backref *alloc_data_backref(struct extent_record *rec,
2739                                                 u64 parent, u64 root,
2740                                                 u64 owner, u64 offset,
2741                                                 u64 max_size)
2742 {
2743         struct data_backref *ref = malloc(sizeof(*ref));
2744         memset(&ref->node, 0, sizeof(ref->node));
2745         ref->node.is_data = 1;
2746
2747         if (parent > 0) {
2748                 ref->parent = parent;
2749                 ref->owner = 0;
2750                 ref->offset = 0;
2751                 ref->node.full_backref = 1;
2752         } else {
2753                 ref->root = root;
2754                 ref->owner = owner;
2755                 ref->offset = offset;
2756                 ref->node.full_backref = 0;
2757         }
2758         ref->bytes = max_size;
2759         ref->found_ref = 0;
2760         ref->num_refs = 0;
2761         list_add_tail(&ref->node.list, &rec->backrefs);
2762         if (max_size > rec->max_size)
2763                 rec->max_size = max_size;
2764         return ref;
2765 }
2766
2767 static int add_extent_rec(struct cache_tree *extent_cache,
2768                           struct btrfs_key *parent_key, u64 parent_gen,
2769                           u64 start, u64 nr, u64 extent_item_refs,
2770                           int is_root, int inc_ref, int set_checked,
2771                           int metadata, int extent_rec, u64 max_size)
2772 {
2773         struct extent_record *rec;
2774         struct cache_extent *cache;
2775         int ret = 0;
2776         int dup = 0;
2777
2778         cache = lookup_cache_extent(extent_cache, start, nr);
2779         if (cache) {
2780                 rec = container_of(cache, struct extent_record, cache);
2781                 if (inc_ref)
2782                         rec->refs++;
2783                 if (rec->nr == 1)
2784                         rec->nr = max(nr, max_size);
2785
2786                 /*
2787                  * We need to make sure to reset nr to whatever the extent
2788                  * record says was the real size, this way we can compare it to
2789                  * the backrefs.
2790                  */
2791                 if (extent_rec) {
2792                         if (start != rec->start || rec->found_rec) {
2793                                 struct extent_record *tmp;
2794
2795                                 dup = 1;
2796                                 if (list_empty(&rec->list))
2797                                         list_add_tail(&rec->list,
2798                                                       &duplicate_extents);
2799
2800                                 /*
2801                                  * We have to do this song and dance in case we
2802                                  * find an extent record that falls inside of
2803                                  * our current extent record but does not have
2804                                  * the same objectid.
2805                                  */
2806                                 tmp = malloc(sizeof(*tmp));
2807                                 if (!tmp)
2808                                         return -ENOMEM;
2809                                 tmp->start = start;
2810                                 tmp->max_size = max_size;
2811                                 tmp->nr = nr;
2812                                 tmp->found_rec = 1;
2813                                 tmp->metadata = metadata;
2814                                 tmp->extent_item_refs = extent_item_refs;
2815                                 INIT_LIST_HEAD(&tmp->list);
2816                                 list_add_tail(&tmp->list, &rec->dups);
2817                                 rec->num_duplicates++;
2818                         } else {
2819                                 rec->nr = nr;
2820                                 rec->found_rec = 1;
2821                         }
2822                 }
2823
2824                 if (extent_item_refs && !dup) {
2825                         if (rec->extent_item_refs) {
2826                                 fprintf(stderr, "block %llu rec "
2827                                         "extent_item_refs %llu, passed %llu\n",
2828                                         (unsigned long long)start,
2829                                         (unsigned long long)
2830                                                         rec->extent_item_refs,
2831                                         (unsigned long long)extent_item_refs);
2832                         }
2833                         rec->extent_item_refs = extent_item_refs;
2834                 }
2835                 if (is_root)
2836                         rec->is_root = 1;
2837                 if (set_checked) {
2838                         rec->content_checked = 1;
2839                         rec->owner_ref_checked = 1;
2840                 }
2841
2842                 if (parent_key)
2843                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2844                 if (parent_gen)
2845                         rec->parent_generation = parent_gen;
2846
2847                 if (rec->max_size < max_size)
2848                         rec->max_size = max_size;
2849
2850                 maybe_free_extent_rec(extent_cache, rec);
2851                 return ret;
2852         }
2853         rec = malloc(sizeof(*rec));
2854         rec->start = start;
2855         rec->max_size = max_size;
2856         rec->nr = max(nr, max_size);
2857         rec->found_rec = !!extent_rec;
2858         rec->content_checked = 0;
2859         rec->owner_ref_checked = 0;
2860         rec->num_duplicates = 0;
2861         rec->metadata = metadata;
2862         INIT_LIST_HEAD(&rec->backrefs);
2863         INIT_LIST_HEAD(&rec->dups);
2864         INIT_LIST_HEAD(&rec->list);
2865
2866         if (is_root)
2867                 rec->is_root = 1;
2868         else
2869                 rec->is_root = 0;
2870
2871         if (inc_ref)
2872                 rec->refs = 1;
2873         else
2874                 rec->refs = 0;
2875
2876         if (extent_item_refs)
2877                 rec->extent_item_refs = extent_item_refs;
2878         else
2879                 rec->extent_item_refs = 0;
2880
2881         if (parent_key)
2882                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
2883         else
2884                 memset(&rec->parent_key, 0, sizeof(*parent_key));
2885
2886         if (parent_gen)
2887                 rec->parent_generation = parent_gen;
2888         else
2889                 rec->parent_generation = 0;
2890
2891         rec->cache.start = start;
2892         rec->cache.size = nr;
2893         ret = insert_cache_extent(extent_cache, &rec->cache);
2894         BUG_ON(ret);
2895         bytes_used += nr;
2896         if (set_checked) {
2897                 rec->content_checked = 1;
2898                 rec->owner_ref_checked = 1;
2899         }
2900         return ret;
2901 }
2902
2903 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
2904                             u64 parent, u64 root, int found_ref)
2905 {
2906         struct extent_record *rec;
2907         struct tree_backref *back;
2908         struct cache_extent *cache;
2909
2910         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2911         if (!cache) {
2912                 add_extent_rec(extent_cache, NULL, 0, bytenr,
2913                                1, 0, 0, 0, 0, 1, 0, 0);
2914                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2915                 if (!cache)
2916                         abort();
2917         }
2918
2919         rec = container_of(cache, struct extent_record, cache);
2920         if (rec->start != bytenr) {
2921                 abort();
2922         }
2923
2924         back = find_tree_backref(rec, parent, root);
2925         if (!back)
2926                 back = alloc_tree_backref(rec, parent, root);
2927
2928         if (found_ref) {
2929                 if (back->node.found_ref) {
2930                         fprintf(stderr, "Extent back ref already exists "
2931                                 "for %llu parent %llu root %llu \n",
2932                                 (unsigned long long)bytenr,
2933                                 (unsigned long long)parent,
2934                                 (unsigned long long)root);
2935                 }
2936                 back->node.found_ref = 1;
2937         } else {
2938                 if (back->node.found_extent_tree) {
2939                         fprintf(stderr, "Extent back ref already exists "
2940                                 "for %llu parent %llu root %llu \n",
2941                                 (unsigned long long)bytenr,
2942                                 (unsigned long long)parent,
2943                                 (unsigned long long)root);
2944                 }
2945                 back->node.found_extent_tree = 1;
2946         }
2947         maybe_free_extent_rec(extent_cache, rec);
2948         return 0;
2949 }
2950
2951 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
2952                             u64 parent, u64 root, u64 owner, u64 offset,
2953                             u32 num_refs, int found_ref, u64 max_size)
2954 {
2955         struct extent_record *rec;
2956         struct data_backref *back;
2957         struct cache_extent *cache;
2958
2959         cache = lookup_cache_extent(extent_cache, bytenr, 1);
2960         if (!cache) {
2961                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
2962                                0, 0, max_size);
2963                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
2964                 if (!cache)
2965                         abort();
2966         }
2967
2968         rec = container_of(cache, struct extent_record, cache);
2969         if (rec->max_size < max_size)
2970                 rec->max_size = max_size;
2971
2972         /*
2973          * If found_ref is set then max_size is the real size and must match the
2974          * existing refs.  So if we have already found a ref then we need to
2975          * make sure that this ref matches the existing one, otherwise we need
2976          * to add a new backref so we can notice that the backrefs don't match
2977          * and we need to figure out who is telling the truth.  This is to
2978          * account for that awful fsync bug I introduced where we'd end up with
2979          * a btrfs_file_extent_item that would have its length include multiple
2980          * prealloc extents or point inside of a prealloc extent.
2981          */
2982         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
2983                                  bytenr, max_size);
2984         if (!back)
2985                 back = alloc_data_backref(rec, parent, root, owner, offset,
2986                                           max_size);
2987
2988         if (found_ref) {
2989                 BUG_ON(num_refs != 1);
2990                 if (back->node.found_ref)
2991                         BUG_ON(back->bytes != max_size);
2992                 back->node.found_ref = 1;
2993                 back->found_ref += 1;
2994                 back->bytes = max_size;
2995                 back->disk_bytenr = bytenr;
2996                 rec->refs += 1;
2997                 rec->content_checked = 1;
2998                 rec->owner_ref_checked = 1;
2999         } else {
3000                 if (back->node.found_extent_tree) {
3001                         fprintf(stderr, "Extent back ref already exists "
3002                                 "for %llu parent %llu root %llu "
3003                                 "owner %llu offset %llu num_refs %lu\n",
3004                                 (unsigned long long)bytenr,
3005                                 (unsigned long long)parent,
3006                                 (unsigned long long)root,
3007                                 (unsigned long long)owner,
3008                                 (unsigned long long)offset,
3009                                 (unsigned long)num_refs);
3010                 }
3011                 back->num_refs = num_refs;
3012                 back->node.found_extent_tree = 1;
3013         }
3014         maybe_free_extent_rec(extent_cache, rec);
3015         return 0;
3016 }
3017
3018 static int add_pending(struct cache_tree *pending,
3019                        struct cache_tree *seen, u64 bytenr, u32 size)
3020 {
3021         int ret;
3022         ret = add_cache_extent(seen, bytenr, size);
3023         if (ret)
3024                 return ret;
3025         add_cache_extent(pending, bytenr, size);
3026         return 0;
3027 }
3028
3029 static int pick_next_pending(struct cache_tree *pending,
3030                         struct cache_tree *reada,
3031                         struct cache_tree *nodes,
3032                         u64 last, struct block_info *bits, int bits_nr,
3033                         int *reada_bits)
3034 {
3035         unsigned long node_start = last;
3036         struct cache_extent *cache;
3037         int ret;
3038
3039         cache = search_cache_extent(reada, 0);
3040         if (cache) {
3041                 bits[0].start = cache->start;
3042                 bits[0].size = cache->size;
3043                 *reada_bits = 1;
3044                 return 1;
3045         }
3046         *reada_bits = 0;
3047         if (node_start > 32768)
3048                 node_start -= 32768;
3049
3050         cache = search_cache_extent(nodes, node_start);
3051         if (!cache)
3052                 cache = search_cache_extent(nodes, 0);
3053
3054         if (!cache) {
3055                  cache = search_cache_extent(pending, 0);
3056                  if (!cache)
3057                          return 0;
3058                  ret = 0;
3059                  do {
3060                          bits[ret].start = cache->start;
3061                          bits[ret].size = cache->size;
3062                          cache = next_cache_extent(cache);
3063                          ret++;
3064                  } while (cache && ret < bits_nr);
3065                  return ret;
3066         }
3067
3068         ret = 0;
3069         do {
3070                 bits[ret].start = cache->start;
3071                 bits[ret].size = cache->size;
3072                 cache = next_cache_extent(cache);
3073                 ret++;
3074         } while (cache && ret < bits_nr);
3075
3076         if (bits_nr - ret > 8) {
3077                 u64 lookup = bits[0].start + bits[0].size;
3078                 struct cache_extent *next;
3079                 next = search_cache_extent(pending, lookup);
3080                 while(next) {
3081                         if (next->start - lookup > 32768)
3082                                 break;
3083                         bits[ret].start = next->start;
3084                         bits[ret].size = next->size;
3085                         lookup = next->start + next->size;
3086                         ret++;
3087                         if (ret == bits_nr)
3088                                 break;
3089                         next = next_cache_extent(next);
3090                         if (!next)
3091                                 break;
3092                 }
3093         }
3094         return ret;
3095 }
3096
3097 static void free_chunk_record(struct cache_extent *cache)
3098 {
3099         struct chunk_record *rec;
3100
3101         rec = container_of(cache, struct chunk_record, cache);
3102         free(rec);
3103 }
3104
3105 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
3106 {
3107         cache_tree_free_extents(chunk_cache, free_chunk_record);
3108 }
3109
3110 static void free_device_record(struct rb_node *node)
3111 {
3112         struct device_record *rec;
3113
3114         rec = container_of(node, struct device_record, node);
3115         free(rec);
3116 }
3117
3118 FREE_RB_BASED_TREE(device_cache, free_device_record);
3119
3120 int insert_block_group_record(struct block_group_tree *tree,
3121                               struct block_group_record *bg_rec)
3122 {
3123         int ret;
3124
3125         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
3126         if (ret)
3127                 return ret;
3128
3129         list_add_tail(&bg_rec->list, &tree->block_groups);
3130         return 0;
3131 }
3132
3133 static void free_block_group_record(struct cache_extent *cache)
3134 {
3135         struct block_group_record *rec;
3136
3137         rec = container_of(cache, struct block_group_record, cache);
3138         free(rec);
3139 }
3140
3141 void free_block_group_tree(struct block_group_tree *tree)
3142 {
3143         cache_tree_free_extents(&tree->tree, free_block_group_record);
3144 }
3145
3146 int insert_device_extent_record(struct device_extent_tree *tree,
3147                                 struct device_extent_record *de_rec)
3148 {
3149         int ret;
3150
3151         /*
3152          * Device extent is a bit different from the other extents, because
3153          * the extents which belong to the different devices may have the
3154          * same start and size, so we need use the special extent cache
3155          * search/insert functions.
3156          */
3157         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
3158         if (ret)
3159                 return ret;
3160
3161         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
3162         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
3163         return 0;
3164 }
3165
3166 static void free_device_extent_record(struct cache_extent *cache)
3167 {
3168         struct device_extent_record *rec;
3169
3170         rec = container_of(cache, struct device_extent_record, cache);
3171         free(rec);
3172 }
3173
3174 void free_device_extent_tree(struct device_extent_tree *tree)
3175 {
3176         cache_tree_free_extents(&tree->tree, free_device_extent_record);
3177 }
3178
3179 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3180 static int process_extent_ref_v0(struct cache_tree *extent_cache,
3181                                  struct extent_buffer *leaf, int slot)
3182 {
3183         struct btrfs_extent_ref_v0 *ref0;
3184         struct btrfs_key key;
3185
3186         btrfs_item_key_to_cpu(leaf, &key, slot);
3187         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
3188         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
3189                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
3190         } else {
3191                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
3192                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
3193         }
3194         return 0;
3195 }
3196 #endif
3197
3198 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
3199                                             struct btrfs_key *key,
3200                                             int slot)
3201 {
3202         struct btrfs_chunk *ptr;
3203         struct chunk_record *rec;
3204         int num_stripes, i;
3205
3206         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3207         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
3208
3209         rec = malloc(btrfs_chunk_record_size(num_stripes));
3210         if (!rec) {
3211                 fprintf(stderr, "memory allocation failed\n");
3212                 exit(-1);
3213         }
3214
3215         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
3216
3217         INIT_LIST_HEAD(&rec->list);
3218         INIT_LIST_HEAD(&rec->dextents);
3219         rec->bg_rec = NULL;
3220
3221         rec->cache.start = key->offset;
3222         rec->cache.size = btrfs_chunk_length(leaf, ptr);
3223
3224         rec->generation = btrfs_header_generation(leaf);
3225
3226         rec->objectid = key->objectid;
3227         rec->type = key->type;
3228         rec->offset = key->offset;
3229
3230         rec->length = rec->cache.size;
3231         rec->owner = btrfs_chunk_owner(leaf, ptr);
3232         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
3233         rec->type_flags = btrfs_chunk_type(leaf, ptr);
3234         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
3235         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
3236         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
3237         rec->num_stripes = num_stripes;
3238         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
3239
3240         for (i = 0; i < rec->num_stripes; ++i) {
3241                 rec->stripes[i].devid =
3242                         btrfs_stripe_devid_nr(leaf, ptr, i);
3243                 rec->stripes[i].offset =
3244                         btrfs_stripe_offset_nr(leaf, ptr, i);
3245                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
3246                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
3247                                 BTRFS_UUID_SIZE);
3248         }
3249
3250         return rec;
3251 }
3252
3253 static int process_chunk_item(struct cache_tree *chunk_cache,
3254                               struct btrfs_key *key, struct extent_buffer *eb,
3255                               int slot)
3256 {
3257         struct chunk_record *rec;
3258         int ret = 0;
3259
3260         rec = btrfs_new_chunk_record(eb, key, slot);
3261         ret = insert_cache_extent(chunk_cache, &rec->cache);
3262         if (ret) {
3263                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
3264                         rec->offset, rec->length);
3265                 free(rec);
3266         }
3267
3268         return ret;
3269 }
3270
3271 static int process_device_item(struct rb_root *dev_cache,
3272                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
3273 {
3274         struct btrfs_dev_item *ptr;
3275         struct device_record *rec;
3276         int ret = 0;
3277
3278         ptr = btrfs_item_ptr(eb,
3279                 slot, struct btrfs_dev_item);
3280
3281         rec = malloc(sizeof(*rec));
3282         if (!rec) {
3283                 fprintf(stderr, "memory allocation failed\n");
3284                 return -ENOMEM;
3285         }
3286
3287         rec->devid = key->offset;
3288         rec->generation = btrfs_header_generation(eb);
3289
3290         rec->objectid = key->objectid;
3291         rec->type = key->type;
3292         rec->offset = key->offset;
3293
3294         rec->devid = btrfs_device_id(eb, ptr);
3295         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
3296         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
3297
3298         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
3299         if (ret) {
3300                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
3301                 free(rec);
3302         }
3303
3304         return ret;
3305 }
3306
3307 struct block_group_record *
3308 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
3309                              int slot)
3310 {
3311         struct btrfs_block_group_item *ptr;
3312         struct block_group_record *rec;
3313
3314         rec = malloc(sizeof(*rec));
3315         if (!rec) {
3316                 fprintf(stderr, "memory allocation failed\n");
3317                 exit(-1);
3318         }
3319         memset(rec, 0, sizeof(*rec));
3320
3321         rec->cache.start = key->objectid;
3322         rec->cache.size = key->offset;
3323
3324         rec->generation = btrfs_header_generation(leaf);
3325
3326         rec->objectid = key->objectid;
3327         rec->type = key->type;
3328         rec->offset = key->offset;
3329
3330         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
3331         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
3332
3333         INIT_LIST_HEAD(&rec->list);
3334
3335         return rec;
3336 }
3337
3338 static int process_block_group_item(struct block_group_tree *block_group_cache,
3339                                     struct btrfs_key *key,
3340                                     struct extent_buffer *eb, int slot)
3341 {
3342         struct block_group_record *rec;
3343         int ret = 0;
3344
3345         rec = btrfs_new_block_group_record(eb, key, slot);
3346         ret = insert_block_group_record(block_group_cache, rec);
3347         if (ret) {
3348                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
3349                         rec->objectid, rec->offset);
3350                 free(rec);
3351         }
3352
3353         return ret;
3354 }
3355
3356 struct device_extent_record *
3357 btrfs_new_device_extent_record(struct extent_buffer *leaf,
3358                                struct btrfs_key *key, int slot)
3359 {
3360         struct device_extent_record *rec;
3361         struct btrfs_dev_extent *ptr;
3362
3363         rec = malloc(sizeof(*rec));
3364         if (!rec) {
3365                 fprintf(stderr, "memory allocation failed\n");
3366                 exit(-1);
3367         }
3368         memset(rec, 0, sizeof(*rec));
3369
3370         rec->cache.objectid = key->objectid;
3371         rec->cache.start = key->offset;
3372
3373         rec->generation = btrfs_header_generation(leaf);
3374
3375         rec->objectid = key->objectid;
3376         rec->type = key->type;
3377         rec->offset = key->offset;
3378
3379         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
3380         rec->chunk_objecteid =
3381                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
3382         rec->chunk_offset =
3383                 btrfs_dev_extent_chunk_offset(leaf, ptr);
3384         rec->length = btrfs_dev_extent_length(leaf, ptr);
3385         rec->cache.size = rec->length;
3386
3387         INIT_LIST_HEAD(&rec->chunk_list);
3388         INIT_LIST_HEAD(&rec->device_list);
3389
3390         return rec;
3391 }
3392
3393 static int
3394 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
3395                            struct btrfs_key *key, struct extent_buffer *eb,
3396                            int slot)
3397 {
3398         struct device_extent_record *rec;
3399         int ret;
3400
3401         rec = btrfs_new_device_extent_record(eb, key, slot);
3402         ret = insert_device_extent_record(dev_extent_cache, rec);
3403         if (ret) {
3404                 fprintf(stderr,
3405                         "Device extent[%llu, %llu, %llu] existed.\n",
3406                         rec->objectid, rec->offset, rec->length);
3407                 free(rec);
3408         }
3409
3410         return ret;
3411 }
3412
3413 static int process_extent_item(struct btrfs_root *root,
3414                                struct cache_tree *extent_cache,
3415                                struct extent_buffer *eb, int slot)
3416 {
3417         struct btrfs_extent_item *ei;
3418         struct btrfs_extent_inline_ref *iref;
3419         struct btrfs_extent_data_ref *dref;
3420         struct btrfs_shared_data_ref *sref;
3421         struct btrfs_key key;
3422         unsigned long end;
3423         unsigned long ptr;
3424         int type;
3425         u32 item_size = btrfs_item_size_nr(eb, slot);
3426         u64 refs = 0;
3427         u64 offset;
3428         u64 num_bytes;
3429         int metadata = 0;
3430
3431         btrfs_item_key_to_cpu(eb, &key, slot);
3432
3433         if (key.type == BTRFS_METADATA_ITEM_KEY) {
3434                 metadata = 1;
3435                 num_bytes = root->leafsize;
3436         } else {
3437                 num_bytes = key.offset;
3438         }
3439
3440         if (item_size < sizeof(*ei)) {
3441 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3442                 struct btrfs_extent_item_v0 *ei0;
3443                 BUG_ON(item_size != sizeof(*ei0));
3444                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
3445                 refs = btrfs_extent_refs_v0(eb, ei0);
3446 #else
3447                 BUG();
3448 #endif
3449                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
3450                                       num_bytes, refs, 0, 0, 0, metadata, 1,
3451                                       num_bytes);
3452         }
3453
3454         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
3455         refs = btrfs_extent_refs(eb, ei);
3456
3457         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
3458                        refs, 0, 0, 0, metadata, 1, num_bytes);
3459
3460         ptr = (unsigned long)(ei + 1);
3461         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
3462             key.type == BTRFS_EXTENT_ITEM_KEY)
3463                 ptr += sizeof(struct btrfs_tree_block_info);
3464
3465         end = (unsigned long)ei + item_size;
3466         while (ptr < end) {
3467                 iref = (struct btrfs_extent_inline_ref *)ptr;
3468                 type = btrfs_extent_inline_ref_type(eb, iref);
3469                 offset = btrfs_extent_inline_ref_offset(eb, iref);
3470                 switch (type) {
3471                 case BTRFS_TREE_BLOCK_REF_KEY:
3472                         add_tree_backref(extent_cache, key.objectid,
3473                                          0, offset, 0);
3474                         break;
3475                 case BTRFS_SHARED_BLOCK_REF_KEY:
3476                         add_tree_backref(extent_cache, key.objectid,
3477                                          offset, 0, 0);
3478                         break;
3479                 case BTRFS_EXTENT_DATA_REF_KEY:
3480                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3481                         add_data_backref(extent_cache, key.objectid, 0,
3482                                         btrfs_extent_data_ref_root(eb, dref),
3483                                         btrfs_extent_data_ref_objectid(eb,
3484                                                                        dref),
3485                                         btrfs_extent_data_ref_offset(eb, dref),
3486                                         btrfs_extent_data_ref_count(eb, dref),
3487                                         0, num_bytes);
3488                         break;
3489                 case BTRFS_SHARED_DATA_REF_KEY:
3490                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
3491                         add_data_backref(extent_cache, key.objectid, offset,
3492                                         0, 0, 0,
3493                                         btrfs_shared_data_ref_count(eb, sref),
3494                                         0, num_bytes);
3495                         break;
3496                 default:
3497                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
3498                                 key.objectid, key.type, num_bytes);
3499                         goto out;
3500                 }
3501                 ptr += btrfs_extent_inline_ref_size(type);
3502         }
3503         WARN_ON(ptr > end);
3504 out:
3505         return 0;
3506 }
3507
3508 static int check_cache_range(struct btrfs_root *root,
3509                              struct btrfs_block_group_cache *cache,
3510                              u64 offset, u64 bytes)
3511 {
3512         struct btrfs_free_space *entry;
3513         u64 *logical;
3514         u64 bytenr;
3515         int stripe_len;
3516         int i, nr, ret;
3517
3518         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3519                 bytenr = btrfs_sb_offset(i);
3520                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
3521                                        cache->key.objectid, bytenr, 0,
3522                                        &logical, &nr, &stripe_len);
3523                 if (ret)
3524                         return ret;
3525
3526                 while (nr--) {
3527                         if (logical[nr] + stripe_len <= offset)
3528                                 continue;
3529                         if (offset + bytes <= logical[nr])
3530                                 continue;
3531                         if (logical[nr] == offset) {
3532                                 if (stripe_len >= bytes) {
3533                                         kfree(logical);
3534                                         return 0;
3535                                 }
3536                                 bytes -= stripe_len;
3537                                 offset += stripe_len;
3538                         } else if (logical[nr] < offset) {
3539                                 if (logical[nr] + stripe_len >=
3540                                     offset + bytes) {
3541                                         kfree(logical);
3542                                         return 0;
3543                                 }
3544                                 bytes = (offset + bytes) -
3545                                         (logical[nr] + stripe_len);
3546                                 offset = logical[nr] + stripe_len;
3547                         } else {
3548                                 /*
3549                                  * Could be tricky, the super may land in the
3550                                  * middle of the area we're checking.  First
3551                                  * check the easiest case, it's at the end.
3552                                  */
3553                                 if (logical[nr] + stripe_len >=
3554                                     bytes + offset) {
3555                                         bytes = logical[nr] - offset;
3556                                         continue;
3557                                 }
3558
3559                                 /* Check the left side */
3560                                 ret = check_cache_range(root, cache,
3561                                                         offset,
3562                                                         logical[nr] - offset);
3563                                 if (ret) {
3564                                         kfree(logical);
3565                                         return ret;
3566                                 }
3567
3568                                 /* Now we continue with the right side */
3569                                 bytes = (offset + bytes) -
3570                                         (logical[nr] + stripe_len);
3571                                 offset = logical[nr] + stripe_len;
3572                         }
3573                 }
3574
3575                 kfree(logical);
3576         }
3577
3578         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
3579         if (!entry) {
3580                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
3581                         offset, offset+bytes);
3582                 return -EINVAL;
3583         }
3584
3585         if (entry->offset != offset) {
3586                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
3587                         entry->offset);
3588                 return -EINVAL;
3589         }
3590
3591         if (entry->bytes != bytes) {
3592                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
3593                         bytes, entry->bytes, offset);
3594                 return -EINVAL;
3595         }
3596
3597         unlink_free_space(cache->free_space_ctl, entry);
3598         free(entry);
3599         return 0;
3600 }
3601
3602 static int verify_space_cache(struct btrfs_root *root,
3603                               struct btrfs_block_group_cache *cache)
3604 {
3605         struct btrfs_path *path;
3606         struct extent_buffer *leaf;
3607         struct btrfs_key key;
3608         u64 last;
3609         int ret = 0;
3610
3611         path = btrfs_alloc_path();
3612         if (!path)
3613                 return -ENOMEM;
3614
3615         root = root->fs_info->extent_root;
3616
3617         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
3618
3619         key.objectid = last;
3620         key.offset = 0;
3621         key.type = BTRFS_EXTENT_ITEM_KEY;
3622
3623         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3624         if (ret < 0)
3625                 goto out;
3626         ret = 0;
3627         while (1) {
3628                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3629                         ret = btrfs_next_leaf(root, path);
3630                         if (ret < 0)
3631                                 goto out;
3632                         if (ret > 0) {
3633                                 ret = 0;
3634                                 break;
3635                         }
3636                 }
3637                 leaf = path->nodes[0];
3638                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3639                 if (key.objectid >= cache->key.offset + cache->key.objectid)
3640                         break;
3641                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3642                     key.type != BTRFS_METADATA_ITEM_KEY) {
3643                         path->slots[0]++;
3644                         continue;
3645                 }
3646
3647                 if (last == key.objectid) {
3648                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
3649                                 last = key.objectid + key.offset;
3650                         else
3651                                 last = key.objectid + root->leafsize;
3652                         path->slots[0]++;
3653                         continue;
3654                 }
3655
3656                 ret = check_cache_range(root, cache, last,
3657                                         key.objectid - last);
3658                 if (ret)
3659                         break;
3660                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3661                         last = key.objectid + key.offset;
3662                 else
3663                         last = key.objectid + root->leafsize;
3664                 path->slots[0]++;
3665         }
3666
3667         if (last < cache->key.objectid + cache->key.offset)
3668                 ret = check_cache_range(root, cache, last,
3669                                         cache->key.objectid +
3670                                         cache->key.offset - last);
3671
3672 out:
3673         btrfs_free_path(path);
3674
3675         if (!ret &&
3676             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
3677                 fprintf(stderr, "There are still entries left in the space "
3678                         "cache\n");
3679                 ret = -EINVAL;
3680         }
3681
3682         return ret;
3683 }
3684
3685 static int check_space_cache(struct btrfs_root *root)
3686 {
3687         struct btrfs_block_group_cache *cache;
3688         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
3689         int ret;
3690         int error = 0;
3691
3692         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
3693             btrfs_super_generation(root->fs_info->super_copy) !=
3694             btrfs_super_cache_generation(root->fs_info->super_copy)) {
3695                 printf("cache and super generation don't match, space cache "
3696                        "will be invalidated\n");
3697                 return 0;
3698         }
3699
3700         while (1) {
3701                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
3702                 if (!cache)
3703                         break;
3704
3705                 start = cache->key.objectid + cache->key.offset;
3706                 if (!cache->free_space_ctl) {
3707                         if (btrfs_init_free_space_ctl(cache,
3708                                                       root->sectorsize)) {
3709                                 ret = -ENOMEM;
3710                                 break;
3711                         }
3712                 } else {
3713                         btrfs_remove_free_space_cache(cache);
3714                 }
3715
3716                 ret = load_free_space_cache(root->fs_info, cache);
3717                 if (!ret)
3718                         continue;
3719
3720                 ret = verify_space_cache(root, cache);
3721                 if (ret) {
3722                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
3723                                 cache->key.objectid);
3724                         error++;
3725                 }
3726         }
3727
3728         return error ? -EINVAL : 0;
3729 }
3730
3731 static int read_extent_data(struct btrfs_root *root, char *data,
3732                         u64 logical, u64 *len, int mirror)
3733 {
3734         u64 offset = 0;
3735         struct btrfs_multi_bio *multi = NULL;
3736         struct btrfs_fs_info *info = root->fs_info;
3737         struct btrfs_device *device;
3738         int ret = 0;
3739         u64 max_len = *len;
3740
3741         ret = btrfs_map_block(&info->mapping_tree, READ, logical, len,
3742                               &multi, mirror, NULL);
3743         if (ret) {
3744                 fprintf(stderr, "Couldn't map the block %llu\n",
3745                                 logical + offset);
3746                 goto err;
3747         }
3748         device = multi->stripes[0].dev;
3749
3750         if (device->fd == 0)
3751                 goto err;
3752         if (*len > max_len)
3753                 *len = max_len;
3754
3755         ret = pread64(device->fd, data, *len, multi->stripes[0].physical);
3756         if (ret != *len)
3757                 ret = -EIO;
3758         else
3759                 ret = 0;
3760 err:
3761         kfree(multi);
3762         return ret;
3763 }
3764
3765 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
3766                         u64 num_bytes, unsigned long leaf_offset,
3767                         struct extent_buffer *eb) {
3768
3769         u64 offset = 0;
3770         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3771         char *data;
3772         unsigned long csum_offset;
3773         u32 csum;
3774         u32 csum_expected;
3775         u64 read_len;
3776         u64 data_checked = 0;
3777         u64 tmp;
3778         int ret = 0;
3779         int mirror;
3780         int num_copies;
3781
3782         if (num_bytes % root->sectorsize)
3783                 return -EINVAL;
3784
3785         data = malloc(num_bytes);
3786         if (!data)
3787                 return -ENOMEM;
3788
3789         while (offset < num_bytes) {
3790                 mirror = 0;
3791 again:
3792                 read_len = num_bytes - offset;
3793                 /* read as much space once a time */
3794                 ret = read_extent_data(root, data + offset,
3795                                 bytenr + offset, &read_len, mirror);
3796                 if (ret)
3797                         goto out;
3798                 data_checked = 0;
3799                 /* verify every 4k data's checksum */
3800                 while (data_checked < read_len) {
3801                         csum = ~(u32)0;
3802                         tmp = offset + data_checked;
3803
3804                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
3805                                                csum, root->sectorsize);
3806                         btrfs_csum_final(csum, (char *)&csum);
3807
3808                         csum_offset = leaf_offset +
3809                                  tmp / root->sectorsize * csum_size;
3810                         read_extent_buffer(eb, (char *)&csum_expected,
3811                                            csum_offset, csum_size);
3812                         /* try another mirror */
3813                         if (csum != csum_expected) {
3814                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
3815                                                 mirror, bytenr + tmp,
3816                                                 csum, csum_expected);
3817                                 num_copies = btrfs_num_copies(
3818                                                 &root->fs_info->mapping_tree,
3819                                                 bytenr, num_bytes);
3820                                 if (mirror < num_copies - 1) {
3821                                         mirror += 1;
3822                                         goto again;
3823                                 }
3824                         }
3825                         data_checked += root->sectorsize;
3826                 }
3827                 offset += read_len;
3828         }
3829 out:
3830         free(data);
3831         return ret;
3832 }
3833
3834 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
3835                                u64 num_bytes)
3836 {
3837         struct btrfs_path *path;
3838         struct extent_buffer *leaf;
3839         struct btrfs_key key;
3840         int ret;
3841
3842         path = btrfs_alloc_path();
3843         if (!path) {
3844                 fprintf(stderr, "Error allocing path\n");
3845                 return -ENOMEM;
3846         }
3847
3848         key.objectid = bytenr;
3849         key.type = BTRFS_EXTENT_ITEM_KEY;
3850         key.offset = (u64)-1;
3851
3852 again:
3853         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
3854                                 0, 0);
3855         if (ret < 0) {
3856                 fprintf(stderr, "Error looking up extent record %d\n", ret);
3857                 btrfs_free_path(path);
3858                 return ret;
3859         } else if (ret) {
3860                 if (path->slots[0] > 0) {
3861                         path->slots[0]--;
3862                 } else {
3863                         ret = btrfs_prev_leaf(root, path);
3864                         if (ret < 0) {
3865                                 goto out;
3866                         } else if (ret > 0) {
3867                                 ret = 0;
3868                                 goto out;
3869                         }
3870                 }
3871         }
3872
3873         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3874
3875         /*
3876          * Block group items come before extent items if they have the same
3877          * bytenr, so walk back one more just in case.  Dear future traveler,
3878          * first congrats on mastering time travel.  Now if it's not too much
3879          * trouble could you go back to 2006 and tell Chris to make the
3880          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
3881          * EXTENT_ITEM_KEY please?
3882          */
3883         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
3884                 if (path->slots[0] > 0) {
3885                         path->slots[0]--;
3886                 } else {
3887                         ret = btrfs_prev_leaf(root, path);
3888                         if (ret < 0) {
3889                                 goto out;
3890                         } else if (ret > 0) {
3891                                 ret = 0;
3892                                 goto out;
3893                         }
3894                 }
3895                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3896         }
3897
3898         while (num_bytes) {
3899                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3900                         ret = btrfs_next_leaf(root, path);
3901                         if (ret < 0) {
3902                                 fprintf(stderr, "Error going to next leaf "
3903                                         "%d\n", ret);
3904                                 btrfs_free_path(path);
3905                                 return ret;
3906                         } else if (ret) {
3907                                 break;
3908                         }
3909                 }
3910                 leaf = path->nodes[0];
3911                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3912                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
3913                         path->slots[0]++;
3914                         continue;
3915                 }
3916                 if (key.objectid + key.offset < bytenr) {
3917                         path->slots[0]++;
3918                         continue;
3919                 }
3920                 if (key.objectid > bytenr + num_bytes)
3921                         break;
3922
3923                 if (key.objectid == bytenr) {
3924                         if (key.offset >= num_bytes) {
3925                                 num_bytes = 0;
3926                                 break;
3927                         }
3928                         num_bytes -= key.offset;
3929                         bytenr += key.offset;
3930                 } else if (key.objectid < bytenr) {
3931                         if (key.objectid + key.offset >= bytenr + num_bytes) {
3932                                 num_bytes = 0;
3933                                 break;
3934                         }
3935                         num_bytes = (bytenr + num_bytes) -
3936                                 (key.objectid + key.offset);
3937                         bytenr = key.objectid + key.offset;
3938                 } else {
3939                         if (key.objectid + key.offset < bytenr + num_bytes) {
3940                                 u64 new_start = key.objectid + key.offset;
3941                                 u64 new_bytes = bytenr + num_bytes - new_start;
3942
3943                                 /*
3944                                  * Weird case, the extent is in the middle of
3945                                  * our range, we'll have to search one side
3946                                  * and then the other.  Not sure if this happens
3947                                  * in real life, but no harm in coding it up
3948                                  * anyway just in case.
3949                                  */
3950                                 btrfs_release_path(path);
3951                                 ret = check_extent_exists(root, new_start,
3952                                                           new_bytes);
3953                                 if (ret) {
3954                                         fprintf(stderr, "Right section didn't "
3955                                                 "have a record\n");
3956                                         break;
3957                                 }
3958                                 num_bytes = key.objectid - bytenr;
3959                                 goto again;
3960                         }
3961                         num_bytes = key.objectid - bytenr;
3962                 }
3963                 path->slots[0]++;
3964         }
3965         ret = 0;
3966
3967 out:
3968         if (num_bytes && !ret) {
3969                 fprintf(stderr, "There are no extents for csum range "
3970                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
3971                 ret = 1;
3972         }
3973
3974         btrfs_free_path(path);
3975         return ret;
3976 }
3977
3978 static int check_csums(struct btrfs_root *root)
3979 {
3980         struct btrfs_path *path;
3981         struct extent_buffer *leaf;
3982         struct btrfs_key key;
3983         u64 offset = 0, num_bytes = 0;
3984         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
3985         int errors = 0;
3986         int ret;
3987         u64 data_len;
3988         unsigned long leaf_offset;
3989
3990         root = root->fs_info->csum_root;
3991         if (!extent_buffer_uptodate(root->node)) {
3992                 fprintf(stderr, "No valid csum tree found\n");
3993                 return -ENOENT;
3994         }
3995
3996         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3997         key.type = BTRFS_EXTENT_CSUM_KEY;
3998         key.offset = 0;
3999
4000         path = btrfs_alloc_path();
4001         if (!path)
4002                 return -ENOMEM;
4003
4004         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4005         if (ret < 0) {
4006                 fprintf(stderr, "Error searching csum tree %d\n", ret);
4007                 btrfs_free_path(path);
4008                 return ret;
4009         }
4010
4011         if (ret > 0 && path->slots[0])
4012                 path->slots[0]--;
4013         ret = 0;
4014
4015         while (1) {
4016                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4017                         ret = btrfs_next_leaf(root, path);
4018                         if (ret < 0) {
4019                                 fprintf(stderr, "Error going to next leaf "
4020                                         "%d\n", ret);
4021                                 break;
4022                         }
4023                         if (ret)
4024                                 break;
4025                 }
4026                 leaf = path->nodes[0];
4027
4028                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4029                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
4030                         path->slots[0]++;
4031                         continue;
4032                 }
4033
4034                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
4035                               csum_size) * root->sectorsize;
4036                 if (!check_data_csum)
4037                         goto skip_csum_check;
4038                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
4039                 ret = check_extent_csums(root, key.offset, data_len,
4040                                          leaf_offset, leaf);
4041                 if (ret)
4042                         break;
4043 skip_csum_check:
4044                 if (!num_bytes) {
4045                         offset = key.offset;
4046                 } else if (key.offset != offset + num_bytes) {
4047                         ret = check_extent_exists(root, offset, num_bytes);
4048                         if (ret) {
4049                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
4050                                         "there is no extent record\n",
4051                                         offset, offset+num_bytes);
4052                                 errors++;
4053                         }
4054                         offset = key.offset;
4055                         num_bytes = 0;
4056                 }
4057                 num_bytes += data_len;
4058                 path->slots[0]++;
4059         }
4060
4061         btrfs_free_path(path);
4062         return errors;
4063 }
4064
4065 static int is_dropped_key(struct btrfs_key *key,
4066                           struct btrfs_key *drop_key) {
4067         if (key->objectid < drop_key->objectid)
4068                 return 1;
4069         else if (key->objectid == drop_key->objectid) {
4070                 if (key->type < drop_key->type)
4071                         return 1;
4072                 else if (key->type == drop_key->type) {
4073                         if (key->offset < drop_key->offset)
4074                                 return 1;
4075                 }
4076         }
4077         return 0;
4078 }
4079
4080 static int run_next_block(struct btrfs_trans_handle *trans,
4081                           struct btrfs_root *root,
4082                           struct block_info *bits,
4083                           int bits_nr,
4084                           u64 *last,
4085                           struct cache_tree *pending,
4086                           struct cache_tree *seen,
4087                           struct cache_tree *reada,
4088                           struct cache_tree *nodes,
4089                           struct cache_tree *extent_cache,
4090                           struct cache_tree *chunk_cache,
4091                           struct rb_root *dev_cache,
4092                           struct block_group_tree *block_group_cache,
4093                           struct device_extent_tree *dev_extent_cache,
4094                           struct btrfs_root_item *ri)
4095 {
4096         struct extent_buffer *buf;
4097         u64 bytenr;
4098         u32 size;
4099         u64 parent;
4100         u64 owner;
4101         u64 flags;
4102         u64 ptr;
4103         u64 gen = 0;
4104         int ret = 0;
4105         int i;
4106         int nritems;
4107         struct btrfs_key key;
4108         struct cache_extent *cache;
4109         int reada_bits;
4110
4111         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
4112                                     bits_nr, &reada_bits);
4113         if (nritems == 0)
4114                 return 1;
4115
4116         if (!reada_bits) {
4117                 for(i = 0; i < nritems; i++) {
4118                         ret = add_cache_extent(reada, bits[i].start,
4119                                                bits[i].size);
4120                         if (ret == -EEXIST)
4121                                 continue;
4122
4123                         /* fixme, get the parent transid */
4124                         readahead_tree_block(root, bits[i].start,
4125                                              bits[i].size, 0);
4126                 }
4127         }
4128         *last = bits[0].start;
4129         bytenr = bits[0].start;
4130         size = bits[0].size;
4131
4132         cache = lookup_cache_extent(pending, bytenr, size);
4133         if (cache) {
4134                 remove_cache_extent(pending, cache);
4135                 free(cache);
4136         }
4137         cache = lookup_cache_extent(reada, bytenr, size);
4138         if (cache) {
4139                 remove_cache_extent(reada, cache);
4140                 free(cache);
4141         }
4142         cache = lookup_cache_extent(nodes, bytenr, size);
4143         if (cache) {
4144                 remove_cache_extent(nodes, cache);
4145                 free(cache);
4146         }
4147         cache = lookup_cache_extent(extent_cache, bytenr, size);
4148         if (cache) {
4149                 struct extent_record *rec;
4150
4151                 rec = container_of(cache, struct extent_record, cache);
4152                 gen = rec->parent_generation;
4153         }
4154
4155         /* fixme, get the real parent transid */
4156         buf = read_tree_block(root, bytenr, size, gen);
4157         if (!extent_buffer_uptodate(buf)) {
4158                 record_bad_block_io(root->fs_info,
4159                                     extent_cache, bytenr, size);
4160                 goto out;
4161         }
4162
4163         nritems = btrfs_header_nritems(buf);
4164
4165         /*
4166          * FIXME, this only works only if we don't have any full
4167          * backref mode.
4168          */
4169         if (!init_extent_tree) {
4170                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
4171                                        btrfs_header_level(buf), 1, NULL,
4172                                        &flags);
4173                 if (ret < 0)
4174                         flags = 0;
4175         } else {
4176                 flags = 0;
4177         }
4178
4179         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4180                 parent = bytenr;
4181                 owner = 0;
4182         } else {
4183                 parent = 0;
4184                 owner = btrfs_header_owner(buf);
4185         }
4186
4187         ret = check_block(trans, root, extent_cache, buf, flags);
4188         if (ret)
4189                 goto out;
4190
4191         if (btrfs_is_leaf(buf)) {
4192                 btree_space_waste += btrfs_leaf_free_space(root, buf);
4193                 for (i = 0; i < nritems; i++) {
4194                         struct btrfs_file_extent_item *fi;
4195                         btrfs_item_key_to_cpu(buf, &key, i);
4196                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
4197                                 process_extent_item(root, extent_cache, buf,
4198                                                     i);
4199                                 continue;
4200                         }
4201                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4202                                 process_extent_item(root, extent_cache, buf,
4203                                                     i);
4204                                 continue;
4205                         }
4206                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
4207                                 total_csum_bytes +=
4208                                         btrfs_item_size_nr(buf, i);
4209                                 continue;
4210                         }
4211                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
4212                                 process_chunk_item(chunk_cache, &key, buf, i);
4213                                 continue;
4214                         }
4215                         if (key.type == BTRFS_DEV_ITEM_KEY) {
4216                                 process_device_item(dev_cache, &key, buf, i);
4217                                 continue;
4218                         }
4219                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
4220                                 process_block_group_item(block_group_cache,
4221                                         &key, buf, i);
4222                                 continue;
4223                         }
4224                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
4225                                 process_device_extent_item(dev_extent_cache,
4226                                         &key, buf, i);
4227                                 continue;
4228
4229                         }
4230                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
4231 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4232                                 process_extent_ref_v0(extent_cache, buf, i);
4233 #else
4234                                 BUG();
4235 #endif
4236                                 continue;
4237                         }
4238
4239                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
4240                                 add_tree_backref(extent_cache, key.objectid, 0,
4241                                                  key.offset, 0);
4242                                 continue;
4243                         }
4244                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
4245                                 add_tree_backref(extent_cache, key.objectid,
4246                                                  key.offset, 0, 0);
4247                                 continue;
4248                         }
4249                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
4250                                 struct btrfs_extent_data_ref *ref;
4251                                 ref = btrfs_item_ptr(buf, i,
4252                                                 struct btrfs_extent_data_ref);
4253                                 add_data_backref(extent_cache,
4254                                         key.objectid, 0,
4255                                         btrfs_extent_data_ref_root(buf, ref),
4256                                         btrfs_extent_data_ref_objectid(buf,
4257                                                                        ref),
4258                                         btrfs_extent_data_ref_offset(buf, ref),
4259                                         btrfs_extent_data_ref_count(buf, ref),
4260                                         0, root->sectorsize);
4261                                 continue;
4262                         }
4263                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
4264                                 struct btrfs_shared_data_ref *ref;
4265                                 ref = btrfs_item_ptr(buf, i,
4266                                                 struct btrfs_shared_data_ref);
4267                                 add_data_backref(extent_cache,
4268                                         key.objectid, key.offset, 0, 0, 0,
4269                                         btrfs_shared_data_ref_count(buf, ref),
4270                                         0, root->sectorsize);
4271                                 continue;
4272                         }
4273                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
4274                                 struct bad_item *bad;
4275
4276                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
4277                                         continue;
4278                                 if (!owner)
4279                                         continue;
4280                                 bad = malloc(sizeof(struct bad_item));
4281                                 if (!bad)
4282                                         continue;
4283                                 INIT_LIST_HEAD(&bad->list);
4284                                 memcpy(&bad->key, &key,
4285                                        sizeof(struct btrfs_key));
4286                                 bad->root_id = owner;
4287                                 list_add_tail(&bad->list, &delete_items);
4288                                 continue;
4289                         }
4290                         if (key.type != BTRFS_EXTENT_DATA_KEY)
4291                                 continue;
4292                         fi = btrfs_item_ptr(buf, i,
4293                                             struct btrfs_file_extent_item);
4294                         if (btrfs_file_extent_type(buf, fi) ==
4295                             BTRFS_FILE_EXTENT_INLINE)
4296                                 continue;
4297                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
4298                                 continue;
4299
4300                         data_bytes_allocated +=
4301                                 btrfs_file_extent_disk_num_bytes(buf, fi);
4302                         if (data_bytes_allocated < root->sectorsize) {
4303                                 abort();
4304                         }
4305                         data_bytes_referenced +=
4306                                 btrfs_file_extent_num_bytes(buf, fi);
4307                         add_data_backref(extent_cache,
4308                                 btrfs_file_extent_disk_bytenr(buf, fi),
4309                                 parent, owner, key.objectid, key.offset -
4310                                 btrfs_file_extent_offset(buf, fi), 1, 1,
4311                                 btrfs_file_extent_disk_num_bytes(buf, fi));
4312                 }
4313         } else {
4314                 int level;
4315                 struct btrfs_key first_key;
4316
4317                 first_key.objectid = 0;
4318
4319                 if (nritems > 0)
4320                         btrfs_item_key_to_cpu(buf, &first_key, 0);
4321                 level = btrfs_header_level(buf);
4322                 for (i = 0; i < nritems; i++) {
4323                         ptr = btrfs_node_blockptr(buf, i);
4324                         size = btrfs_level_size(root, level - 1);
4325                         btrfs_node_key_to_cpu(buf, &key, i);
4326                         if (ri != NULL) {
4327                                 struct btrfs_key drop_key;
4328                                 btrfs_disk_key_to_cpu(&drop_key,
4329                                                       &ri->drop_progress);
4330                                 if ((level == ri->drop_level)
4331                                     && is_dropped_key(&key, &drop_key)) {
4332                                         continue;
4333                                 }
4334                         }
4335                         ret = add_extent_rec(extent_cache, &key,
4336                                              btrfs_node_ptr_generation(buf, i),
4337                                              ptr, size, 0, 0, 1, 0, 1, 0,
4338                                              size);
4339                         BUG_ON(ret);
4340
4341                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
4342
4343                         if (level > 1) {
4344                                 add_pending(nodes, seen, ptr, size);
4345                         } else {
4346                                 add_pending(pending, seen, ptr, size);
4347                         }
4348                 }
4349                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
4350                                       nritems) * sizeof(struct btrfs_key_ptr);
4351         }
4352         total_btree_bytes += buf->len;
4353         if (fs_root_objectid(btrfs_header_owner(buf)))
4354                 total_fs_tree_bytes += buf->len;
4355         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
4356                 total_extent_tree_bytes += buf->len;
4357         if (!found_old_backref &&
4358             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
4359             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
4360             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
4361                 found_old_backref = 1;
4362 out:
4363         free_extent_buffer(buf);
4364         return ret;
4365 }
4366
4367 static int add_root_to_pending(struct extent_buffer *buf,
4368                                struct cache_tree *extent_cache,
4369                                struct cache_tree *pending,
4370                                struct cache_tree *seen,
4371                                struct cache_tree *nodes,
4372                                struct btrfs_key *root_key)
4373 {
4374         if (btrfs_header_level(buf) > 0)
4375                 add_pending(nodes, seen, buf->start, buf->len);
4376         else
4377                 add_pending(pending, seen, buf->start, buf->len);
4378         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
4379                        0, 1, 1, 0, 1, 0, buf->len);
4380
4381         if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
4382             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
4383                 add_tree_backref(extent_cache, buf->start, buf->start,
4384                                  0, 1);
4385         else
4386                 add_tree_backref(extent_cache, buf->start, 0,
4387                                  root_key->objectid, 1);
4388         return 0;
4389 }
4390
4391 /* as we fix the tree, we might be deleting blocks that
4392  * we're tracking for repair.  This hook makes sure we
4393  * remove any backrefs for blocks as we are fixing them.
4394  */
4395 static int free_extent_hook(struct btrfs_trans_handle *trans,
4396                             struct btrfs_root *root,
4397                             u64 bytenr, u64 num_bytes, u64 parent,
4398                             u64 root_objectid, u64 owner, u64 offset,
4399                             int refs_to_drop)
4400 {
4401         struct extent_record *rec;
4402         struct cache_extent *cache;
4403         int is_data;
4404         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
4405
4406         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
4407         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
4408         if (!cache)
4409                 return 0;
4410
4411         rec = container_of(cache, struct extent_record, cache);
4412         if (is_data) {
4413                 struct data_backref *back;
4414                 back = find_data_backref(rec, parent, root_objectid, owner,
4415                                          offset, 1, bytenr, num_bytes);
4416                 if (!back)
4417                         goto out;
4418                 if (back->node.found_ref) {
4419                         back->found_ref -= refs_to_drop;
4420                         if (rec->refs)
4421                                 rec->refs -= refs_to_drop;
4422                 }
4423                 if (back->node.found_extent_tree) {
4424                         back->num_refs -= refs_to_drop;
4425                         if (rec->extent_item_refs)
4426                                 rec->extent_item_refs -= refs_to_drop;
4427                 }
4428                 if (back->found_ref == 0)
4429                         back->node.found_ref = 0;
4430                 if (back->num_refs == 0)
4431                         back->node.found_extent_tree = 0;
4432
4433                 if (!back->node.found_extent_tree && back->node.found_ref) {
4434                         list_del(&back->node.list);
4435                         free(back);
4436                 }
4437         } else {
4438                 struct tree_backref *back;
4439                 back = find_tree_backref(rec, parent, root_objectid);
4440                 if (!back)
4441                         goto out;
4442                 if (back->node.found_ref) {
4443                         if (rec->refs)
4444                                 rec->refs--;
4445                         back->node.found_ref = 0;
4446                 }
4447                 if (back->node.found_extent_tree) {
4448                         if (rec->extent_item_refs)
4449                                 rec->extent_item_refs--;
4450                         back->node.found_extent_tree = 0;
4451                 }
4452                 if (!back->node.found_extent_tree && back->node.found_ref) {
4453                         list_del(&back->node.list);
4454                         free(back);
4455                 }
4456         }
4457         maybe_free_extent_rec(extent_cache, rec);
4458 out:
4459         return 0;
4460 }
4461
4462 static int delete_extent_records(struct btrfs_trans_handle *trans,
4463                                  struct btrfs_root *root,
4464                                  struct btrfs_path *path,
4465                                  u64 bytenr, u64 new_len)
4466 {
4467         struct btrfs_key key;
4468         struct btrfs_key found_key;
4469         struct extent_buffer *leaf;
4470         int ret;
4471         int slot;
4472
4473
4474         key.objectid = bytenr;
4475         key.type = (u8)-1;
4476         key.offset = (u64)-1;
4477
4478         while(1) {
4479                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
4480                                         &key, path, 0, 1);
4481                 if (ret < 0)
4482                         break;
4483
4484                 if (ret > 0) {
4485                         ret = 0;
4486                         if (path->slots[0] == 0)
4487                                 break;
4488                         path->slots[0]--;
4489                 }
4490                 ret = 0;
4491
4492                 leaf = path->nodes[0];
4493                 slot = path->slots[0];
4494
4495                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4496                 if (found_key.objectid != bytenr)
4497                         break;
4498
4499                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
4500                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
4501                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4502                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
4503                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
4504                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
4505                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
4506                         btrfs_release_path(path);
4507                         if (found_key.type == 0) {
4508                                 if (found_key.offset == 0)
4509                                         break;
4510                                 key.offset = found_key.offset - 1;
4511                                 key.type = found_key.type;
4512                         }
4513                         key.type = found_key.type - 1;
4514                         key.offset = (u64)-1;
4515                         continue;
4516                 }
4517
4518                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
4519                         found_key.objectid, found_key.type, found_key.offset);
4520
4521                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
4522                 if (ret)
4523                         break;
4524                 btrfs_release_path(path);
4525
4526                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4527                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
4528                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
4529                                 found_key.offset : root->leafsize;
4530
4531                         ret = btrfs_update_block_group(trans, root, bytenr,
4532                                                        bytes, 0, 0);
4533                         if (ret)
4534                                 break;
4535                 }
4536         }
4537
4538         btrfs_release_path(path);
4539         return ret;
4540 }
4541
4542 /*
4543  * for a single backref, this will allocate a new extent
4544  * and add the backref to it.
4545  */
4546 static int record_extent(struct btrfs_trans_handle *trans,
4547                          struct btrfs_fs_info *info,
4548                          struct btrfs_path *path,
4549                          struct extent_record *rec,
4550                          struct extent_backref *back,
4551                          int allocated, u64 flags)
4552 {
4553         int ret;
4554         struct btrfs_root *extent_root = info->extent_root;
4555         struct extent_buffer *leaf;
4556         struct btrfs_key ins_key;
4557         struct btrfs_extent_item *ei;
4558         struct tree_backref *tback;
4559         struct data_backref *dback;
4560         struct btrfs_tree_block_info *bi;
4561
4562         if (!back->is_data)
4563                 rec->max_size = max_t(u64, rec->max_size,
4564                                     info->extent_root->leafsize);
4565
4566         if (!allocated) {
4567                 u32 item_size = sizeof(*ei);
4568
4569                 if (!back->is_data)
4570                         item_size += sizeof(*bi);
4571
4572                 ins_key.objectid = rec->start;
4573                 ins_key.offset = rec->max_size;
4574                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
4575
4576                 ret = btrfs_insert_empty_item(trans, extent_root, path,
4577                                         &ins_key, item_size);
4578                 if (ret)
4579                         goto fail;
4580
4581                 leaf = path->nodes[0];
4582                 ei = btrfs_item_ptr(leaf, path->slots[0],
4583                                     struct btrfs_extent_item);
4584
4585                 btrfs_set_extent_refs(leaf, ei, 0);
4586                 btrfs_set_extent_generation(leaf, ei, rec->generation);
4587
4588                 if (back->is_data) {
4589                         btrfs_set_extent_flags(leaf, ei,
4590                                                BTRFS_EXTENT_FLAG_DATA);
4591                 } else {
4592                         struct btrfs_disk_key copy_key;;
4593
4594                         tback = (struct tree_backref *)back;
4595                         bi = (struct btrfs_tree_block_info *)(ei + 1);
4596                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
4597                                              sizeof(*bi));
4598
4599                         btrfs_set_disk_key_objectid(&copy_key,
4600                                                     rec->info_objectid);
4601                         btrfs_set_disk_key_type(&copy_key, 0);
4602                         btrfs_set_disk_key_offset(&copy_key, 0);
4603
4604                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
4605                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
4606
4607                         btrfs_set_extent_flags(leaf, ei,
4608                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
4609                 }
4610
4611                 btrfs_mark_buffer_dirty(leaf);
4612                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
4613                                                rec->max_size, 1, 0);
4614                 if (ret)
4615                         goto fail;
4616                 btrfs_release_path(path);
4617         }
4618
4619         if (back->is_data) {
4620                 u64 parent;
4621                 int i;
4622
4623                 dback = (struct data_backref *)back;
4624                 if (back->full_backref)
4625                         parent = dback->parent;
4626                 else
4627                         parent = 0;
4628
4629                 for (i = 0; i < dback->found_ref; i++) {
4630                         /* if parent != 0, we're doing a full backref
4631                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
4632                          * just makes the backref allocator create a data
4633                          * backref
4634                          */
4635                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
4636                                                    rec->start, rec->max_size,
4637                                                    parent,
4638                                                    dback->root,
4639                                                    parent ?
4640                                                    BTRFS_FIRST_FREE_OBJECTID :
4641                                                    dback->owner,
4642                                                    dback->offset);
4643                         if (ret)
4644                                 break;
4645                 }
4646                 fprintf(stderr, "adding new data backref"
4647                                 " on %llu %s %llu owner %llu"
4648                                 " offset %llu found %d\n",
4649                                 (unsigned long long)rec->start,
4650                                 back->full_backref ?
4651                                 "parent" : "root",
4652                                 back->full_backref ?
4653                                 (unsigned long long)parent :
4654                                 (unsigned long long)dback->root,
4655                                 (unsigned long long)dback->owner,
4656                                 (unsigned long long)dback->offset,
4657                                 dback->found_ref);
4658         } else {
4659                 u64 parent;
4660
4661                 tback = (struct tree_backref *)back;
4662                 if (back->full_backref)
4663                         parent = tback->parent;
4664                 else
4665                         parent = 0;
4666
4667                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
4668                                            rec->start, rec->max_size,
4669                                            parent, tback->root, 0, 0);
4670                 fprintf(stderr, "adding new tree backref on "
4671                         "start %llu len %llu parent %llu root %llu\n",
4672                         rec->start, rec->max_size, tback->parent, tback->root);
4673         }
4674         if (ret)
4675                 goto fail;
4676 fail:
4677         btrfs_release_path(path);
4678         return ret;
4679 }
4680
4681 struct extent_entry {
4682         u64 bytenr;
4683         u64 bytes;
4684         int count;
4685         int broken;
4686         struct list_head list;
4687 };
4688
4689 static struct extent_entry *find_entry(struct list_head *entries,
4690                                        u64 bytenr, u64 bytes)
4691 {
4692         struct extent_entry *entry = NULL;
4693
4694         list_for_each_entry(entry, entries, list) {
4695                 if (entry->bytenr == bytenr && entry->bytes == bytes)
4696                         return entry;
4697         }
4698
4699         return NULL;
4700 }
4701
4702 static struct extent_entry *find_most_right_entry(struct list_head *entries)
4703 {
4704         struct extent_entry *entry, *best = NULL, *prev = NULL;
4705
4706         list_for_each_entry(entry, entries, list) {
4707                 if (!prev) {
4708                         prev = entry;
4709                         continue;
4710                 }
4711
4712                 /*
4713                  * If there are as many broken entries as entries then we know
4714                  * not to trust this particular entry.
4715                  */
4716                 if (entry->broken == entry->count)
4717                         continue;
4718
4719                 /*
4720                  * If our current entry == best then we can't be sure our best
4721                  * is really the best, so we need to keep searching.
4722                  */
4723                 if (best && best->count == entry->count) {
4724                         prev = entry;
4725                         best = NULL;
4726                         continue;
4727                 }
4728
4729                 /* Prev == entry, not good enough, have to keep searching */
4730                 if (!prev->broken && prev->count == entry->count)
4731                         continue;
4732
4733                 if (!best)
4734                         best = (prev->count > entry->count) ? prev : entry;
4735                 else if (best->count < entry->count)
4736                         best = entry;
4737                 prev = entry;
4738         }
4739
4740         return best;
4741 }
4742
4743 static int repair_ref(struct btrfs_trans_handle *trans,
4744                       struct btrfs_fs_info *info, struct btrfs_path *path,
4745                       struct data_backref *dback, struct extent_entry *entry)
4746 {
4747         struct btrfs_root *root;
4748         struct btrfs_file_extent_item *fi;
4749         struct extent_buffer *leaf;
4750         struct btrfs_key key;
4751         u64 bytenr, bytes;
4752         int ret;
4753
4754         key.objectid = dback->root;
4755         key.type = BTRFS_ROOT_ITEM_KEY;
4756         key.offset = (u64)-1;
4757         root = btrfs_read_fs_root(info, &key);
4758         if (IS_ERR(root)) {
4759                 fprintf(stderr, "Couldn't find root for our ref\n");
4760                 return -EINVAL;
4761         }
4762
4763         /*
4764          * The backref points to the original offset of the extent if it was
4765          * split, so we need to search down to the offset we have and then walk
4766          * forward until we find the backref we're looking for.
4767          */
4768         key.objectid = dback->owner;
4769         key.type = BTRFS_EXTENT_DATA_KEY;
4770         key.offset = dback->offset;
4771         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4772         if (ret < 0) {
4773                 fprintf(stderr, "Error looking up ref %d\n", ret);
4774                 return ret;
4775         }
4776
4777         while (1) {
4778                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4779                         ret = btrfs_next_leaf(root, path);
4780                         if (ret) {
4781                                 fprintf(stderr, "Couldn't find our ref, next\n");
4782                                 return -EINVAL;
4783                         }
4784                 }
4785                 leaf = path->nodes[0];
4786                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4787                 if (key.objectid != dback->owner ||
4788                     key.type != BTRFS_EXTENT_DATA_KEY) {
4789                         fprintf(stderr, "Couldn't find our ref, search\n");
4790                         return -EINVAL;
4791                 }
4792                 fi = btrfs_item_ptr(leaf, path->slots[0],
4793                                     struct btrfs_file_extent_item);
4794                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4795                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4796
4797                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
4798                         break;
4799                 path->slots[0]++;
4800         }
4801
4802         btrfs_release_path(path);
4803
4804         /*
4805          * Have to make sure that this root gets updated when we commit the
4806          * transaction
4807          */
4808         record_root_in_trans(trans, root);
4809
4810         /*
4811          * Ok we have the key of the file extent we want to fix, now we can cow
4812          * down to the thing and fix it.
4813          */
4814         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4815         if (ret < 0) {
4816                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
4817                         key.objectid, key.type, key.offset, ret);
4818                 return ret;
4819         }
4820         if (ret > 0) {
4821                 fprintf(stderr, "Well that's odd, we just found this key "
4822                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
4823                         key.offset);
4824                 return -EINVAL;
4825         }
4826         leaf = path->nodes[0];
4827         fi = btrfs_item_ptr(leaf, path->slots[0],
4828                             struct btrfs_file_extent_item);
4829
4830         if (btrfs_file_extent_compression(leaf, fi) &&
4831             dback->disk_bytenr != entry->bytenr) {
4832                 fprintf(stderr, "Ref doesn't match the record start and is "
4833                         "compressed, please take a btrfs-image of this file "
4834                         "system and send it to a btrfs developer so they can "
4835                         "complete this functionality for bytenr %Lu\n",
4836                         dback->disk_bytenr);
4837                 return -EINVAL;
4838         }
4839
4840         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
4841                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4842         } else if (dback->disk_bytenr > entry->bytenr) {
4843                 u64 off_diff, offset;
4844
4845                 off_diff = dback->disk_bytenr - entry->bytenr;
4846                 offset = btrfs_file_extent_offset(leaf, fi);
4847                 if (dback->disk_bytenr + offset +
4848                     btrfs_file_extent_num_bytes(leaf, fi) >
4849                     entry->bytenr + entry->bytes) {
4850                         fprintf(stderr, "Ref is past the entry end, please "
4851                                 "take a btrfs-image of this file system and "
4852                                 "send it to a btrfs developer, ref %Lu\n",
4853                                 dback->disk_bytenr);
4854                         return -EINVAL;
4855                 }
4856                 offset += off_diff;
4857                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4858                 btrfs_set_file_extent_offset(leaf, fi, offset);
4859         } else if (dback->disk_bytenr < entry->bytenr) {
4860                 u64 offset;
4861
4862                 offset = btrfs_file_extent_offset(leaf, fi);
4863                 if (dback->disk_bytenr + offset < entry->bytenr) {
4864                         fprintf(stderr, "Ref is before the entry start, please"
4865                                 " take a btrfs-image of this file system and "
4866                                 "send it to a btrfs developer, ref %Lu\n",
4867                                 dback->disk_bytenr);
4868                         return -EINVAL;
4869                 }
4870
4871                 offset += dback->disk_bytenr;
4872                 offset -= entry->bytenr;
4873                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
4874                 btrfs_set_file_extent_offset(leaf, fi, offset);
4875         }
4876
4877         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
4878
4879         /*
4880          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
4881          * only do this if we aren't using compression, otherwise it's a
4882          * trickier case.
4883          */
4884         if (!btrfs_file_extent_compression(leaf, fi))
4885                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
4886         else
4887                 printf("ram bytes may be wrong?\n");
4888         btrfs_mark_buffer_dirty(leaf);
4889         btrfs_release_path(path);
4890         return 0;
4891 }
4892
4893 static int verify_backrefs(struct btrfs_trans_handle *trans,
4894                            struct btrfs_fs_info *info, struct btrfs_path *path,
4895                            struct extent_record *rec)
4896 {
4897         struct extent_backref *back;
4898         struct data_backref *dback;
4899         struct extent_entry *entry, *best = NULL;
4900         LIST_HEAD(entries);
4901         int nr_entries = 0;
4902         int broken_entries = 0;
4903         int ret = 0;
4904         short mismatch = 0;
4905
4906         /*
4907          * Metadata is easy and the backrefs should always agree on bytenr and
4908          * size, if not we've got bigger issues.
4909          */
4910         if (rec->metadata)
4911                 return 0;
4912
4913         list_for_each_entry(back, &rec->backrefs, list) {
4914                 if (back->full_backref || !back->is_data)
4915                         continue;
4916
4917                 dback = (struct data_backref *)back;
4918
4919                 /*
4920                  * We only pay attention to backrefs that we found a real
4921                  * backref for.
4922                  */
4923                 if (dback->found_ref == 0)
4924                         continue;
4925
4926                 /*
4927                  * For now we only catch when the bytes don't match, not the
4928                  * bytenr.  We can easily do this at the same time, but I want
4929                  * to have a fs image to test on before we just add repair
4930                  * functionality willy-nilly so we know we won't screw up the
4931                  * repair.
4932                  */
4933
4934                 entry = find_entry(&entries, dback->disk_bytenr,
4935                                    dback->bytes);
4936                 if (!entry) {
4937                         entry = malloc(sizeof(struct extent_entry));
4938                         if (!entry) {
4939                                 ret = -ENOMEM;
4940                                 goto out;
4941                         }
4942                         memset(entry, 0, sizeof(*entry));
4943                         entry->bytenr = dback->disk_bytenr;
4944                         entry->bytes = dback->bytes;
4945                         list_add_tail(&entry->list, &entries);
4946                         nr_entries++;
4947                 }
4948
4949                 /*
4950                  * If we only have on entry we may think the entries agree when
4951                  * in reality they don't so we have to do some extra checking.
4952                  */
4953                 if (dback->disk_bytenr != rec->start ||
4954                     dback->bytes != rec->nr || back->broken)
4955                         mismatch = 1;
4956
4957                 if (back->broken) {
4958                         entry->broken++;
4959                         broken_entries++;
4960                 }
4961
4962                 entry->count++;
4963         }
4964
4965         /* Yay all the backrefs agree, carry on good sir */
4966         if (nr_entries <= 1 && !mismatch)
4967                 goto out;
4968
4969         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
4970                 "%Lu\n", rec->start);
4971
4972         /*
4973          * First we want to see if the backrefs can agree amongst themselves who
4974          * is right, so figure out which one of the entries has the highest
4975          * count.
4976          */
4977         best = find_most_right_entry(&entries);
4978
4979         /*
4980          * Ok so we may have an even split between what the backrefs think, so
4981          * this is where we use the extent ref to see what it thinks.
4982          */
4983         if (!best) {
4984                 entry = find_entry(&entries, rec->start, rec->nr);
4985                 if (!entry && (!broken_entries || !rec->found_rec)) {
4986                         fprintf(stderr, "Backrefs don't agree with each other "
4987                                 "and extent record doesn't agree with anybody,"
4988                                 " so we can't fix bytenr %Lu bytes %Lu\n",
4989                                 rec->start, rec->nr);
4990                         ret = -EINVAL;
4991                         goto out;
4992                 } else if (!entry) {
4993                         /*
4994                          * Ok our backrefs were broken, we'll assume this is the
4995                          * correct value and add an entry for this range.
4996                          */
4997                         entry = malloc(sizeof(struct extent_entry));
4998                         if (!entry) {
4999                                 ret = -ENOMEM;
5000                                 goto out;
5001                         }
5002                         memset(entry, 0, sizeof(*entry));
5003                         entry->bytenr = rec->start;
5004                         entry->bytes = rec->nr;
5005                         list_add_tail(&entry->list, &entries);
5006                         nr_entries++;
5007                 }
5008                 entry->count++;
5009                 best = find_most_right_entry(&entries);
5010                 if (!best) {
5011                         fprintf(stderr, "Backrefs and extent record evenly "
5012                                 "split on who is right, this is going to "
5013                                 "require user input to fix bytenr %Lu bytes "
5014                                 "%Lu\n", rec->start, rec->nr);
5015                         ret = -EINVAL;
5016                         goto out;
5017                 }
5018         }
5019
5020         /*
5021          * I don't think this can happen currently as we'll abort() if we catch
5022          * this case higher up, but in case somebody removes that we still can't
5023          * deal with it properly here yet, so just bail out of that's the case.
5024          */
5025         if (best->bytenr != rec->start) {
5026                 fprintf(stderr, "Extent start and backref starts don't match, "
5027                         "please use btrfs-image on this file system and send "
5028                         "it to a btrfs developer so they can make fsck fix "
5029                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
5030                         rec->start, rec->nr);
5031                 ret = -EINVAL;
5032                 goto out;
5033         }
5034
5035         /*
5036          * Ok great we all agreed on an extent record, let's go find the real
5037          * references and fix up the ones that don't match.
5038          */
5039         list_for_each_entry(back, &rec->backrefs, list) {
5040                 if (back->full_backref || !back->is_data)
5041                         continue;
5042
5043                 dback = (struct data_backref *)back;
5044
5045                 /*
5046                  * Still ignoring backrefs that don't have a real ref attached
5047                  * to them.
5048                  */
5049                 if (dback->found_ref == 0)
5050                         continue;
5051
5052                 if (dback->bytes == best->bytes &&
5053                     dback->disk_bytenr == best->bytenr)
5054                         continue;
5055
5056                 ret = repair_ref(trans, info, path, dback, best);
5057                 if (ret)
5058                         goto out;
5059         }
5060
5061         /*
5062          * Ok we messed with the actual refs, which means we need to drop our
5063          * entire cache and go back and rescan.  I know this is a huge pain and
5064          * adds a lot of extra work, but it's the only way to be safe.  Once all
5065          * the backrefs agree we may not need to do anything to the extent
5066          * record itself.
5067          */
5068         ret = -EAGAIN;
5069 out:
5070         while (!list_empty(&entries)) {
5071                 entry = list_entry(entries.next, struct extent_entry, list);
5072                 list_del_init(&entry->list);
5073                 free(entry);
5074         }
5075         return ret;
5076 }
5077
5078 static int process_duplicates(struct btrfs_root *root,
5079                               struct cache_tree *extent_cache,
5080                               struct extent_record *rec)
5081 {
5082         struct extent_record *good, *tmp;
5083         struct cache_extent *cache;
5084         int ret;
5085
5086         /*
5087          * If we found a extent record for this extent then return, or if we
5088          * have more than one duplicate we are likely going to need to delete
5089          * something.
5090          */
5091         if (rec->found_rec || rec->num_duplicates > 1)
5092                 return 0;
5093
5094         /* Shouldn't happen but just in case */
5095         BUG_ON(!rec->num_duplicates);
5096
5097         /*
5098          * So this happens if we end up with a backref that doesn't match the
5099          * actual extent entry.  So either the backref is bad or the extent
5100          * entry is bad.  Either way we want to have the extent_record actually
5101          * reflect what we found in the extent_tree, so we need to take the
5102          * duplicate out and use that as the extent_record since the only way we
5103          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
5104          */
5105         remove_cache_extent(extent_cache, &rec->cache);
5106
5107         good = list_entry(rec->dups.next, struct extent_record, list);
5108         list_del_init(&good->list);
5109         INIT_LIST_HEAD(&good->backrefs);
5110         INIT_LIST_HEAD(&good->dups);
5111         good->cache.start = good->start;
5112         good->cache.size = good->nr;
5113         good->content_checked = 0;
5114         good->owner_ref_checked = 0;
5115         good->num_duplicates = 0;
5116         good->refs = rec->refs;
5117         list_splice_init(&rec->backrefs, &good->backrefs);
5118         while (1) {
5119                 cache = lookup_cache_extent(extent_cache, good->start,
5120                                             good->nr);
5121                 if (!cache)
5122                         break;
5123                 tmp = container_of(cache, struct extent_record, cache);
5124
5125                 /*
5126                  * If we find another overlapping extent and it's found_rec is
5127                  * set then it's a duplicate and we need to try and delete
5128                  * something.
5129                  */
5130                 if (tmp->found_rec || tmp->num_duplicates > 0) {
5131                         if (list_empty(&good->list))
5132                                 list_add_tail(&good->list,
5133                                               &duplicate_extents);
5134                         good->num_duplicates += tmp->num_duplicates + 1;
5135                         list_splice_init(&tmp->dups, &good->dups);
5136                         list_del_init(&tmp->list);
5137                         list_add_tail(&tmp->list, &good->dups);
5138                         remove_cache_extent(extent_cache, &tmp->cache);
5139                         continue;
5140                 }
5141
5142                 /*
5143                  * Ok we have another non extent item backed extent rec, so lets
5144                  * just add it to this extent and carry on like we did above.
5145                  */
5146                 good->refs += tmp->refs;
5147                 list_splice_init(&tmp->backrefs, &good->backrefs);
5148                 remove_cache_extent(extent_cache, &tmp->cache);
5149                 free(tmp);
5150         }
5151         ret = insert_cache_extent(extent_cache, &good->cache);
5152         BUG_ON(ret);
5153         free(rec);
5154         return good->num_duplicates ? 0 : 1;
5155 }
5156
5157 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
5158                                     struct btrfs_root *root,
5159                                     struct extent_record *rec)
5160 {
5161         LIST_HEAD(delete_list);
5162         struct btrfs_path *path;
5163         struct extent_record *tmp, *good, *n;
5164         int nr_del = 0;
5165         int ret = 0;
5166         struct btrfs_key key;
5167
5168         path = btrfs_alloc_path();
5169         if (!path) {
5170                 ret = -ENOMEM;
5171                 goto out;
5172         }
5173
5174         good = rec;
5175         /* Find the record that covers all of the duplicates. */
5176         list_for_each_entry(tmp, &rec->dups, list) {
5177                 if (good->start < tmp->start)
5178                         continue;
5179                 if (good->nr > tmp->nr)
5180                         continue;
5181
5182                 if (tmp->start + tmp->nr < good->start + good->nr) {
5183                         fprintf(stderr, "Ok we have overlapping extents that "
5184                                 "aren't completely covered by eachother, this "
5185                                 "is going to require more careful thought.  "
5186                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
5187                                 tmp->start, tmp->nr, good->start, good->nr);
5188                         abort();
5189                 }
5190                 good = tmp;
5191         }
5192
5193         if (good != rec)
5194                 list_add_tail(&rec->list, &delete_list);
5195
5196         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
5197                 if (tmp == good)
5198                         continue;
5199                 list_move_tail(&tmp->list, &delete_list);
5200         }
5201
5202         root = root->fs_info->extent_root;
5203         list_for_each_entry(tmp, &delete_list, list) {
5204                 if (tmp->found_rec == 0)
5205                         continue;
5206                 key.objectid = tmp->start;
5207                 key.type = BTRFS_EXTENT_ITEM_KEY;
5208                 key.offset = tmp->nr;
5209
5210                 /* Shouldn't happen but just in case */
5211                 if (tmp->metadata) {
5212                         fprintf(stderr, "Well this shouldn't happen, extent "
5213                                 "record overlaps but is metadata? "
5214                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
5215                         abort();
5216                 }
5217
5218                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5219                 if (ret) {
5220                         if (ret > 0)
5221                                 ret = -EINVAL;
5222                         goto out;
5223                 }
5224                 ret = btrfs_del_item(trans, root, path);
5225                 if (ret)
5226                         goto out;
5227                 btrfs_release_path(path);
5228                 nr_del++;
5229         }
5230
5231 out:
5232         while (!list_empty(&delete_list)) {
5233                 tmp = list_entry(delete_list.next, struct extent_record, list);
5234                 list_del_init(&tmp->list);
5235                 if (tmp == rec)
5236                         continue;
5237                 free(tmp);
5238         }
5239
5240         while (!list_empty(&rec->dups)) {
5241                 tmp = list_entry(rec->dups.next, struct extent_record, list);
5242                 list_del_init(&tmp->list);
5243                 free(tmp);
5244         }
5245
5246         btrfs_free_path(path);
5247
5248         if (!ret && !nr_del)
5249                 rec->num_duplicates = 0;
5250
5251         return ret ? ret : nr_del;
5252 }
5253
5254 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
5255                                   struct btrfs_fs_info *info,
5256                                   struct btrfs_path *path,
5257                                   struct cache_tree *extent_cache,
5258                                   struct extent_record *rec)
5259 {
5260         struct btrfs_root *root;
5261         struct extent_backref *back;
5262         struct data_backref *dback;
5263         struct cache_extent *cache;
5264         struct btrfs_file_extent_item *fi;
5265         struct btrfs_key key;
5266         u64 bytenr, bytes;
5267         int ret;
5268
5269         list_for_each_entry(back, &rec->backrefs, list) {
5270                 /* Don't care about full backrefs (poor unloved backrefs) */
5271                 if (back->full_backref || !back->is_data)
5272                         continue;
5273
5274                 dback = (struct data_backref *)back;
5275
5276                 /* We found this one, we don't need to do a lookup */
5277                 if (dback->found_ref)
5278                         continue;
5279
5280                 key.objectid = dback->root;
5281                 key.type = BTRFS_ROOT_ITEM_KEY;
5282                 key.offset = (u64)-1;
5283
5284                 root = btrfs_read_fs_root(info, &key);
5285
5286                 /* No root, definitely a bad ref, skip */
5287                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
5288                         continue;
5289                 /* Other err, exit */
5290                 if (IS_ERR(root))
5291                         return PTR_ERR(root);
5292
5293                 key.objectid = dback->owner;
5294                 key.type = BTRFS_EXTENT_DATA_KEY;
5295                 key.offset = dback->offset;
5296                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5297                 if (ret) {
5298                         btrfs_release_path(path);
5299                         if (ret < 0)
5300                                 return ret;
5301                         /* Didn't find it, we can carry on */
5302                         ret = 0;
5303                         continue;
5304                 }
5305
5306                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5307                                     struct btrfs_file_extent_item);
5308                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
5309                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
5310                 btrfs_release_path(path);
5311                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5312                 if (cache) {
5313                         struct extent_record *tmp;
5314                         tmp = container_of(cache, struct extent_record, cache);
5315
5316                         /*
5317                          * If we found an extent record for the bytenr for this
5318                          * particular backref then we can't add it to our
5319                          * current extent record.  We only want to add backrefs
5320                          * that don't have a corresponding extent item in the
5321                          * extent tree since they likely belong to this record
5322                          * and we need to fix it if it doesn't match bytenrs.
5323                          */
5324                         if  (tmp->found_rec)
5325                                 continue;
5326                 }
5327
5328                 dback->found_ref += 1;
5329                 dback->disk_bytenr = bytenr;
5330                 dback->bytes = bytes;
5331
5332                 /*
5333                  * Set this so the verify backref code knows not to trust the
5334                  * values in this backref.
5335                  */
5336                 back->broken = 1;
5337         }
5338
5339         return 0;
5340 }
5341
5342 /*
5343  * when an incorrect extent item is found, this will delete
5344  * all of the existing entries for it and recreate them
5345  * based on what the tree scan found.
5346  */
5347 static int fixup_extent_refs(struct btrfs_trans_handle *trans,
5348                              struct btrfs_fs_info *info,
5349                              struct cache_tree *extent_cache,
5350                              struct extent_record *rec)
5351 {
5352         int ret;
5353         struct btrfs_path *path;
5354         struct list_head *cur = rec->backrefs.next;
5355         struct cache_extent *cache;
5356         struct extent_backref *back;
5357         int allocated = 0;
5358         u64 flags = 0;
5359
5360         /*
5361          * remember our flags for recreating the extent.
5362          * FIXME, if we have cleared extent tree, we can not
5363          * lookup extent info in extent tree.
5364          */
5365         if (!init_extent_tree) {
5366                 ret = btrfs_lookup_extent_info(NULL, info->extent_root,
5367                                         rec->start, rec->max_size,
5368                                         rec->metadata, NULL, &flags);
5369                 if (ret < 0)
5370                         flags = 0;
5371         } else {
5372                 flags = 0;
5373         }
5374
5375         path = btrfs_alloc_path();
5376         if (!path)
5377                 return -ENOMEM;
5378
5379         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
5380                 /*
5381                  * Sometimes the backrefs themselves are so broken they don't
5382                  * get attached to any meaningful rec, so first go back and
5383                  * check any of our backrefs that we couldn't find and throw
5384                  * them into the list if we find the backref so that
5385                  * verify_backrefs can figure out what to do.
5386                  */
5387                 ret = find_possible_backrefs(trans, info, path, extent_cache,
5388                                              rec);
5389                 if (ret < 0)
5390                         goto out;
5391         }
5392
5393         /* step one, make sure all of the backrefs agree */
5394         ret = verify_backrefs(trans, info, path, rec);
5395         if (ret < 0)
5396                 goto out;
5397
5398         /* step two, delete all the existing records */
5399         ret = delete_extent_records(trans, info->extent_root, path,
5400                                     rec->start, rec->max_size);
5401
5402         if (ret < 0)
5403                 goto out;
5404
5405         /* was this block corrupt?  If so, don't add references to it */
5406         cache = lookup_cache_extent(info->corrupt_blocks,
5407                                     rec->start, rec->max_size);
5408         if (cache) {
5409                 ret = 0;
5410                 goto out;
5411         }
5412
5413         /* step three, recreate all the refs we did find */
5414         while(cur != &rec->backrefs) {
5415                 back = list_entry(cur, struct extent_backref, list);
5416                 cur = cur->next;
5417
5418                 /*
5419                  * if we didn't find any references, don't create a
5420                  * new extent record
5421                  */
5422                 if (!back->found_ref)
5423                         continue;
5424
5425                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
5426                 allocated = 1;
5427
5428                 if (ret)
5429                         goto out;
5430         }
5431 out:
5432         btrfs_free_path(path);
5433         return ret;
5434 }
5435
5436 /* right now we only prune from the extent allocation tree */
5437 static int prune_one_block(struct btrfs_trans_handle *trans,
5438                            struct btrfs_fs_info *info,
5439                            struct btrfs_corrupt_block *corrupt)
5440 {
5441         int ret;
5442         struct btrfs_path path;
5443         struct extent_buffer *eb;
5444         u64 found;
5445         int slot;
5446         int nritems;
5447         int level = corrupt->level + 1;
5448
5449         btrfs_init_path(&path);
5450 again:
5451         /* we want to stop at the parent to our busted block */
5452         path.lowest_level = level;
5453
5454         ret = btrfs_search_slot(trans, info->extent_root,
5455                                 &corrupt->key, &path, -1, 1);
5456
5457         if (ret < 0)
5458                 goto out;
5459
5460         eb = path.nodes[level];
5461         if (!eb) {
5462                 ret = -ENOENT;
5463                 goto out;
5464         }
5465
5466         /*
5467          * hopefully the search gave us the block we want to prune,
5468          * lets try that first
5469          */
5470         slot = path.slots[level];
5471         found =  btrfs_node_blockptr(eb, slot);
5472         if (found == corrupt->cache.start)
5473                 goto del_ptr;
5474
5475         nritems = btrfs_header_nritems(eb);
5476
5477         /* the search failed, lets scan this node and hope we find it */
5478         for (slot = 0; slot < nritems; slot++) {
5479                 found =  btrfs_node_blockptr(eb, slot);
5480                 if (found == corrupt->cache.start)
5481                         goto del_ptr;
5482         }
5483         /*
5484          * we couldn't find the bad block.  TODO, search all the nodes for pointers
5485          * to this block
5486          */
5487         if (eb == info->extent_root->node) {
5488                 ret = -ENOENT;
5489                 goto out;
5490         } else {
5491                 level++;
5492                 btrfs_release_path(&path);
5493                 goto again;
5494         }
5495
5496 del_ptr:
5497         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
5498         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
5499
5500 out:
5501         btrfs_release_path(&path);
5502         return ret;
5503 }
5504
5505 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
5506                                 struct btrfs_fs_info *info)
5507 {
5508         struct cache_extent *cache;
5509         struct btrfs_corrupt_block *corrupt;
5510
5511         cache = search_cache_extent(info->corrupt_blocks, 0);
5512         while (1) {
5513                 if (!cache)
5514                         break;
5515                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5516                 prune_one_block(trans, info, corrupt);
5517                 cache = next_cache_extent(cache);
5518         }
5519         return 0;
5520 }
5521
5522 static void free_corrupt_block(struct cache_extent *cache)
5523 {
5524         struct btrfs_corrupt_block *corrupt;
5525
5526         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
5527         free(corrupt);
5528 }
5529
5530 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
5531
5532 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
5533 {
5534         struct btrfs_block_group_cache *cache;
5535         u64 start, end;
5536         int ret;
5537
5538         while (1) {
5539                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
5540                                             &start, &end, EXTENT_DIRTY);
5541                 if (ret)
5542                         break;
5543                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
5544                                    GFP_NOFS);
5545         }
5546
5547         start = 0;
5548         while (1) {
5549                 cache = btrfs_lookup_first_block_group(fs_info, start);
5550                 if (!cache)
5551                         break;
5552                 if (cache->cached)
5553                         cache->cached = 0;
5554                 start = cache->key.objectid + cache->key.offset;
5555         }
5556 }
5557
5558 static int check_extent_refs(struct btrfs_trans_handle *trans,
5559                              struct btrfs_root *root,
5560                              struct cache_tree *extent_cache)
5561 {
5562         struct extent_record *rec;
5563         struct cache_extent *cache;
5564         int err = 0;
5565         int ret = 0;
5566         int fixed = 0;
5567         int had_dups = 0;
5568
5569         if (repair) {
5570                 /*
5571                  * if we're doing a repair, we have to make sure
5572                  * we don't allocate from the problem extents.
5573                  * In the worst case, this will be all the
5574                  * extents in the FS
5575                  */
5576                 cache = search_cache_extent(extent_cache, 0);
5577                 while(cache) {
5578                         rec = container_of(cache, struct extent_record, cache);
5579                         btrfs_pin_extent(root->fs_info,
5580                                          rec->start, rec->max_size);
5581                         cache = next_cache_extent(cache);
5582                 }
5583
5584                 /* pin down all the corrupted blocks too */
5585                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
5586                 while(cache) {
5587                         btrfs_pin_extent(root->fs_info,
5588                                          cache->start, cache->size);
5589                         cache = next_cache_extent(cache);
5590                 }
5591                 prune_corrupt_blocks(trans, root->fs_info);
5592                 reset_cached_block_groups(root->fs_info);
5593         }
5594
5595         /*
5596          * We need to delete any duplicate entries we find first otherwise we
5597          * could mess up the extent tree when we have backrefs that actually
5598          * belong to a different extent item and not the weird duplicate one.
5599          */
5600         while (repair && !list_empty(&duplicate_extents)) {
5601                 rec = list_entry(duplicate_extents.next, struct extent_record,
5602                                  list);
5603                 list_del_init(&rec->list);
5604
5605                 /* Sometimes we can find a backref before we find an actual
5606                  * extent, so we need to process it a little bit to see if there
5607                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
5608                  * if this is a backref screwup.  If we need to delete stuff
5609                  * process_duplicates() will return 0, otherwise it will return
5610                  * 1 and we
5611                  */
5612                 if (process_duplicates(root, extent_cache, rec))
5613                         continue;
5614                 ret = delete_duplicate_records(trans, root, rec);
5615                 if (ret < 0)
5616                         return ret;
5617                 /*
5618                  * delete_duplicate_records will return the number of entries
5619                  * deleted, so if it's greater than 0 then we know we actually
5620                  * did something and we need to remove.
5621                  */
5622                 if (ret)
5623                         had_dups = 1;
5624         }
5625
5626         if (had_dups)
5627                 return -EAGAIN;
5628
5629         while(1) {
5630                 fixed = 0;
5631                 cache = search_cache_extent(extent_cache, 0);
5632                 if (!cache)
5633                         break;
5634                 rec = container_of(cache, struct extent_record, cache);
5635                 if (rec->num_duplicates) {
5636                         fprintf(stderr, "extent item %llu has multiple extent "
5637                                 "items\n", (unsigned long long)rec->start);
5638                         err = 1;
5639                 }
5640
5641                 if (rec->refs != rec->extent_item_refs) {
5642                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
5643                                 (unsigned long long)rec->start,
5644                                 (unsigned long long)rec->nr);
5645                         fprintf(stderr, "extent item %llu, found %llu\n",
5646                                 (unsigned long long)rec->extent_item_refs,
5647                                 (unsigned long long)rec->refs);
5648                         if (!fixed && repair) {
5649                                 ret = fixup_extent_refs(trans, root->fs_info,
5650                                                         extent_cache, rec);
5651                                 if (ret)
5652                                         goto repair_abort;
5653                                 fixed = 1;
5654                         }
5655                         err = 1;
5656
5657                 }
5658                 if (all_backpointers_checked(rec, 1)) {
5659                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
5660                                 (unsigned long long)rec->start,
5661                                 (unsigned long long)rec->nr);
5662
5663                         if (!fixed && repair) {
5664                                 ret = fixup_extent_refs(trans, root->fs_info,
5665                                                         extent_cache, rec);
5666                                 if (ret)
5667                                         goto repair_abort;
5668                                 fixed = 1;
5669                         }
5670
5671                         err = 1;
5672                 }
5673                 if (!rec->owner_ref_checked) {
5674                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
5675                                 (unsigned long long)rec->start,
5676                                 (unsigned long long)rec->nr);
5677                         if (!fixed && repair) {
5678                                 ret = fixup_extent_refs(trans, root->fs_info,
5679                                                         extent_cache, rec);
5680                                 if (ret)
5681                                         goto repair_abort;
5682                                 fixed = 1;
5683                         }
5684                         err = 1;
5685                 }
5686
5687                 remove_cache_extent(extent_cache, cache);
5688                 free_all_extent_backrefs(rec);
5689                 free(rec);
5690         }
5691 repair_abort:
5692         if (repair) {
5693                 if (ret && ret != -EAGAIN) {
5694                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
5695                         exit(1);
5696                 } else if (!ret) {
5697                         btrfs_fix_block_accounting(trans, root);
5698                 }
5699                 if (err)
5700                         fprintf(stderr, "repaired damaged extent references\n");
5701                 return ret;
5702         }
5703         return err;
5704 }
5705
5706 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
5707 {
5708         u64 stripe_size;
5709
5710         if (type & BTRFS_BLOCK_GROUP_RAID0) {
5711                 stripe_size = length;
5712                 stripe_size /= num_stripes;
5713         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
5714                 stripe_size = length * 2;
5715                 stripe_size /= num_stripes;
5716         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
5717                 stripe_size = length;
5718                 stripe_size /= (num_stripes - 1);
5719         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
5720                 stripe_size = length;
5721                 stripe_size /= (num_stripes - 2);
5722         } else {
5723                 stripe_size = length;
5724         }
5725         return stripe_size;
5726 }
5727
5728 static int check_chunk_refs(struct chunk_record *chunk_rec,
5729                             struct block_group_tree *block_group_cache,
5730                             struct device_extent_tree *dev_extent_cache,
5731                             int silent)
5732 {
5733         struct cache_extent *block_group_item;
5734         struct block_group_record *block_group_rec;
5735         struct cache_extent *dev_extent_item;
5736         struct device_extent_record *dev_extent_rec;
5737         u64 devid;
5738         u64 offset;
5739         u64 length;
5740         int i;
5741         int ret = 0;
5742
5743         block_group_item = lookup_cache_extent(&block_group_cache->tree,
5744                                                chunk_rec->offset,
5745                                                chunk_rec->length);
5746         if (block_group_item) {
5747                 block_group_rec = container_of(block_group_item,
5748                                                struct block_group_record,
5749                                                cache);
5750                 if (chunk_rec->length != block_group_rec->offset ||
5751                     chunk_rec->offset != block_group_rec->objectid ||
5752                     chunk_rec->type_flags != block_group_rec->flags) {
5753                         if (!silent)
5754                                 fprintf(stderr,
5755                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
5756                                         chunk_rec->objectid,
5757                                         chunk_rec->type,
5758                                         chunk_rec->offset,
5759                                         chunk_rec->length,
5760                                         chunk_rec->offset,
5761                                         chunk_rec->type_flags,
5762                                         block_group_rec->objectid,
5763                                         block_group_rec->type,
5764                                         block_group_rec->offset,
5765                                         block_group_rec->offset,
5766                                         block_group_rec->objectid,
5767                                         block_group_rec->flags);
5768                         ret = -1;
5769                 } else {
5770                         list_del_init(&block_group_rec->list);
5771                         chunk_rec->bg_rec = block_group_rec;
5772                 }
5773         } else {
5774                 if (!silent)
5775                         fprintf(stderr,
5776                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
5777                                 chunk_rec->objectid,
5778                                 chunk_rec->type,
5779                                 chunk_rec->offset,
5780                                 chunk_rec->length,
5781                                 chunk_rec->offset,
5782                                 chunk_rec->type_flags);
5783                 ret = -1;
5784         }
5785
5786         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
5787                                     chunk_rec->num_stripes);
5788         for (i = 0; i < chunk_rec->num_stripes; ++i) {
5789                 devid = chunk_rec->stripes[i].devid;
5790                 offset = chunk_rec->stripes[i].offset;
5791                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
5792                                                        devid, offset, length);
5793                 if (dev_extent_item) {
5794                         dev_extent_rec = container_of(dev_extent_item,
5795                                                 struct device_extent_record,
5796                                                 cache);
5797                         if (dev_extent_rec->objectid != devid ||
5798                             dev_extent_rec->offset != offset ||
5799                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
5800                             dev_extent_rec->length != length) {
5801                                 if (!silent)
5802                                         fprintf(stderr,
5803                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
5804                                                 chunk_rec->objectid,
5805                                                 chunk_rec->type,
5806                                                 chunk_rec->offset,
5807                                                 chunk_rec->stripes[i].devid,
5808                                                 chunk_rec->stripes[i].offset,
5809                                                 dev_extent_rec->objectid,
5810                                                 dev_extent_rec->offset,
5811                                                 dev_extent_rec->length);
5812                                 ret = -1;
5813                         } else {
5814                                 list_move(&dev_extent_rec->chunk_list,
5815                                           &chunk_rec->dextents);
5816                         }
5817                 } else {
5818                         if (!silent)
5819                                 fprintf(stderr,
5820                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
5821                                         chunk_rec->objectid,
5822                                         chunk_rec->type,
5823                                         chunk_rec->offset,
5824                                         chunk_rec->stripes[i].devid,
5825                                         chunk_rec->stripes[i].offset);
5826                         ret = -1;
5827                 }
5828         }
5829         return ret;
5830 }
5831
5832 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
5833 int check_chunks(struct cache_tree *chunk_cache,
5834                  struct block_group_tree *block_group_cache,
5835                  struct device_extent_tree *dev_extent_cache,
5836                  struct list_head *good, struct list_head *bad, int silent)
5837 {
5838         struct cache_extent *chunk_item;
5839         struct chunk_record *chunk_rec;
5840         struct block_group_record *bg_rec;
5841         struct device_extent_record *dext_rec;
5842         int err;
5843         int ret = 0;
5844
5845         chunk_item = first_cache_extent(chunk_cache);
5846         while (chunk_item) {
5847                 chunk_rec = container_of(chunk_item, struct chunk_record,
5848                                          cache);
5849                 err = check_chunk_refs(chunk_rec, block_group_cache,
5850                                        dev_extent_cache, silent);
5851                 if (err) {
5852                         ret = err;
5853                         if (bad)
5854                                 list_add_tail(&chunk_rec->list, bad);
5855                 } else {
5856                         if (good)
5857                                 list_add_tail(&chunk_rec->list, good);
5858                 }
5859
5860                 chunk_item = next_cache_extent(chunk_item);
5861         }
5862
5863         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
5864                 if (!silent)
5865                         fprintf(stderr,
5866                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
5867                                 bg_rec->objectid,
5868                                 bg_rec->offset,
5869                                 bg_rec->flags);
5870                 if (!ret)
5871                         ret = 1;
5872         }
5873
5874         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
5875                             chunk_list) {
5876                 if (!silent)
5877                         fprintf(stderr,
5878                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
5879                                 dext_rec->objectid,
5880                                 dext_rec->offset,
5881                                 dext_rec->length);
5882                 if (!ret)
5883                         ret = 1;
5884         }
5885         return ret;
5886 }
5887
5888
5889 static int check_device_used(struct device_record *dev_rec,
5890                              struct device_extent_tree *dext_cache)
5891 {
5892         struct cache_extent *cache;
5893         struct device_extent_record *dev_extent_rec;
5894         u64 total_byte = 0;
5895
5896         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
5897         while (cache) {
5898                 dev_extent_rec = container_of(cache,
5899                                               struct device_extent_record,
5900                                               cache);
5901                 if (dev_extent_rec->objectid != dev_rec->devid)
5902                         break;
5903
5904                 list_del(&dev_extent_rec->device_list);
5905                 total_byte += dev_extent_rec->length;
5906                 cache = next_cache_extent(cache);
5907         }
5908
5909         if (total_byte != dev_rec->byte_used) {
5910                 fprintf(stderr,
5911                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
5912                         total_byte, dev_rec->byte_used, dev_rec->objectid,
5913                         dev_rec->type, dev_rec->offset);
5914                 return -1;
5915         } else {
5916                 return 0;
5917         }
5918 }
5919
5920 /* check btrfs_dev_item -> btrfs_dev_extent */
5921 static int check_devices(struct rb_root *dev_cache,
5922                          struct device_extent_tree *dev_extent_cache)
5923 {
5924         struct rb_node *dev_node;
5925         struct device_record *dev_rec;
5926         struct device_extent_record *dext_rec;
5927         int err;
5928         int ret = 0;
5929
5930         dev_node = rb_first(dev_cache);
5931         while (dev_node) {
5932                 dev_rec = container_of(dev_node, struct device_record, node);
5933                 err = check_device_used(dev_rec, dev_extent_cache);
5934                 if (err)
5935                         ret = err;
5936
5937                 dev_node = rb_next(dev_node);
5938         }
5939         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
5940                             device_list) {
5941                 fprintf(stderr,
5942                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
5943                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
5944                 if (!ret)
5945                         ret = 1;
5946         }
5947         return ret;
5948 }
5949
5950 static int check_chunks_and_extents(struct btrfs_root *root)
5951 {
5952         struct rb_root dev_cache;
5953         struct cache_tree chunk_cache;
5954         struct block_group_tree block_group_cache;
5955         struct device_extent_tree dev_extent_cache;
5956         struct cache_tree extent_cache;
5957         struct cache_tree seen;
5958         struct cache_tree pending;
5959         struct cache_tree reada;
5960         struct cache_tree nodes;
5961         struct cache_tree corrupt_blocks;
5962         struct btrfs_path path;
5963         struct btrfs_key key;
5964         struct btrfs_key found_key;
5965         int ret, err = 0;
5966         u64 last = 0;
5967         struct block_info *bits;
5968         int bits_nr;
5969         struct extent_buffer *leaf;
5970         struct btrfs_trans_handle *trans = NULL;
5971         int slot;
5972         struct btrfs_root_item ri;
5973         struct list_head dropping_trees;
5974
5975         dev_cache = RB_ROOT;
5976         cache_tree_init(&chunk_cache);
5977         block_group_tree_init(&block_group_cache);
5978         device_extent_tree_init(&dev_extent_cache);
5979
5980         cache_tree_init(&extent_cache);
5981         cache_tree_init(&seen);
5982         cache_tree_init(&pending);
5983         cache_tree_init(&nodes);
5984         cache_tree_init(&reada);
5985         cache_tree_init(&corrupt_blocks);
5986         INIT_LIST_HEAD(&dropping_trees);
5987
5988         if (repair) {
5989                 trans = btrfs_start_transaction(root, 1);
5990                 if (IS_ERR(trans)) {
5991                         fprintf(stderr, "Error starting transaction\n");
5992                         return PTR_ERR(trans);
5993                 }
5994                 root->fs_info->fsck_extent_cache = &extent_cache;
5995                 root->fs_info->free_extent_hook = free_extent_hook;
5996                 root->fs_info->corrupt_blocks = &corrupt_blocks;
5997         }
5998
5999         bits_nr = 1024;
6000         bits = malloc(bits_nr * sizeof(struct block_info));
6001         if (!bits) {
6002                 perror("malloc");
6003                 exit(1);
6004         }
6005
6006 again:
6007         add_root_to_pending(root->fs_info->tree_root->node,
6008                             &extent_cache, &pending, &seen, &nodes,
6009                             &root->fs_info->tree_root->root_key);
6010
6011         add_root_to_pending(root->fs_info->chunk_root->node,
6012                             &extent_cache, &pending, &seen, &nodes,
6013                             &root->fs_info->chunk_root->root_key);
6014
6015         btrfs_init_path(&path);
6016         key.offset = 0;
6017         key.objectid = 0;
6018         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
6019         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
6020                                         &key, &path, 0, 0);
6021         if (ret < 0)
6022                 goto out;
6023         while(1) {
6024                 leaf = path.nodes[0];
6025                 slot = path.slots[0];
6026                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
6027                         ret = btrfs_next_leaf(root, &path);
6028                         if (ret != 0)
6029                                 break;
6030                         leaf = path.nodes[0];
6031                         slot = path.slots[0];
6032                 }
6033                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
6034                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
6035                         unsigned long offset;
6036                         struct extent_buffer *buf;
6037
6038                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6039                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
6040                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
6041                                 buf = read_tree_block(root->fs_info->tree_root,
6042                                                       btrfs_root_bytenr(&ri),
6043                                                       btrfs_level_size(root,
6044                                                       btrfs_root_level(&ri)),
6045                                                       0);
6046                                 if (!buf) {
6047                                         ret = -EIO;
6048                                         goto out;
6049                                 }
6050                                 add_root_to_pending(buf, &extent_cache,
6051                                                     &pending, &seen, &nodes,
6052                                                     &found_key);
6053                                 free_extent_buffer(buf);
6054                         } else {
6055                                 struct dropping_root_item_record *dri_rec;
6056                                 dri_rec = malloc(sizeof(*dri_rec));
6057                                 if (!dri_rec) {
6058                                         perror("malloc");
6059                                         exit(1);
6060                                 }
6061                                 memcpy(&dri_rec->ri, &ri, sizeof(ri));
6062                                 memcpy(&dri_rec->found_key, &found_key,
6063                                        sizeof(found_key));
6064                                 list_add_tail(&dri_rec->list, &dropping_trees);
6065                         }
6066                 }
6067                 path.slots[0]++;
6068         }
6069         btrfs_release_path(&path);
6070         while (1) {
6071                 ret = run_next_block(trans, root, bits, bits_nr, &last,
6072                                      &pending, &seen, &reada, &nodes,
6073                                      &extent_cache, &chunk_cache, &dev_cache,
6074                                      &block_group_cache, &dev_extent_cache,
6075                                      NULL);
6076                 if (ret != 0)
6077                         break;
6078         }
6079
6080         while (!list_empty(&dropping_trees)) {
6081                 struct dropping_root_item_record *rec;
6082                 struct extent_buffer *buf;
6083                 rec = list_entry(dropping_trees.next,
6084                                  struct dropping_root_item_record, list);
6085                 last = 0;
6086                 if (!bits) {
6087                         perror("realloc");
6088                         exit(1);
6089                 }
6090                 buf = read_tree_block(root->fs_info->tree_root,
6091                                       btrfs_root_bytenr(&rec->ri),
6092                                       btrfs_level_size(root,
6093                                       btrfs_root_level(&rec->ri)), 0);
6094                 if (!buf) {
6095                         ret = -EIO;
6096                         goto out;
6097                 }
6098                 add_root_to_pending(buf, &extent_cache, &pending,
6099                                     &seen, &nodes, &rec->found_key);
6100                 while (1) {
6101                         ret = run_next_block(trans, root, bits, bits_nr, &last,
6102                                              &pending, &seen, &reada,
6103                                              &nodes, &extent_cache,
6104                                              &chunk_cache, &dev_cache,
6105                                              &block_group_cache,
6106                                              &dev_extent_cache,
6107                                              &rec->ri);
6108                         if (ret != 0)
6109                                 break;
6110                 }
6111                 free_extent_buffer(buf);
6112                 list_del(&rec->list);
6113                 free(rec);
6114         }
6115
6116         if (ret >= 0)
6117                 ret = check_extent_refs(trans, root, &extent_cache);
6118         if (ret == -EAGAIN) {
6119                 ret = btrfs_commit_transaction(trans, root);
6120                 if (ret)
6121                         goto out;
6122
6123                 trans = btrfs_start_transaction(root, 1);
6124                 if (IS_ERR(trans)) {
6125                         ret = PTR_ERR(trans);
6126                         goto out;
6127                 }
6128
6129                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6130                 free_extent_cache_tree(&seen);
6131                 free_extent_cache_tree(&pending);
6132                 free_extent_cache_tree(&reada);
6133                 free_extent_cache_tree(&nodes);
6134                 free_extent_record_cache(root->fs_info, &extent_cache);
6135                 goto again;
6136         }
6137
6138         err = check_chunks(&chunk_cache, &block_group_cache,
6139                            &dev_extent_cache, NULL, NULL, 0);
6140         if (err && !ret)
6141                 ret = err;
6142
6143         err = check_devices(&dev_cache, &dev_extent_cache);
6144         if (err && !ret)
6145                 ret = err;
6146
6147 out:
6148         if (trans) {
6149                 err = btrfs_commit_transaction(trans, root);
6150                 if (!ret)
6151                         ret = err;
6152         }
6153         if (repair) {
6154                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
6155                 root->fs_info->fsck_extent_cache = NULL;
6156                 root->fs_info->free_extent_hook = NULL;
6157                 root->fs_info->corrupt_blocks = NULL;
6158         }
6159         free(bits);
6160         free_chunk_cache_tree(&chunk_cache);
6161         free_device_cache_tree(&dev_cache);
6162         free_block_group_tree(&block_group_cache);
6163         free_device_extent_tree(&dev_extent_cache);
6164         free_extent_cache_tree(&seen);
6165         free_extent_cache_tree(&pending);
6166         free_extent_cache_tree(&reada);
6167         free_extent_cache_tree(&nodes);
6168         return ret;
6169 }
6170
6171 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
6172                            struct btrfs_root *root, int overwrite)
6173 {
6174         struct extent_buffer *c;
6175         struct extent_buffer *old = root->node;
6176         int level;
6177         int ret;
6178         struct btrfs_disk_key disk_key = {0,0,0};
6179
6180         level = 0;
6181
6182         if (overwrite) {
6183                 c = old;
6184                 extent_buffer_get(c);
6185                 goto init;
6186         }
6187         c = btrfs_alloc_free_block(trans, root,
6188                                    btrfs_level_size(root, 0),
6189                                    root->root_key.objectid,
6190                                    &disk_key, level, 0, 0);
6191         if (IS_ERR(c)) {
6192                 c = old;
6193                 extent_buffer_get(c);
6194                 overwrite = 1;
6195         }
6196 init:
6197         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
6198         btrfs_set_header_level(c, level);
6199         btrfs_set_header_bytenr(c, c->start);
6200         btrfs_set_header_generation(c, trans->transid);
6201         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
6202         btrfs_set_header_owner(c, root->root_key.objectid);
6203
6204         write_extent_buffer(c, root->fs_info->fsid,
6205                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
6206
6207         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
6208                             btrfs_header_chunk_tree_uuid(c),
6209                             BTRFS_UUID_SIZE);
6210
6211         btrfs_mark_buffer_dirty(c);
6212         /*
6213          * this case can happen in the following case:
6214          *
6215          * 1.overwrite previous root.
6216          *
6217          * 2.reinit reloc data root, this is because we skip pin
6218          * down reloc data tree before which means we can allocate
6219          * same block bytenr here.
6220          */
6221         if (old->start == c->start) {
6222                 btrfs_set_root_generation(&root->root_item,
6223                                           trans->transid);
6224                 root->root_item.level = btrfs_header_level(root->node);
6225                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
6226                                         &root->root_key, &root->root_item);
6227                 if (ret) {
6228                         free_extent_buffer(c);
6229                         return ret;
6230                 }
6231         }
6232         free_extent_buffer(old);
6233         root->node = c;
6234         add_root_to_dirty_list(root);
6235         return 0;
6236 }
6237
6238 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
6239                                 struct extent_buffer *eb, int tree_root)
6240 {
6241         struct extent_buffer *tmp;
6242         struct btrfs_root_item *ri;
6243         struct btrfs_key key;
6244         u64 bytenr;
6245         u32 leafsize;
6246         int level = btrfs_header_level(eb);
6247         int nritems;
6248         int ret;
6249         int i;
6250
6251         /*
6252          * If we have pinned this block before, don't pin it again.
6253          * This can not only avoid forever loop with broken filesystem
6254          * but also give us some speedups.
6255          */
6256         if (test_range_bit(&fs_info->pinned_extents, eb->start,
6257                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
6258                 return 0;
6259
6260         btrfs_pin_extent(fs_info, eb->start, eb->len);
6261
6262         leafsize = btrfs_super_leafsize(fs_info->super_copy);
6263         nritems = btrfs_header_nritems(eb);
6264         for (i = 0; i < nritems; i++) {
6265                 if (level == 0) {
6266                         btrfs_item_key_to_cpu(eb, &key, i);
6267                         if (key.type != BTRFS_ROOT_ITEM_KEY)
6268                                 continue;
6269                         /* Skip the extent root and reloc roots */
6270                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6271                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
6272                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
6273                                 continue;
6274                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
6275                         bytenr = btrfs_disk_root_bytenr(eb, ri);
6276
6277                         /*
6278                          * If at any point we start needing the real root we
6279                          * will have to build a stump root for the root we are
6280                          * in, but for now this doesn't actually use the root so
6281                          * just pass in extent_root.
6282                          */
6283                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6284                                               leafsize, 0);
6285                         if (!tmp) {
6286                                 fprintf(stderr, "Error reading root block\n");
6287                                 return -EIO;
6288                         }
6289                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
6290                         free_extent_buffer(tmp);
6291                         if (ret)
6292                                 return ret;
6293                 } else {
6294                         bytenr = btrfs_node_blockptr(eb, i);
6295
6296                         /* If we aren't the tree root don't read the block */
6297                         if (level == 1 && !tree_root) {
6298                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
6299                                 continue;
6300                         }
6301
6302                         tmp = read_tree_block(fs_info->extent_root, bytenr,
6303                                               leafsize, 0);
6304                         if (!tmp) {
6305                                 fprintf(stderr, "Error reading tree block\n");
6306                                 return -EIO;
6307                         }
6308                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
6309                         free_extent_buffer(tmp);
6310                         if (ret)
6311                                 return ret;
6312                 }
6313         }
6314
6315         return 0;
6316 }
6317
6318 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
6319 {
6320         int ret;
6321
6322         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
6323         if (ret)
6324                 return ret;
6325
6326         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
6327 }
6328
6329 static int reset_block_groups(struct btrfs_fs_info *fs_info)
6330 {
6331         struct btrfs_block_group_cache *cache;
6332         struct btrfs_path *path;
6333         struct extent_buffer *leaf;
6334         struct btrfs_chunk *chunk;
6335         struct btrfs_key key;
6336         int ret;
6337         u64 start;
6338
6339         path = btrfs_alloc_path();
6340         if (!path)
6341                 return -ENOMEM;
6342
6343         key.objectid = 0;
6344         key.type = BTRFS_CHUNK_ITEM_KEY;
6345         key.offset = 0;
6346
6347         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
6348         if (ret < 0) {
6349                 btrfs_free_path(path);
6350                 return ret;
6351         }
6352
6353         /*
6354          * We do this in case the block groups were screwed up and had alloc
6355          * bits that aren't actually set on the chunks.  This happens with
6356          * restored images every time and could happen in real life I guess.
6357          */
6358         fs_info->avail_data_alloc_bits = 0;
6359         fs_info->avail_metadata_alloc_bits = 0;
6360         fs_info->avail_system_alloc_bits = 0;
6361
6362         /* First we need to create the in-memory block groups */
6363         while (1) {
6364                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6365                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
6366                         if (ret < 0) {
6367                                 btrfs_free_path(path);
6368                                 return ret;
6369                         }
6370                         if (ret) {
6371                                 ret = 0;
6372                                 break;
6373                         }
6374                 }
6375                 leaf = path->nodes[0];
6376                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6377                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
6378                         path->slots[0]++;
6379                         continue;
6380                 }
6381
6382                 chunk = btrfs_item_ptr(leaf, path->slots[0],
6383                                        struct btrfs_chunk);
6384                 btrfs_add_block_group(fs_info, 0,
6385                                       btrfs_chunk_type(leaf, chunk),
6386                                       key.objectid, key.offset,
6387                                       btrfs_chunk_length(leaf, chunk));
6388                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
6389                                  key.offset + btrfs_chunk_length(leaf, chunk),
6390                                  GFP_NOFS);
6391                 path->slots[0]++;
6392         }
6393         start = 0;
6394         while (1) {
6395                 cache = btrfs_lookup_first_block_group(fs_info, start);
6396                 if (!cache)
6397                         break;
6398                 cache->cached = 1;
6399                 start = cache->key.objectid + cache->key.offset;
6400         }
6401
6402         btrfs_free_path(path);
6403         return 0;
6404 }
6405
6406 static int reset_balance(struct btrfs_trans_handle *trans,
6407                          struct btrfs_fs_info *fs_info)
6408 {
6409         struct btrfs_root *root = fs_info->tree_root;
6410         struct btrfs_path *path;
6411         struct extent_buffer *leaf;
6412         struct btrfs_key key;
6413         int del_slot, del_nr = 0;
6414         int ret;
6415         int found = 0;
6416
6417         path = btrfs_alloc_path();
6418         if (!path)
6419                 return -ENOMEM;
6420
6421         key.objectid = BTRFS_BALANCE_OBJECTID;
6422         key.type = BTRFS_BALANCE_ITEM_KEY;
6423         key.offset = 0;
6424
6425         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6426         if (ret) {
6427                 if (ret > 0)
6428                         ret = 0;
6429                 if (!ret)
6430                         goto reinit_data_reloc;
6431                 else
6432                         goto out;
6433         }
6434
6435         ret = btrfs_del_item(trans, root, path);
6436         if (ret)
6437                 goto out;
6438         btrfs_release_path(path);
6439
6440         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
6441         key.type = BTRFS_ROOT_ITEM_KEY;
6442         key.offset = 0;
6443
6444         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6445         if (ret < 0)
6446                 goto out;
6447         while (1) {
6448                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6449                         if (!found)
6450                                 break;
6451
6452                         if (del_nr) {
6453                                 ret = btrfs_del_items(trans, root, path,
6454                                                       del_slot, del_nr);
6455                                 del_nr = 0;
6456                                 if (ret)
6457                                         goto out;
6458                         }
6459                         key.offset++;
6460                         btrfs_release_path(path);
6461
6462                         found = 0;
6463                         ret = btrfs_search_slot(trans, root, &key, path,
6464                                                 -1, 1);
6465                         if (ret < 0)
6466                                 goto out;
6467                         continue;
6468                 }
6469                 found = 1;
6470                 leaf = path->nodes[0];
6471                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6472                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
6473                         break;
6474                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6475                         path->slots[0]++;
6476                         continue;
6477                 }
6478                 if (!del_nr) {
6479                         del_slot = path->slots[0];
6480                         del_nr = 1;
6481                 } else {
6482                         del_nr++;
6483                 }
6484                 path->slots[0]++;
6485         }
6486
6487         if (del_nr) {
6488                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
6489                 if (ret)
6490                         goto out;
6491         }
6492         btrfs_release_path(path);
6493
6494 reinit_data_reloc:
6495         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
6496         key.type = BTRFS_ROOT_ITEM_KEY;
6497         key.offset = (u64)-1;
6498         root = btrfs_read_fs_root(fs_info, &key);
6499         if (IS_ERR(root)) {
6500                 fprintf(stderr, "Error reading data reloc tree\n");
6501                 return PTR_ERR(root);
6502         }
6503         record_root_in_trans(trans, root);
6504         ret = btrfs_fsck_reinit_root(trans, root, 0);
6505         if (ret)
6506                 goto out;
6507         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
6508 out:
6509         btrfs_free_path(path);
6510         return ret;
6511 }
6512
6513 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
6514                               struct btrfs_fs_info *fs_info)
6515 {
6516         u64 start = 0;
6517         int ret;
6518
6519         /*
6520          * The only reason we don't do this is because right now we're just
6521          * walking the trees we find and pinning down their bytes, we don't look
6522          * at any of the leaves.  In order to do mixed groups we'd have to check
6523          * the leaves of any fs roots and pin down the bytes for any file
6524          * extents we find.  Not hard but why do it if we don't have to?
6525          */
6526         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
6527                 fprintf(stderr, "We don't support re-initing the extent tree "
6528                         "for mixed block groups yet, please notify a btrfs "
6529                         "developer you want to do this so they can add this "
6530                         "functionality.\n");
6531                 return -EINVAL;
6532         }
6533
6534         /*
6535          * first we need to walk all of the trees except the extent tree and pin
6536          * down the bytes that are in use so we don't overwrite any existing
6537          * metadata.
6538          */
6539         ret = pin_metadata_blocks(fs_info);
6540         if (ret) {
6541                 fprintf(stderr, "error pinning down used bytes\n");
6542                 return ret;
6543         }
6544
6545         /*
6546          * Need to drop all the block groups since we're going to recreate all
6547          * of them again.
6548          */
6549         btrfs_free_block_groups(fs_info);
6550         ret = reset_block_groups(fs_info);
6551         if (ret) {
6552                 fprintf(stderr, "error resetting the block groups\n");
6553                 return ret;
6554         }
6555
6556         /* Ok we can allocate now, reinit the extent root */
6557         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
6558         if (ret) {
6559                 fprintf(stderr, "extent root initialization failed\n");
6560                 /*
6561                  * When the transaction code is updated we should end the
6562                  * transaction, but for now progs only knows about commit so
6563                  * just return an error.
6564                  */
6565                 return ret;
6566         }
6567
6568         /*
6569          * Now we have all the in-memory block groups setup so we can make
6570          * allocations properly, and the metadata we care about is safe since we
6571          * pinned all of it above.
6572          */
6573         while (1) {
6574                 struct btrfs_block_group_cache *cache;
6575
6576                 cache = btrfs_lookup_first_block_group(fs_info, start);
6577                 if (!cache)
6578                         break;
6579                 start = cache->key.objectid + cache->key.offset;
6580                 ret = btrfs_insert_item(trans, fs_info->extent_root,
6581                                         &cache->key, &cache->item,
6582                                         sizeof(cache->item));
6583                 if (ret) {
6584                         fprintf(stderr, "Error adding block group\n");
6585                         return ret;
6586                 }
6587                 btrfs_extent_post_op(trans, fs_info->extent_root);
6588         }
6589
6590         ret = reset_balance(trans, fs_info);
6591         if (ret)
6592                 fprintf(stderr, "error reseting the pending balance\n");
6593
6594         return ret;
6595 }
6596
6597 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
6598 {
6599         struct btrfs_path *path;
6600         struct btrfs_trans_handle *trans;
6601         struct btrfs_key key;
6602         int ret;
6603
6604         printf("Recowing metadata block %llu\n", eb->start);
6605         key.objectid = btrfs_header_owner(eb);
6606         key.type = BTRFS_ROOT_ITEM_KEY;
6607         key.offset = (u64)-1;
6608
6609         root = btrfs_read_fs_root(root->fs_info, &key);
6610         if (IS_ERR(root)) {
6611                 fprintf(stderr, "Couldn't find owner root %llu\n",
6612                         key.objectid);
6613                 return PTR_ERR(root);
6614         }
6615
6616         path = btrfs_alloc_path();
6617         if (!path)
6618                 return -ENOMEM;
6619
6620         trans = btrfs_start_transaction(root, 1);
6621         if (IS_ERR(trans)) {
6622                 btrfs_free_path(path);
6623                 return PTR_ERR(trans);
6624         }
6625
6626         path->lowest_level = btrfs_header_level(eb);
6627         if (path->lowest_level)
6628                 btrfs_node_key_to_cpu(eb, &key, 0);
6629         else
6630                 btrfs_item_key_to_cpu(eb, &key, 0);
6631
6632         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6633         btrfs_commit_transaction(trans, root);
6634         btrfs_free_path(path);
6635         return ret;
6636 }
6637
6638 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
6639 {
6640         struct btrfs_path *path;
6641         struct btrfs_trans_handle *trans;
6642         struct btrfs_key key;
6643         int ret;
6644
6645         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
6646                bad->key.type, bad->key.offset);
6647         key.objectid = bad->root_id;
6648         key.type = BTRFS_ROOT_ITEM_KEY;
6649         key.offset = (u64)-1;
6650
6651         root = btrfs_read_fs_root(root->fs_info, &key);
6652         if (IS_ERR(root)) {
6653                 fprintf(stderr, "Couldn't find owner root %llu\n",
6654                         key.objectid);
6655                 return PTR_ERR(root);
6656         }
6657
6658         path = btrfs_alloc_path();
6659         if (!path)
6660                 return -ENOMEM;
6661
6662         trans = btrfs_start_transaction(root, 1);
6663         if (IS_ERR(trans)) {
6664                 btrfs_free_path(path);
6665                 return PTR_ERR(trans);
6666         }
6667
6668         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
6669         if (ret) {
6670                 if (ret > 0)
6671                         ret = 0;
6672                 goto out;
6673         }
6674         ret = btrfs_del_item(trans, root, path);
6675 out:
6676         btrfs_commit_transaction(trans, root);
6677         btrfs_free_path(path);
6678         return ret;
6679 }
6680
6681 static int zero_log_tree(struct btrfs_root *root)
6682 {
6683         struct btrfs_trans_handle *trans;
6684         int ret;
6685
6686         trans = btrfs_start_transaction(root, 1);
6687         if (IS_ERR(trans)) {
6688                 ret = PTR_ERR(trans);
6689                 return ret;
6690         }
6691         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
6692         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
6693         ret = btrfs_commit_transaction(trans, root);
6694         return ret;
6695 }
6696
6697 static int populate_csum(struct btrfs_trans_handle *trans,
6698                          struct btrfs_root *csum_root, char *buf, u64 start,
6699                          u64 len)
6700 {
6701         u64 offset = 0;
6702         u64 sectorsize;
6703         int ret = 0;
6704
6705         while (offset < len) {
6706                 sectorsize = csum_root->sectorsize;
6707                 ret = read_extent_data(csum_root, buf, start + offset,
6708                                        &sectorsize, 0);
6709                 if (ret)
6710                         break;
6711                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
6712                                             start + offset, buf, sectorsize);
6713                 if (ret)
6714                         break;
6715                 offset += sectorsize;
6716         }
6717         return ret;
6718 }
6719
6720 static int fill_csum_tree(struct btrfs_trans_handle *trans,
6721                           struct btrfs_root *csum_root)
6722 {
6723         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
6724         struct btrfs_path *path;
6725         struct btrfs_extent_item *ei;
6726         struct extent_buffer *leaf;
6727         char *buf;
6728         struct btrfs_key key;
6729         int ret;
6730
6731         path = btrfs_alloc_path();
6732         if (!path)
6733                 return -ENOMEM;
6734
6735         key.objectid = 0;
6736         key.type = BTRFS_EXTENT_ITEM_KEY;
6737         key.offset = 0;
6738
6739         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
6740         if (ret < 0) {
6741                 btrfs_free_path(path);
6742                 return ret;
6743         }
6744
6745         buf = malloc(csum_root->sectorsize);
6746         if (!buf) {
6747                 btrfs_free_path(path);
6748                 return -ENOMEM;
6749         }
6750
6751         while (1) {
6752                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6753                         ret = btrfs_next_leaf(extent_root, path);
6754                         if (ret < 0)
6755                                 break;
6756                         if (ret) {
6757                                 ret = 0;
6758                                 break;
6759                         }
6760                 }
6761                 leaf = path->nodes[0];
6762
6763                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6764                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6765                         path->slots[0]++;
6766                         continue;
6767                 }
6768
6769                 ei = btrfs_item_ptr(leaf, path->slots[0],
6770                                     struct btrfs_extent_item);
6771                 if (!(btrfs_extent_flags(leaf, ei) &
6772                       BTRFS_EXTENT_FLAG_DATA)) {
6773                         path->slots[0]++;
6774                         continue;
6775                 }
6776
6777                 ret = populate_csum(trans, csum_root, buf, key.objectid,
6778                                     key.offset);
6779                 if (ret)
6780                         break;
6781                 path->slots[0]++;
6782         }
6783
6784         btrfs_free_path(path);
6785         free(buf);
6786         return ret;
6787 }
6788
6789 static struct option long_options[] = {
6790         { "super", 1, NULL, 's' },
6791         { "repair", 0, NULL, 0 },
6792         { "init-csum-tree", 0, NULL, 0 },
6793         { "init-extent-tree", 0, NULL, 0 },
6794         { "check-data-csum", 0, NULL, 0 },
6795         { "backup", 0, NULL, 0 },
6796         { "subvol-extents", no_argument, NULL, 'E' },
6797         { "qgroup-report", 0, NULL, 'Q' },
6798         { NULL, 0, NULL, 0}
6799 };
6800
6801 const char * const cmd_check_usage[] = {
6802         "btrfs check [options] <device>",
6803         "Check an unmounted btrfs filesystem.",
6804         "",
6805         "-s|--super <superblock>     use this superblock copy",
6806         "-b|--backup                 use the backup root copy",
6807         "--repair                    try to repair the filesystem",
6808         "--init-csum-tree            create a new CRC tree",
6809         "--init-extent-tree          create a new extent tree",
6810         "--check-data-csum           verify checkums of data blocks",
6811         "--qgroup-report             print a report on qgroup consistency",
6812         "--subvol-extents            print subvolume extents and sharing state",
6813         NULL
6814 };
6815
6816 int cmd_check(int argc, char **argv)
6817 {
6818         struct cache_tree root_cache;
6819         struct btrfs_root *root;
6820         struct btrfs_fs_info *info;
6821         u64 bytenr = 0;
6822         u64 subvolid = 0;
6823         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
6824         int ret;
6825         u64 num;
6826         int option_index = 0;
6827         int init_csum_tree = 0;
6828         int qgroup_report = 0;
6829         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
6830
6831         while(1) {
6832                 int c;
6833                 c = getopt_long(argc, argv, "as:b", long_options,
6834                                 &option_index);
6835                 if (c < 0)
6836                         break;
6837                 switch(c) {
6838                         case 'a': /* ignored */ break;
6839                         case 'b':
6840                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
6841                                 break;
6842                         case 's':
6843                                 num = arg_strtou64(optarg);
6844                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
6845                                         fprintf(stderr,
6846                                                 "ERROR: super mirror should be less than: %d\n",
6847                                                 BTRFS_SUPER_MIRROR_MAX);
6848                                         exit(1);
6849                                 }
6850                                 bytenr = btrfs_sb_offset(((int)num));
6851                                 printf("using SB copy %llu, bytenr %llu\n", num,
6852                                        (unsigned long long)bytenr);
6853                                 break;
6854                         case 'Q':
6855                                 qgroup_report = 1;
6856                                 break;
6857                         case 'E':
6858                                 subvolid = arg_strtou64(optarg);
6859                                 break;
6860                         case '?':
6861                         case 'h':
6862                                 usage(cmd_check_usage);
6863                 }
6864                 if (option_index == 1) {
6865                         printf("enabling repair mode\n");
6866                         repair = 1;
6867                         ctree_flags |= OPEN_CTREE_WRITES;
6868                 } else if (option_index == 2) {
6869                         printf("Creating a new CRC tree\n");
6870                         init_csum_tree = 1;
6871                         repair = 1;
6872                         ctree_flags |= OPEN_CTREE_WRITES;
6873                 } else if (option_index == 3) {
6874                         init_extent_tree = 1;
6875                         ctree_flags |= (OPEN_CTREE_WRITES |
6876                                         OPEN_CTREE_NO_BLOCK_GROUPS);
6877                         repair = 1;
6878                 } else if (option_index == 4) {
6879                         check_data_csum = 1;
6880                 }
6881         }
6882         argc = argc - optind;
6883
6884         if (check_argc_exact(argc, 1))
6885                 usage(cmd_check_usage);
6886
6887         radix_tree_init();
6888         cache_tree_init(&root_cache);
6889
6890         if((ret = check_mounted(argv[optind])) < 0) {
6891                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
6892                 goto err_out;
6893         } else if(ret) {
6894                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
6895                 ret = -EBUSY;
6896                 goto err_out;
6897         }
6898
6899         /* only allow partial opening under repair mode */
6900         if (repair)
6901                 ctree_flags |= OPEN_CTREE_PARTIAL;
6902
6903         info = open_ctree_fs_info(argv[optind], bytenr, 0, ctree_flags);
6904         if (!info) {
6905                 fprintf(stderr, "Couldn't open file system\n");
6906                 ret = -EIO;
6907                 goto err_out;
6908         }
6909
6910         root = info->fs_root;
6911         /*
6912          * repair mode will force us to commit transaction which
6913          * will make us fail to load log tree when mounting.
6914          */
6915         if (repair && btrfs_super_log_root(info->super_copy)) {
6916                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
6917                 if (!ret) {
6918                         ret = 1;
6919                         goto close_out;
6920                 }
6921                 ret = zero_log_tree(root);
6922                 if (ret) {
6923                         fprintf(stderr, "fail to zero log tree\n");
6924                         goto close_out;
6925                 }
6926         }
6927
6928         uuid_unparse(info->super_copy->fsid, uuidbuf);
6929         if (qgroup_report) {
6930                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
6931                        uuidbuf);
6932                 ret = qgroup_verify_all(info);
6933                 if (ret == 0)
6934                         print_qgroup_report(1);
6935                 goto close_out;
6936         }
6937         if (subvolid) {
6938                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
6939                        subvolid, argv[optind], uuidbuf);
6940                 ret = print_extent_state(info, subvolid);
6941                 goto close_out;
6942         }
6943         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
6944
6945         if (!extent_buffer_uptodate(info->tree_root->node) ||
6946             !extent_buffer_uptodate(info->dev_root->node) ||
6947             !extent_buffer_uptodate(info->chunk_root->node)) {
6948                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6949                 ret = -EIO;
6950                 goto close_out;
6951         }
6952
6953         if (init_extent_tree || init_csum_tree) {
6954                 struct btrfs_trans_handle *trans;
6955
6956                 trans = btrfs_start_transaction(info->extent_root, 0);
6957                 if (IS_ERR(trans)) {
6958                         fprintf(stderr, "Error starting transaction\n");
6959                         ret = PTR_ERR(trans);
6960                         goto close_out;
6961                 }
6962
6963                 if (init_extent_tree) {
6964                         printf("Creating a new extent tree\n");
6965                         ret = reinit_extent_tree(trans, info);
6966                         if (ret)
6967                                 goto close_out;
6968                 }
6969
6970                 if (init_csum_tree) {
6971                         fprintf(stderr, "Reinit crc root\n");
6972                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
6973                         if (ret) {
6974                                 fprintf(stderr, "crc root initialization failed\n");
6975                                 ret = -EIO;
6976                                 goto close_out;
6977                         }
6978
6979                         ret = fill_csum_tree(trans, info->csum_root);
6980                         if (ret) {
6981                                 fprintf(stderr, "crc refilling failed\n");
6982                                 return -EIO;
6983                         }
6984                 }
6985                 /*
6986                  * Ok now we commit and run the normal fsck, which will add
6987                  * extent entries for all of the items it finds.
6988                  */
6989                 ret = btrfs_commit_transaction(trans, info->extent_root);
6990                 if (ret)
6991                         goto close_out;
6992         }
6993         if (!extent_buffer_uptodate(info->extent_root->node)) {
6994                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
6995                 ret = -EIO;
6996                 goto close_out;
6997         }
6998         if (!extent_buffer_uptodate(info->csum_root->node)) {
6999                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
7000                 ret = -EIO;
7001                 goto close_out;
7002         }
7003
7004         fprintf(stderr, "checking extents\n");
7005         ret = check_chunks_and_extents(root);
7006         if (ret)
7007                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
7008
7009         fprintf(stderr, "checking free space cache\n");
7010         ret = check_space_cache(root);
7011         if (ret)
7012                 goto out;
7013
7014         /*
7015          * We used to have to have these hole extents in between our real
7016          * extents so if we don't have this flag set we need to make sure there
7017          * are no gaps in the file extents for inodes, otherwise we can just
7018          * ignore it when this happens.
7019          */
7020         no_holes = btrfs_fs_incompat(root->fs_info,
7021                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
7022         fprintf(stderr, "checking fs roots\n");
7023         ret = check_fs_roots(root, &root_cache);
7024         if (ret)
7025                 goto out;
7026
7027         fprintf(stderr, "checking csums\n");
7028         ret = check_csums(root);
7029         if (ret)
7030                 goto out;
7031
7032         fprintf(stderr, "checking root refs\n");
7033         ret = check_root_refs(root, &root_cache);
7034         if (ret)
7035                 goto out;
7036
7037         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
7038                 struct extent_buffer *eb;
7039
7040                 eb = list_first_entry(&root->fs_info->recow_ebs,
7041                                       struct extent_buffer, recow);
7042                 list_del_init(&eb->recow);
7043                 ret = recow_extent_buffer(root, eb);
7044                 if (ret)
7045                         break;
7046         }
7047
7048         while (!list_empty(&delete_items)) {
7049                 struct bad_item *bad;
7050
7051                 bad = list_first_entry(&delete_items, struct bad_item, list);
7052                 list_del_init(&bad->list);
7053                 if (repair)
7054                         ret = delete_bad_item(root, bad);
7055                 free(bad);
7056         }
7057
7058         if (info->quota_enabled) {
7059                 int err;
7060                 fprintf(stderr, "checking quota groups\n");
7061                 err = qgroup_verify_all(info);
7062                 if (err)
7063                         goto out;
7064         }
7065
7066         if (!list_empty(&root->fs_info->recow_ebs)) {
7067                 fprintf(stderr, "Transid errors in file system\n");
7068                 ret = 1;
7069         }
7070 out:
7071         print_qgroup_report(0);
7072         if (found_old_backref) { /*
7073                  * there was a disk format change when mixed
7074                  * backref was in testing tree. The old format
7075                  * existed about one week.
7076                  */
7077                 printf("\n * Found old mixed backref format. "
7078                        "The old format is not supported! *"
7079                        "\n * Please mount the FS in readonly mode, "
7080                        "backup data and re-format the FS. *\n\n");
7081                 ret = 1;
7082         }
7083         printf("found %llu bytes used err is %d\n",
7084                (unsigned long long)bytes_used, ret);
7085         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
7086         printf("total tree bytes: %llu\n",
7087                (unsigned long long)total_btree_bytes);
7088         printf("total fs tree bytes: %llu\n",
7089                (unsigned long long)total_fs_tree_bytes);
7090         printf("total extent tree bytes: %llu\n",
7091                (unsigned long long)total_extent_tree_bytes);
7092         printf("btree space waste bytes: %llu\n",
7093                (unsigned long long)btree_space_waste);
7094         printf("file data blocks allocated: %llu\n referenced %llu\n",
7095                 (unsigned long long)data_bytes_allocated,
7096                 (unsigned long long)data_bytes_referenced);
7097         printf("%s\n", BTRFS_BUILD_VERSION);
7098
7099         free_root_recs_tree(&root_cache);
7100 close_out:
7101         close_ctree(root);
7102 err_out:
7103         return ret;
7104 }