btrfs-progs: Record orphan data extent ref to corresponding root.
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "transaction.h"
34 #include "utils.h"
35 #include "commands.h"
36 #include "free-space-cache.h"
37 #include "btrfsck.h"
38 #include "qgroup-verify.h"
39 #include "rbtree-utils.h"
40 #include "backref.h"
41 #include "ulist.h"
42
/*
 * File-scope state for the checker.
 * NOTE(review): apart from no_holes, none of these is read or written in the
 * part of the file visible here; the groupings below follow the names only
 * and should be confirmed against the actual users.
 */
/* u64 counters, all starting at zero. */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
/* List heads, initially empty. */
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Integer switches; presumably set from command-line options -- confirm. */
static int repair = 0;
/* When non-zero, maybe_free_inode_rec() skips the FILE_EXTENT_DISCOUNT check. */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
58
/*
 * Common header for backref records: it is embedded as the first member
 * ("node") of both struct data_backref and struct tree_backref.
 * NOTE(review): the flag meanings are not visible in this part of the file;
 * is_data presumably tells the two containing types apart -- confirm.
 */
struct extent_backref {
        struct list_head list;
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
67
/*
 * Backref record for a data extent; starts with the shared extent_backref
 * header.
 */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share the same storage.
         * NOTE(review): which one is meaningful presumably depends on
         * node.full_backref -- confirm.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
82
/*
 * Similar to data_backref, but without the members whose values cannot be
 * determined yet, and linked through a plain list_head instead of an
 * extent_backref header.
 * Stored in the root->orphan_data_extents list.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
96
/*
 * Backref record for a tree block; starts with the shared extent_backref
 * header. parent and root share the same storage.
 */
struct tree_backref {
        struct extent_backref node;
        union {
                u64 parent;
                u64 root;
        };
};
104
/*
 * Bookkeeping record for one extent. It carries three list linkages
 * (backrefs, dups, list) and an embedded cache_extent.
 * NOTE(review): every user of this struct lies outside the part of the file
 * visible here, so field semantics are not documented -- confirm against the
 * extent-tree checking code.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int flag_block_full_backref:1;
};
128
/*
 * One (directory, name) reference to an inode, kept on
 * inode_record.backrefs. add_inode_backref() fills it in as the matching
 * DIR_INDEX, DIR_ITEM and INODE_REF/INODE_EXTREF items are encountered.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;   /* a BTRFS_DIR_ITEM_KEY item was seen */
        unsigned int found_dir_index:1;  /* a BTRFS_DIR_INDEX_KEY item was seen */
        unsigned int found_inode_ref:1;  /* an INODE_REF or INODE_EXTREF was seen */
        unsigned int filetype:8;         /* file type taken from the dir item/index */
        int errors;                      /* REF_ERR_* bit mask */
        unsigned int ref_type;           /* item type that set found_inode_ref */
        u64 dir;                         /* objectid of the referencing directory */
        u64 index;
        u16 namelen;                     /* length of name, excluding the NUL */
        /* Allocated with namelen + 1 trailing bytes and NUL-terminated. */
        char name[0];
};
142
/*
 * List-linked record describing one root item.
 * NOTE(review): users are outside the part of the file visible here; the
 * fields presumably mirror values read from a btrfs root item -- confirm.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
152
/*
 * Error bits for a backref. add_inode_backref() and maybe_free_inode_rec()
 * store them in inode_backref.errors, and print_ref_error() turns them into
 * text.
 * NOTE(review): the *_ROOT_* bits are presumably used with
 * root_backref.errors -- confirm.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
166
/*
 * Everything collected about one inode while walking a tree.
 * Records are reached through ptr_node entries in a cache_tree and may be
 * shared between several such entries (see refs).
 */
struct inode_record {
        struct list_head backrefs;           /* list of struct inode_backref */
        unsigned int checked:1;              /* gates the checks in maybe_free_inode_rec() */
        unsigned int merging:1;              /* set while merge_inode_recs() runs */
        unsigned int found_inode_item:1;     /* set by process_inode_item() */
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;            /* inode has BTRFS_INODE_NODATASUM */
        int errors;                          /* I_ERR_* bit mask */

        u64 ino;
        /* The next four are copied from the inode item by process_inode_item(). */
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;                      /* bumped per DIR_ITEM backref; compared with nlink */
        u64 found_size;                      /* compared with isize (dirs) or nbytes (files) */
        u64 extent_start;                    /* (u64)-1 in a fresh record */
        u64 extent_end;
        u64 first_extent_gap;                /* (u64)-1 in a fresh record */

        u32 refs;                            /* reference count; freed when it drops to 0 */
};
193
/*
 * Error bits stored in inode_record.errors; print_inode_error() turns them
 * into text.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
208
/*
 * One reference to a root, kept on root_record.backrefs. The layout parallels
 * struct inode_backref (flags, errors, dir/index, trailing name).
 * NOTE(review): the code that fills this in is outside the part of the file
 * visible here; errors presumably holds REF_ERR_* bits -- confirm.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
223
/*
 * Per-root record with an embedded cache_extent and a list of backrefs.
 * NOTE(review): users are outside the part of the file visible here; the
 * backrefs list presumably holds struct root_backref entries -- confirm.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
231
/*
 * cache_tree entry that points at a separately allocated payload.
 * In the inode caches, cache.start is the inode number, cache.size is 1 and
 * data is the struct inode_record (see get_inode_rec()).
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
236
/*
 * State attached to one tree block, keyed by its bytenr in
 * walk_control.shared (see add_shared_node()).
 */
struct shared_node {
        struct cache_extent cache;       /* start = block bytenr, size = 1 */
        struct cache_tree root_cache;    /* ptr_node entries -> inode_record */
        struct cache_tree inode_cache;   /* ptr_node entries -> inode_record */
        struct inode_record *current;    /* record currently being filled in, or NULL */
        u32 refs;                        /* node is freed once this reaches 0 */
};
244
/*
 * A (start, size) pair.
 * NOTE(review): users are outside the part of the file visible here.
 */
struct block_info {
        u64 start;
        u32 size;
};
249
/* State carried through a tree walk. */
struct walk_control {
        struct cache_tree shared;                    /* shared_node entries keyed by bytenr */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];  /* shared_node per tree level, or NULL */
        int active_node;                             /* level whose nodes[] entry is in use */
        int root_level;                              /* level at which the walk started -- TODO confirm */
};
256
/*
 * List-linked (key, root id) pair.
 * NOTE(review): users are outside the part of the file visible here;
 * presumably these are queued on the delete_items list -- confirm.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
262
/*
 * Forward declaration. The function is static, so its definition has to live
 * elsewhere in this translation unit (not in the part visible here).
 */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
264
265 static void record_root_in_trans(struct btrfs_trans_handle *trans,
266                                  struct btrfs_root *root)
267 {
268         if (root->last_trans != trans->transid) {
269                 root->track_dirty = 1;
270                 root->last_trans = trans->transid;
271                 root->commit_root = root->node;
272                 extent_buffer_get(root->node);
273         }
274 }
275
276 static u8 imode_to_type(u32 imode)
277 {
278 #define S_SHIFT 12
279         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
280                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
281                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
282                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
283                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
284                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
285                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
286                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
287         };
288
289         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
290 #undef S_SHIFT
291 }
292
293 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
294 {
295         struct device_record *rec1;
296         struct device_record *rec2;
297
298         rec1 = rb_entry(node1, struct device_record, node);
299         rec2 = rb_entry(node2, struct device_record, node);
300         if (rec1->devid > rec2->devid)
301                 return -1;
302         else if (rec1->devid < rec2->devid)
303                 return 1;
304         else
305                 return 0;
306 }
307
308 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
309 {
310         struct inode_record *rec;
311         struct inode_backref *backref;
312         struct inode_backref *orig;
313         size_t size;
314
315         rec = malloc(sizeof(*rec));
316         memcpy(rec, orig_rec, sizeof(*rec));
317         rec->refs = 1;
318         INIT_LIST_HEAD(&rec->backrefs);
319
320         list_for_each_entry(orig, &orig_rec->backrefs, list) {
321                 size = sizeof(*orig) + orig->namelen + 1;
322                 backref = malloc(size);
323                 memcpy(backref, orig, size);
324                 list_add_tail(&backref->list, &rec->backrefs);
325         }
326         return rec;
327 }
328
329 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
330 {
331         u64 root_objectid = root->root_key.objectid;
332         int errors = rec->errors;
333
334         if (!errors)
335                 return;
336         /* reloc root errors, we print its corresponding fs root objectid*/
337         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
338                 root_objectid = root->root_key.offset;
339                 fprintf(stderr, "reloc");
340         }
341         fprintf(stderr, "root %llu inode %llu errors %x",
342                 (unsigned long long) root_objectid,
343                 (unsigned long long) rec->ino, rec->errors);
344
345         if (errors & I_ERR_NO_INODE_ITEM)
346                 fprintf(stderr, ", no inode item");
347         if (errors & I_ERR_NO_ORPHAN_ITEM)
348                 fprintf(stderr, ", no orphan item");
349         if (errors & I_ERR_DUP_INODE_ITEM)
350                 fprintf(stderr, ", dup inode item");
351         if (errors & I_ERR_DUP_DIR_INDEX)
352                 fprintf(stderr, ", dup dir index");
353         if (errors & I_ERR_ODD_DIR_ITEM)
354                 fprintf(stderr, ", odd dir item");
355         if (errors & I_ERR_ODD_FILE_EXTENT)
356                 fprintf(stderr, ", odd file extent");
357         if (errors & I_ERR_BAD_FILE_EXTENT)
358                 fprintf(stderr, ", bad file extent");
359         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
360                 fprintf(stderr, ", file extent overlap");
361         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
362                 fprintf(stderr, ", file extent discount");
363         if (errors & I_ERR_DIR_ISIZE_WRONG)
364                 fprintf(stderr, ", dir isize wrong");
365         if (errors & I_ERR_FILE_NBYTES_WRONG)
366                 fprintf(stderr, ", nbytes wrong");
367         if (errors & I_ERR_ODD_CSUM_ITEM)
368                 fprintf(stderr, ", odd csum item");
369         if (errors & I_ERR_SOME_CSUM_MISSING)
370                 fprintf(stderr, ", some csum missing");
371         if (errors & I_ERR_LINK_COUNT_WRONG)
372                 fprintf(stderr, ", link count wrong");
373         fprintf(stderr, "\n");
374 }
375
376 static void print_ref_error(int errors)
377 {
378         if (errors & REF_ERR_NO_DIR_ITEM)
379                 fprintf(stderr, ", no dir item");
380         if (errors & REF_ERR_NO_DIR_INDEX)
381                 fprintf(stderr, ", no dir index");
382         if (errors & REF_ERR_NO_INODE_REF)
383                 fprintf(stderr, ", no inode ref");
384         if (errors & REF_ERR_DUP_DIR_ITEM)
385                 fprintf(stderr, ", dup dir item");
386         if (errors & REF_ERR_DUP_DIR_INDEX)
387                 fprintf(stderr, ", dup dir index");
388         if (errors & REF_ERR_DUP_INODE_REF)
389                 fprintf(stderr, ", dup inode ref");
390         if (errors & REF_ERR_INDEX_UNMATCH)
391                 fprintf(stderr, ", index unmatch");
392         if (errors & REF_ERR_FILETYPE_UNMATCH)
393                 fprintf(stderr, ", filetype unmatch");
394         if (errors & REF_ERR_NAME_TOO_LONG)
395                 fprintf(stderr, ", name too long");
396         if (errors & REF_ERR_NO_ROOT_REF)
397                 fprintf(stderr, ", no root ref");
398         if (errors & REF_ERR_NO_ROOT_BACKREF)
399                 fprintf(stderr, ", no root backref");
400         if (errors & REF_ERR_DUP_ROOT_REF)
401                 fprintf(stderr, ", dup root ref");
402         if (errors & REF_ERR_DUP_ROOT_BACKREF)
403                 fprintf(stderr, ", dup root backref");
404         fprintf(stderr, "\n");
405 }
406
407 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
408                                           u64 ino, int mod)
409 {
410         struct ptr_node *node;
411         struct cache_extent *cache;
412         struct inode_record *rec = NULL;
413         int ret;
414
415         cache = lookup_cache_extent(inode_cache, ino, 1);
416         if (cache) {
417                 node = container_of(cache, struct ptr_node, cache);
418                 rec = node->data;
419                 if (mod && rec->refs > 1) {
420                         node->data = clone_inode_rec(rec);
421                         rec->refs--;
422                         rec = node->data;
423                 }
424         } else if (mod) {
425                 rec = calloc(1, sizeof(*rec));
426                 rec->ino = ino;
427                 rec->extent_start = (u64)-1;
428                 rec->first_extent_gap = (u64)-1;
429                 rec->refs = 1;
430                 INIT_LIST_HEAD(&rec->backrefs);
431
432                 node = malloc(sizeof(*node));
433                 node->cache.start = ino;
434                 node->cache.size = 1;
435                 node->data = rec;
436
437                 if (ino == BTRFS_FREE_INO_OBJECTID)
438                         rec->found_link = 1;
439
440                 ret = insert_cache_extent(inode_cache, &node->cache);
441                 BUG_ON(ret);
442         }
443         return rec;
444 }
445
446 static void free_inode_rec(struct inode_record *rec)
447 {
448         struct inode_backref *backref;
449
450         if (--rec->refs > 0)
451                 return;
452
453         while (!list_empty(&rec->backrefs)) {
454                 backref = list_entry(rec->backrefs.next,
455                                      struct inode_backref, list);
456                 list_del(&backref->list);
457                 free(backref);
458         }
459         free(rec);
460 }
461
462 static int can_free_inode_rec(struct inode_record *rec)
463 {
464         if (!rec->errors && rec->checked && rec->found_inode_item &&
465             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
466                 return 1;
467         return 0;
468 }
469
470 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
471                                  struct inode_record *rec)
472 {
473         struct cache_extent *cache;
474         struct inode_backref *tmp, *backref;
475         struct ptr_node *node;
476         unsigned char filetype;
477
478         if (!rec->found_inode_item)
479                 return;
480
481         filetype = imode_to_type(rec->imode);
482         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
483                 if (backref->found_dir_item && backref->found_dir_index) {
484                         if (backref->filetype != filetype)
485                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
486                         if (!backref->errors && backref->found_inode_ref) {
487                                 list_del(&backref->list);
488                                 free(backref);
489                         }
490                 }
491         }
492
493         if (!rec->checked || rec->merging)
494                 return;
495
496         if (S_ISDIR(rec->imode)) {
497                 if (rec->found_size != rec->isize)
498                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
499                 if (rec->found_file_extent)
500                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
501         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
502                 if (rec->found_dir_item)
503                         rec->errors |= I_ERR_ODD_DIR_ITEM;
504                 if (rec->found_size != rec->nbytes)
505                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
506                 if (rec->extent_start == (u64)-1 || rec->extent_start > 0)
507                         rec->first_extent_gap = 0;
508                 if (rec->nlink > 0 && !no_holes &&
509                     (rec->extent_end < rec->isize ||
510                      rec->first_extent_gap < rec->isize))
511                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
512         }
513
514         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
515                 if (rec->found_csum_item && rec->nodatasum)
516                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
517                 if (rec->some_csum_missing && !rec->nodatasum)
518                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
519         }
520
521         BUG_ON(rec->refs != 1);
522         if (can_free_inode_rec(rec)) {
523                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
524                 node = container_of(cache, struct ptr_node, cache);
525                 BUG_ON(node->data != rec);
526                 remove_cache_extent(inode_cache, &node->cache);
527                 free(node);
528                 free_inode_rec(rec);
529         }
530 }
531
532 static int check_orphan_item(struct btrfs_root *root, u64 ino)
533 {
534         struct btrfs_path path;
535         struct btrfs_key key;
536         int ret;
537
538         key.objectid = BTRFS_ORPHAN_OBJECTID;
539         key.type = BTRFS_ORPHAN_ITEM_KEY;
540         key.offset = ino;
541
542         btrfs_init_path(&path);
543         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
544         btrfs_release_path(&path);
545         if (ret > 0)
546                 ret = -ENOENT;
547         return ret;
548 }
549
550 static int process_inode_item(struct extent_buffer *eb,
551                               int slot, struct btrfs_key *key,
552                               struct shared_node *active_node)
553 {
554         struct inode_record *rec;
555         struct btrfs_inode_item *item;
556
557         rec = active_node->current;
558         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
559         if (rec->found_inode_item) {
560                 rec->errors |= I_ERR_DUP_INODE_ITEM;
561                 return 1;
562         }
563         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
564         rec->nlink = btrfs_inode_nlink(eb, item);
565         rec->isize = btrfs_inode_size(eb, item);
566         rec->nbytes = btrfs_inode_nbytes(eb, item);
567         rec->imode = btrfs_inode_mode(eb, item);
568         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
569                 rec->nodatasum = 1;
570         rec->found_inode_item = 1;
571         if (rec->nlink == 0)
572                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
573         maybe_free_inode_rec(&active_node->inode_cache, rec);
574         return 0;
575 }
576
577 static struct inode_backref *get_inode_backref(struct inode_record *rec,
578                                                 const char *name,
579                                                 int namelen, u64 dir)
580 {
581         struct inode_backref *backref;
582
583         list_for_each_entry(backref, &rec->backrefs, list) {
584                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
585                         break;
586                 if (backref->dir != dir || backref->namelen != namelen)
587                         continue;
588                 if (memcmp(name, backref->name, namelen))
589                         continue;
590                 return backref;
591         }
592
593         backref = malloc(sizeof(*backref) + namelen + 1);
594         memset(backref, 0, sizeof(*backref));
595         backref->dir = dir;
596         backref->namelen = namelen;
597         memcpy(backref->name, name, namelen);
598         backref->name[namelen] = '\0';
599         list_add_tail(&backref->list, &rec->backrefs);
600         return backref;
601 }
602
603 static int add_inode_backref(struct cache_tree *inode_cache,
604                              u64 ino, u64 dir, u64 index,
605                              const char *name, int namelen,
606                              int filetype, int itemtype, int errors)
607 {
608         struct inode_record *rec;
609         struct inode_backref *backref;
610
611         rec = get_inode_rec(inode_cache, ino, 1);
612         backref = get_inode_backref(rec, name, namelen, dir);
613         if (errors)
614                 backref->errors |= errors;
615         if (itemtype == BTRFS_DIR_INDEX_KEY) {
616                 if (backref->found_dir_index)
617                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
618                 if (backref->found_inode_ref && backref->index != index)
619                         backref->errors |= REF_ERR_INDEX_UNMATCH;
620                 if (backref->found_dir_item && backref->filetype != filetype)
621                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
622
623                 backref->index = index;
624                 backref->filetype = filetype;
625                 backref->found_dir_index = 1;
626         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
627                 rec->found_link++;
628                 if (backref->found_dir_item)
629                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
630                 if (backref->found_dir_index && backref->filetype != filetype)
631                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
632
633                 backref->filetype = filetype;
634                 backref->found_dir_item = 1;
635         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
636                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
637                 if (backref->found_inode_ref)
638                         backref->errors |= REF_ERR_DUP_INODE_REF;
639                 if (backref->found_dir_index && backref->index != index)
640                         backref->errors |= REF_ERR_INDEX_UNMATCH;
641                 else
642                         backref->index = index;
643
644                 backref->ref_type = itemtype;
645                 backref->found_inode_ref = 1;
646         } else {
647                 BUG_ON(1);
648         }
649
650         maybe_free_inode_rec(inode_cache, rec);
651         return 0;
652 }
653
/*
 * Fold everything known about @src into @dst, where @dst is the record for
 * the same inode held in @dst_cache.
 *
 * Backrefs are replayed through add_inode_backref(); flags, sizes, extent
 * range and error bits are combined; inode item fields are copied if @dst did
 * not have an inode item yet, otherwise a duplicate inode item is flagged.
 * @src is only read. Always returns 0.
 */
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
                            struct cache_tree *dst_cache)
{
        struct inode_backref *backref;
        u32 dir_count = 0;

        /*
         * add_inode_backref() ends with maybe_free_inode_rec(), which returns
         * before its checks and before freeing while merging is set.
         */
        dst->merging = 1;
        list_for_each_entry(backref, &src->backrefs, list) {
                if (backref->found_dir_index) {
                        add_inode_backref(dst_cache, dst->ino, backref->dir,
                                        backref->index, backref->name,
                                        backref->namelen, backref->filetype,
                                        BTRFS_DIR_INDEX_KEY, backref->errors);
                }
                if (backref->found_dir_item) {
                        /* Each DIR_ITEM replay bumps found_link by one. */
                        dir_count++;
                        add_inode_backref(dst_cache, dst->ino,
                                        backref->dir, 0, backref->name,
                                        backref->namelen, backref->filetype,
                                        BTRFS_DIR_ITEM_KEY, backref->errors);
                }
                if (backref->found_inode_ref) {
                        add_inode_backref(dst_cache, dst->ino,
                                        backref->dir, backref->index,
                                        backref->name, backref->namelen, 0,
                                        backref->ref_type, backref->errors);
                }
        }

        if (src->found_dir_item)
                dst->found_dir_item = 1;
        if (src->found_file_extent)
                dst->found_file_extent = 1;
        if (src->found_csum_item)
                dst->found_csum_item = 1;
        if (src->some_csum_missing)
                dst->some_csum_missing = 1;
        /* Keep the smaller of the two gap offsets. */
        if (dst->first_extent_gap > src->first_extent_gap)
                dst->first_extent_gap = src->first_extent_gap;

        /*
         * The replay above already counted dir_count links through
         * add_inode_backref(); add only the part of src->found_link that is
         * not covered by those DIR_ITEM backrefs.
         */
        BUG_ON(src->found_link < dir_count);
        dst->found_link += src->found_link - dir_count;
        dst->found_size += src->found_size;
        /* (u64)-1 is the initial extent_start value set in get_inode_rec(). */
        if (src->extent_start != (u64)-1) {
                if (dst->extent_start == (u64)-1) {
                        dst->extent_start = src->extent_start;
                        dst->extent_end = src->extent_end;
                } else {
                        if (dst->extent_end > src->extent_start)
                                dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
                        else if (dst->extent_end < src->extent_start &&
                                 dst->extent_end < dst->first_extent_gap)
                                dst->first_extent_gap = dst->extent_end;
                        if (dst->extent_end < src->extent_end)
                                dst->extent_end = src->extent_end;
                }
        }

        dst->errors |= src->errors;
        if (src->found_inode_item) {
                if (!dst->found_inode_item) {
                        dst->nlink = src->nlink;
                        dst->isize = src->isize;
                        dst->nbytes = src->nbytes;
                        dst->imode = src->imode;
                        dst->nodatasum = src->nodatasum;
                        dst->found_inode_item = 1;
                } else {
                        dst->errors |= I_ERR_DUP_INODE_ITEM;
                }
        }
        dst->merging = 0;

        return 0;
}
729
/*
 * Transfer the inode records collected on @src_node into @dst_node, dropping
 * one reference on @src_node in the process.
 *
 * If that was the last reference, the ptr_node entries themselves are moved;
 * otherwise new ptr_node entries are created that share the records (the
 * record's refs is incremented). A record that already exists in the
 * destination is merged into it. Both caches are processed: root_cache first,
 * then inode_cache. Finally @dst_node's current record may be advanced to the
 * inode @src_node was working on. Always returns 0.
 *
 * The caller is expected to free @src_node when its refs reached 0 (see
 * enter_shared_node()).
 */
static int splice_shared_node(struct shared_node *src_node,
                              struct shared_node *dst_node)
{
        struct cache_extent *cache;
        struct ptr_node *node, *ins;
        struct cache_tree *src, *dst;
        struct inode_record *rec, *conflict;
        u64 current_ino = 0;
        int splice = 0;
        int ret;

        if (--src_node->refs == 0)
                splice = 1;
        if (src_node->current)
                current_ino = src_node->current->ino;

        src = &src_node->root_cache;
        dst = &dst_node->root_cache;
again:
        cache = search_cache_extent(src, 0);
        while (cache) {
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                /* Advance first: node may be removed from src just below. */
                cache = next_cache_extent(cache);

                if (splice) {
                        remove_cache_extent(src, &node->cache);
                        ins = node;
                } else {
                        ins = malloc(sizeof(*ins));
                        ins->cache.start = node->cache.start;
                        ins->cache.size = node->cache.size;
                        ins->data = rec;
                        rec->refs++;
                }
                ret = insert_cache_extent(dst, &ins->cache);
                if (ret == -EEXIST) {
                        /* dst already tracks this inode: merge, then drop ours. */
                        conflict = get_inode_rec(dst, rec->ino, 1);
                        merge_inode_recs(rec, conflict, dst);
                        if (rec->checked) {
                                conflict->checked = 1;
                                if (dst_node->current == conflict)
                                        dst_node->current = NULL;
                        }
                        maybe_free_inode_rec(dst, conflict);
                        free_inode_rec(rec);
                        free(ins);
                } else {
                        BUG_ON(ret);
                }
        }

        /* Second pass: repeat the loop for the inode caches. */
        if (src == &src_node->root_cache) {
                src = &src_node->inode_cache;
                dst = &dst_node->inode_cache;
                goto again;
        }

        /* From here on dst refers to dst_node->inode_cache. */
        if (current_ino > 0 && (!dst_node->current ||
            current_ino > dst_node->current->ino)) {
                if (dst_node->current) {
                        dst_node->current->checked = 1;
                        maybe_free_inode_rec(dst, dst_node->current);
                }
                dst_node->current = get_inode_rec(dst, current_ino, 1);
        }
        return 0;
}
798
799 static void free_inode_ptr(struct cache_extent *cache)
800 {
801         struct ptr_node *node;
802         struct inode_record *rec;
803
804         node = container_of(cache, struct ptr_node, cache);
805         rec = node->data;
806         free_inode_rec(rec);
807         free(node);
808 }
809
/*
 * NOTE(review): the macro is defined outside this file. It presumably
 * expands to free_inode_recs_tree(), which enter_shared_node() calls on a
 * cache_tree, and presumably applies free_inode_ptr() to every entry of that
 * tree -- confirm against the macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
811
812 static struct shared_node *find_shared_node(struct cache_tree *shared,
813                                             u64 bytenr)
814 {
815         struct cache_extent *cache;
816         struct shared_node *node;
817
818         cache = lookup_cache_extent(shared, bytenr, 1);
819         if (cache) {
820                 node = container_of(cache, struct shared_node, cache);
821                 return node;
822         }
823         return NULL;
824 }
825
826 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
827 {
828         int ret;
829         struct shared_node *node;
830
831         node = calloc(1, sizeof(*node));
832         node->cache.start = bytenr;
833         node->cache.size = 1;
834         cache_tree_init(&node->root_cache);
835         cache_tree_init(&node->inode_cache);
836         node->refs = refs;
837
838         ret = insert_cache_extent(shared, &node->cache);
839         BUG_ON(ret);
840         return 0;
841 }
842
843 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
844                              struct walk_control *wc, int level)
845 {
846         struct shared_node *node;
847         struct shared_node *dest;
848
849         if (level == wc->active_node)
850                 return 0;
851
852         BUG_ON(wc->active_node <= level);
853         node = find_shared_node(&wc->shared, bytenr);
854         if (!node) {
855                 add_shared_node(&wc->shared, bytenr, refs);
856                 node = find_shared_node(&wc->shared, bytenr);
857                 wc->nodes[level] = node;
858                 wc->active_node = level;
859                 return 0;
860         }
861
862         if (wc->root_level == wc->active_node &&
863             btrfs_root_refs(&root->root_item) == 0) {
864                 if (--node->refs == 0) {
865                         free_inode_recs_tree(&node->root_cache);
866                         free_inode_recs_tree(&node->inode_cache);
867                         remove_cache_extent(&wc->shared, &node->cache);
868                         free(node);
869                 }
870                 return 1;
871         }
872
873         dest = wc->nodes[wc->active_node];
874         splice_shared_node(node, dest);
875         if (node->refs == 0) {
876                 remove_cache_extent(&wc->shared, &node->cache);
877                 free(node);
878         }
879         return 1;
880 }
881
882 static int leave_shared_node(struct btrfs_root *root,
883                              struct walk_control *wc, int level)
884 {
885         struct shared_node *node;
886         struct shared_node *dest;
887         int i;
888
889         if (level == wc->root_level)
890                 return 0;
891
892         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
893                 if (wc->nodes[i])
894                         break;
895         }
896         BUG_ON(i >= BTRFS_MAX_LEVEL);
897
898         node = wc->nodes[wc->active_node];
899         wc->nodes[wc->active_node] = NULL;
900         wc->active_node = i;
901
902         dest = wc->nodes[wc->active_node];
903         if (wc->active_node < wc->root_level ||
904             btrfs_root_refs(&root->root_item) > 0) {
905                 BUG_ON(node->refs <= 1);
906                 splice_shared_node(node, dest);
907         } else {
908                 BUG_ON(node->refs < 2);
909                 node->refs--;
910         }
911         return 0;
912 }
913
914 /*
915  * Returns:
916  * < 0 - on error
917  * 1   - if the root with id child_root_id is a child of root parent_root_id
918  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
919  *       has other root(s) as parent(s)
920  * 2   - if the root child_root_id doesn't have any parent roots
921  */
922 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
923                          u64 child_root_id)
924 {
925         struct btrfs_path path;
926         struct btrfs_key key;
927         struct extent_buffer *leaf;
928         int has_parent = 0;
929         int ret;
930
931         btrfs_init_path(&path);
932
933         key.objectid = parent_root_id;
934         key.type = BTRFS_ROOT_REF_KEY;
935         key.offset = child_root_id;
936         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
937                                 0, 0);
938         if (ret < 0)
939                 return ret;
940         btrfs_release_path(&path);
941         if (!ret)
942                 return 1;
943
944         key.objectid = child_root_id;
945         key.type = BTRFS_ROOT_BACKREF_KEY;
946         key.offset = 0;
947         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
948                                 0, 0);
949         if (ret < 0)
950                 goto out;
951
952         while (1) {
953                 leaf = path.nodes[0];
954                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
955                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
956                         if (ret)
957                                 break;
958                         leaf = path.nodes[0];
959                 }
960
961                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
962                 if (key.objectid != child_root_id ||
963                     key.type != BTRFS_ROOT_BACKREF_KEY)
964                         break;
965
966                 has_parent = 1;
967
968                 if (key.offset == parent_root_id) {
969                         btrfs_release_path(&path);
970                         return 1;
971                 }
972
973                 path.slots[0]++;
974         }
975 out:
976         btrfs_release_path(&path);
977         if (ret < 0)
978                 return ret;
979         return has_parent ? 0 : 2;
980 }
981
982 static int process_dir_item(struct btrfs_root *root,
983                             struct extent_buffer *eb,
984                             int slot, struct btrfs_key *key,
985                             struct shared_node *active_node)
986 {
987         u32 total;
988         u32 cur = 0;
989         u32 len;
990         u32 name_len;
991         u32 data_len;
992         int error;
993         int nritems = 0;
994         int filetype;
995         struct btrfs_dir_item *di;
996         struct inode_record *rec;
997         struct cache_tree *root_cache;
998         struct cache_tree *inode_cache;
999         struct btrfs_key location;
1000         char namebuf[BTRFS_NAME_LEN];
1001
1002         root_cache = &active_node->root_cache;
1003         inode_cache = &active_node->inode_cache;
1004         rec = active_node->current;
1005         rec->found_dir_item = 1;
1006
1007         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1008         total = btrfs_item_size_nr(eb, slot);
1009         while (cur < total) {
1010                 nritems++;
1011                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1012                 name_len = btrfs_dir_name_len(eb, di);
1013                 data_len = btrfs_dir_data_len(eb, di);
1014                 filetype = btrfs_dir_type(eb, di);
1015
1016                 rec->found_size += name_len;
1017                 if (name_len <= BTRFS_NAME_LEN) {
1018                         len = name_len;
1019                         error = 0;
1020                 } else {
1021                         len = BTRFS_NAME_LEN;
1022                         error = REF_ERR_NAME_TOO_LONG;
1023                 }
1024                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1025
1026                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1027                         add_inode_backref(inode_cache, location.objectid,
1028                                           key->objectid, key->offset, namebuf,
1029                                           len, filetype, key->type, error);
1030                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1031                         add_inode_backref(root_cache, location.objectid,
1032                                           key->objectid, key->offset,
1033                                           namebuf, len, filetype,
1034                                           key->type, error);
1035                 } else {
1036                         fprintf(stderr, "invalid location in dir item %u\n",
1037                                 location.type);
1038                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1039                                           key->objectid, key->offset, namebuf,
1040                                           len, filetype, key->type, error);
1041                 }
1042
1043                 len = sizeof(*di) + name_len + data_len;
1044                 di = (struct btrfs_dir_item *)((char *)di + len);
1045                 cur += len;
1046         }
1047         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1048                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1049
1050         return 0;
1051 }
1052
1053 static int process_inode_ref(struct extent_buffer *eb,
1054                              int slot, struct btrfs_key *key,
1055                              struct shared_node *active_node)
1056 {
1057         u32 total;
1058         u32 cur = 0;
1059         u32 len;
1060         u32 name_len;
1061         u64 index;
1062         int error;
1063         struct cache_tree *inode_cache;
1064         struct btrfs_inode_ref *ref;
1065         char namebuf[BTRFS_NAME_LEN];
1066
1067         inode_cache = &active_node->inode_cache;
1068
1069         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1070         total = btrfs_item_size_nr(eb, slot);
1071         while (cur < total) {
1072                 name_len = btrfs_inode_ref_name_len(eb, ref);
1073                 index = btrfs_inode_ref_index(eb, ref);
1074                 if (name_len <= BTRFS_NAME_LEN) {
1075                         len = name_len;
1076                         error = 0;
1077                 } else {
1078                         len = BTRFS_NAME_LEN;
1079                         error = REF_ERR_NAME_TOO_LONG;
1080                 }
1081                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1082                 add_inode_backref(inode_cache, key->objectid, key->offset,
1083                                   index, namebuf, len, 0, key->type, error);
1084
1085                 len = sizeof(*ref) + name_len;
1086                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1087                 cur += len;
1088         }
1089         return 0;
1090 }
1091
1092 static int process_inode_extref(struct extent_buffer *eb,
1093                                 int slot, struct btrfs_key *key,
1094                                 struct shared_node *active_node)
1095 {
1096         u32 total;
1097         u32 cur = 0;
1098         u32 len;
1099         u32 name_len;
1100         u64 index;
1101         u64 parent;
1102         int error;
1103         struct cache_tree *inode_cache;
1104         struct btrfs_inode_extref *extref;
1105         char namebuf[BTRFS_NAME_LEN];
1106
1107         inode_cache = &active_node->inode_cache;
1108
1109         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1110         total = btrfs_item_size_nr(eb, slot);
1111         while (cur < total) {
1112                 name_len = btrfs_inode_extref_name_len(eb, extref);
1113                 index = btrfs_inode_extref_index(eb, extref);
1114                 parent = btrfs_inode_extref_parent(eb, extref);
1115                 if (name_len <= BTRFS_NAME_LEN) {
1116                         len = name_len;
1117                         error = 0;
1118                 } else {
1119                         len = BTRFS_NAME_LEN;
1120                         error = REF_ERR_NAME_TOO_LONG;
1121                 }
1122                 read_extent_buffer(eb, namebuf,
1123                                    (unsigned long)(extref + 1), len);
1124                 add_inode_backref(inode_cache, key->objectid, parent,
1125                                   index, namebuf, len, 0, key->type, error);
1126
1127                 len = sizeof(*extref) + name_len;
1128                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1129                 cur += len;
1130         }
1131         return 0;
1132
1133 }
1134
1135 static int count_csum_range(struct btrfs_root *root, u64 start,
1136                             u64 len, u64 *found)
1137 {
1138         struct btrfs_key key;
1139         struct btrfs_path path;
1140         struct extent_buffer *leaf;
1141         int ret;
1142         size_t size;
1143         *found = 0;
1144         u64 csum_end;
1145         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1146
1147         btrfs_init_path(&path);
1148
1149         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1150         key.offset = start;
1151         key.type = BTRFS_EXTENT_CSUM_KEY;
1152
1153         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1154                                 &key, &path, 0, 0);
1155         if (ret < 0)
1156                 goto out;
1157         if (ret > 0 && path.slots[0] > 0) {
1158                 leaf = path.nodes[0];
1159                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1160                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1161                     key.type == BTRFS_EXTENT_CSUM_KEY)
1162                         path.slots[0]--;
1163         }
1164
1165         while (len > 0) {
1166                 leaf = path.nodes[0];
1167                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1168                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1169                         if (ret > 0)
1170                                 break;
1171                         else if (ret < 0)
1172                                 goto out;
1173                         leaf = path.nodes[0];
1174                 }
1175
1176                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1177                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1178                     key.type != BTRFS_EXTENT_CSUM_KEY)
1179                         break;
1180
1181                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1182                 if (key.offset >= start + len)
1183                         break;
1184
1185                 if (key.offset > start)
1186                         start = key.offset;
1187
1188                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1189                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1190                 if (csum_end > start) {
1191                         size = min(csum_end - start, len);
1192                         len -= size;
1193                         start += size;
1194                         *found += size;
1195                 }
1196
1197                 path.slots[0]++;
1198         }
1199 out:
1200         btrfs_release_path(&path);
1201         if (ret < 0)
1202                 return ret;
1203         return 0;
1204 }
1205
1206 static int process_file_extent(struct btrfs_root *root,
1207                                 struct extent_buffer *eb,
1208                                 int slot, struct btrfs_key *key,
1209                                 struct shared_node *active_node)
1210 {
1211         struct inode_record *rec;
1212         struct btrfs_file_extent_item *fi;
1213         u64 num_bytes = 0;
1214         u64 disk_bytenr = 0;
1215         u64 extent_offset = 0;
1216         u64 mask = root->sectorsize - 1;
1217         int extent_type;
1218         int ret;
1219
1220         rec = active_node->current;
1221         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1222         rec->found_file_extent = 1;
1223
1224         if (rec->extent_start == (u64)-1) {
1225                 rec->extent_start = key->offset;
1226                 rec->extent_end = key->offset;
1227         }
1228
1229         if (rec->extent_end > key->offset)
1230                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1231         else if (rec->extent_end < key->offset &&
1232                  rec->extent_end < rec->first_extent_gap)
1233                 rec->first_extent_gap = rec->extent_end;
1234
1235         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1236         extent_type = btrfs_file_extent_type(eb, fi);
1237
1238         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1239                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1240                 if (num_bytes == 0)
1241                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1242                 rec->found_size += num_bytes;
1243                 num_bytes = (num_bytes + mask) & ~mask;
1244         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1245                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1246                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1247                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1248                 extent_offset = btrfs_file_extent_offset(eb, fi);
1249                 if (num_bytes == 0 || (num_bytes & mask))
1250                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1251                 if (num_bytes + extent_offset >
1252                     btrfs_file_extent_ram_bytes(eb, fi))
1253                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1254                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1255                     (btrfs_file_extent_compression(eb, fi) ||
1256                      btrfs_file_extent_encryption(eb, fi) ||
1257                      btrfs_file_extent_other_encoding(eb, fi)))
1258                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1259                 if (disk_bytenr > 0)
1260                         rec->found_size += num_bytes;
1261         } else {
1262                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1263         }
1264         rec->extent_end = key->offset + num_bytes;
1265
1266         if (disk_bytenr > 0) {
1267                 u64 found;
1268                 if (btrfs_file_extent_compression(eb, fi))
1269                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1270                 else
1271                         disk_bytenr += extent_offset;
1272
1273                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1274                 if (ret < 0)
1275                         return ret;
1276                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1277                         if (found > 0)
1278                                 rec->found_csum_item = 1;
1279                         if (found < num_bytes)
1280                                 rec->some_csum_missing = 1;
1281                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1282                         if (found > 0)
1283                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1284                 }
1285         }
1286         return 0;
1287 }
1288
1289 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1290                             struct walk_control *wc)
1291 {
1292         struct btrfs_key key;
1293         u32 nritems;
1294         int i;
1295         int ret = 0;
1296         struct cache_tree *inode_cache;
1297         struct shared_node *active_node;
1298
1299         if (wc->root_level == wc->active_node &&
1300             btrfs_root_refs(&root->root_item) == 0)
1301                 return 0;
1302
1303         active_node = wc->nodes[wc->active_node];
1304         inode_cache = &active_node->inode_cache;
1305         nritems = btrfs_header_nritems(eb);
1306         for (i = 0; i < nritems; i++) {
1307                 btrfs_item_key_to_cpu(eb, &key, i);
1308
1309                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1310                         continue;
1311                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1312                         continue;
1313
1314                 if (active_node->current == NULL ||
1315                     active_node->current->ino < key.objectid) {
1316                         if (active_node->current) {
1317                                 active_node->current->checked = 1;
1318                                 maybe_free_inode_rec(inode_cache,
1319                                                      active_node->current);
1320                         }
1321                         active_node->current = get_inode_rec(inode_cache,
1322                                                              key.objectid, 1);
1323                 }
1324                 switch (key.type) {
1325                 case BTRFS_DIR_ITEM_KEY:
1326                 case BTRFS_DIR_INDEX_KEY:
1327                         ret = process_dir_item(root, eb, i, &key, active_node);
1328                         break;
1329                 case BTRFS_INODE_REF_KEY:
1330                         ret = process_inode_ref(eb, i, &key, active_node);
1331                         break;
1332                 case BTRFS_INODE_EXTREF_KEY:
1333                         ret = process_inode_extref(eb, i, &key, active_node);
1334                         break;
1335                 case BTRFS_INODE_ITEM_KEY:
1336                         ret = process_inode_item(eb, i, &key, active_node);
1337                         break;
1338                 case BTRFS_EXTENT_DATA_KEY:
1339                         ret = process_file_extent(root, eb, i, &key,
1340                                                   active_node);
1341                         break;
1342                 default:
1343                         break;
1344                 };
1345         }
1346         return ret;
1347 }
1348
1349 static void reada_walk_down(struct btrfs_root *root,
1350                             struct extent_buffer *node, int slot)
1351 {
1352         u64 bytenr;
1353         u64 ptr_gen;
1354         u32 nritems;
1355         u32 blocksize;
1356         int i;
1357         int level;
1358
1359         level = btrfs_header_level(node);
1360         if (level != 1)
1361                 return;
1362
1363         nritems = btrfs_header_nritems(node);
1364         blocksize = btrfs_level_size(root, level - 1);
1365         for (i = slot; i < nritems; i++) {
1366                 bytenr = btrfs_node_blockptr(node, i);
1367                 ptr_gen = btrfs_node_ptr_generation(node, i);
1368                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1369         }
1370 }
1371
1372 /*
1373  * Check the child node/leaf by the following condition:
1374  * 1. the first item key of the node/leaf should be the same with the one
1375  *    in parent.
1376  * 2. block in parent node should match the child node/leaf.
1377  * 3. generation of parent node and child's header should be consistent.
1378  *
1379  * Or the child node/leaf pointed by the key in parent is not valid.
1380  *
1381  * We hope to check leaf owner too, but since subvol may share leaves,
1382  * which makes leaf owner check not so strong, key check should be
1383  * sufficient enough for that case.
1384  */
1385 static int check_child_node(struct btrfs_root *root,
1386                             struct extent_buffer *parent, int slot,
1387                             struct extent_buffer *child)
1388 {
1389         struct btrfs_key parent_key;
1390         struct btrfs_key child_key;
1391         int ret = 0;
1392
1393         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1394         if (btrfs_header_level(child) == 0)
1395                 btrfs_item_key_to_cpu(child, &child_key, 0);
1396         else
1397                 btrfs_node_key_to_cpu(child, &child_key, 0);
1398
1399         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1400                 ret = -EINVAL;
1401                 fprintf(stderr,
1402                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1403                         parent_key.objectid, parent_key.type, parent_key.offset,
1404                         child_key.objectid, child_key.type, child_key.offset);
1405         }
1406         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1407                 ret = -EINVAL;
1408                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1409                         btrfs_node_blockptr(parent, slot),
1410                         btrfs_header_bytenr(child));
1411         }
1412         if (btrfs_node_ptr_generation(parent, slot) !=
1413             btrfs_header_generation(child)) {
1414                 ret = -EINVAL;
1415                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1416                         btrfs_header_generation(child),
1417                         btrfs_node_ptr_generation(parent, slot));
1418         }
1419         return ret;
1420 }
1421
1422 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1423                           struct walk_control *wc, int *level)
1424 {
1425         enum btrfs_tree_block_status status;
1426         u64 bytenr;
1427         u64 ptr_gen;
1428         struct extent_buffer *next;
1429         struct extent_buffer *cur;
1430         u32 blocksize;
1431         int ret, err = 0;
1432         u64 refs;
1433
1434         WARN_ON(*level < 0);
1435         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1436         ret = btrfs_lookup_extent_info(NULL, root,
1437                                        path->nodes[*level]->start,
1438                                        *level, 1, &refs, NULL);
1439         if (ret < 0) {
1440                 err = ret;
1441                 goto out;
1442         }
1443
1444         if (refs > 1) {
1445                 ret = enter_shared_node(root, path->nodes[*level]->start,
1446                                         refs, wc, *level);
1447                 if (ret > 0) {
1448                         err = ret;
1449                         goto out;
1450                 }
1451         }
1452
1453         while (*level >= 0) {
1454                 WARN_ON(*level < 0);
1455                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1456                 cur = path->nodes[*level];
1457
1458                 if (btrfs_header_level(cur) != *level)
1459                         WARN_ON(1);
1460
1461                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1462                         break;
1463                 if (*level == 0) {
1464                         ret = process_one_leaf(root, cur, wc);
1465                         if (ret < 0)
1466                                 err = ret;
1467                         break;
1468                 }
1469                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1470                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1471                 blocksize = btrfs_level_size(root, *level - 1);
1472                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1473                                                1, &refs, NULL);
1474                 if (ret < 0)
1475                         refs = 0;
1476
1477                 if (refs > 1) {
1478                         ret = enter_shared_node(root, bytenr, refs,
1479                                                 wc, *level - 1);
1480                         if (ret > 0) {
1481                                 path->slots[*level]++;
1482                                 continue;
1483                         }
1484                 }
1485
1486                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1487                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1488                         free_extent_buffer(next);
1489                         reada_walk_down(root, cur, path->slots[*level]);
1490                         next = read_tree_block(root, bytenr, blocksize,
1491                                                ptr_gen);
1492                         if (!next) {
1493                                 struct btrfs_key node_key;
1494
1495                                 btrfs_node_key_to_cpu(path->nodes[*level],
1496                                                       &node_key,
1497                                                       path->slots[*level]);
1498                                 btrfs_add_corrupt_extent_record(root->fs_info,
1499                                                 &node_key,
1500                                                 path->nodes[*level]->start,
1501                                                 root->leafsize, *level);
1502                                 err = -EIO;
1503                                 goto out;
1504                         }
1505                 }
1506
1507                 ret = check_child_node(root, cur, path->slots[*level], next);
1508                 if (ret) {
1509                         err = ret;
1510                         goto out;
1511                 }
1512
1513                 if (btrfs_is_leaf(next))
1514                         status = btrfs_check_leaf(root, NULL, next);
1515                 else
1516                         status = btrfs_check_node(root, NULL, next);
1517                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1518                         free_extent_buffer(next);
1519                         err = -EIO;
1520                         goto out;
1521                 }
1522
1523                 *level = *level - 1;
1524                 free_extent_buffer(path->nodes[*level]);
1525                 path->nodes[*level] = next;
1526                 path->slots[*level] = 0;
1527         }
1528 out:
1529         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1530         return err;
1531 }
1532
1533 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1534                         struct walk_control *wc, int *level)
1535 {
1536         int i;
1537         struct extent_buffer *leaf;
1538
1539         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1540                 leaf = path->nodes[i];
1541                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1542                         path->slots[i]++;
1543                         *level = i;
1544                         return 0;
1545                 } else {
1546                         free_extent_buffer(path->nodes[*level]);
1547                         path->nodes[*level] = NULL;
1548                         BUG_ON(*level > wc->active_node);
1549                         if (*level == wc->active_node)
1550                                 leave_shared_node(root, wc, *level);
1551                         *level = i + 1;
1552                 }
1553         }
1554         return 1;
1555 }
1556
1557 static int check_root_dir(struct inode_record *rec)
1558 {
1559         struct inode_backref *backref;
1560         int ret = -1;
1561
1562         if (!rec->found_inode_item || rec->errors)
1563                 goto out;
1564         if (rec->nlink != 1 || rec->found_link != 0)
1565                 goto out;
1566         if (list_empty(&rec->backrefs))
1567                 goto out;
1568         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1569         if (!backref->found_inode_ref)
1570                 goto out;
1571         if (backref->index != 0 || backref->namelen != 2 ||
1572             memcmp(backref->name, "..", 2))
1573                 goto out;
1574         if (backref->found_dir_index || backref->found_dir_item)
1575                 goto out;
1576         ret = 0;
1577 out:
1578         return ret;
1579 }
1580
1581 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1582                               struct btrfs_root *root, struct btrfs_path *path,
1583                               struct inode_record *rec)
1584 {
1585         struct btrfs_inode_item *ei;
1586         struct btrfs_key key;
1587         int ret;
1588
1589         key.objectid = rec->ino;
1590         key.type = BTRFS_INODE_ITEM_KEY;
1591         key.offset = (u64)-1;
1592
1593         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1594         if (ret < 0)
1595                 goto out;
1596         if (ret) {
1597                 if (!path->slots[0]) {
1598                         ret = -ENOENT;
1599                         goto out;
1600                 }
1601                 path->slots[0]--;
1602                 ret = 0;
1603         }
1604         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1605         if (key.objectid != rec->ino) {
1606                 ret = -ENOENT;
1607                 goto out;
1608         }
1609
1610         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1611                             struct btrfs_inode_item);
1612         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1613         btrfs_mark_buffer_dirty(path->nodes[0]);
1614         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1615         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1616                root->root_key.objectid);
1617 out:
1618         btrfs_release_path(path);
1619         return ret;
1620 }
1621
1622 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1623                                     struct btrfs_root *root,
1624                                     struct btrfs_path *path,
1625                                     struct inode_record *rec)
1626 {
1627         int ret;
1628
1629         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1630         btrfs_release_path(path);
1631         if (!ret)
1632                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1633         return ret;
1634 }
1635
1636 static int add_missing_dir_index(struct btrfs_root *root,
1637                                  struct cache_tree *inode_cache,
1638                                  struct inode_record *rec,
1639                                  struct inode_backref *backref)
1640 {
1641         struct btrfs_path *path;
1642         struct btrfs_trans_handle *trans;
1643         struct btrfs_dir_item *dir_item;
1644         struct extent_buffer *leaf;
1645         struct btrfs_key key;
1646         struct btrfs_disk_key disk_key;
1647         struct inode_record *dir_rec;
1648         unsigned long name_ptr;
1649         u32 data_size = sizeof(*dir_item) + backref->namelen;
1650         int ret;
1651
1652         path = btrfs_alloc_path();
1653         if (!path)
1654                 return -ENOMEM;
1655
1656         trans = btrfs_start_transaction(root, 1);
1657         if (IS_ERR(trans)) {
1658                 btrfs_free_path(path);
1659                 return PTR_ERR(trans);
1660         }
1661
1662         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1663                 (unsigned long long)rec->ino);
1664         key.objectid = backref->dir;
1665         key.type = BTRFS_DIR_INDEX_KEY;
1666         key.offset = backref->index;
1667
1668         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
1669         BUG_ON(ret);
1670
1671         leaf = path->nodes[0];
1672         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
1673
1674         disk_key.objectid = cpu_to_le64(rec->ino);
1675         disk_key.type = BTRFS_INODE_ITEM_KEY;
1676         disk_key.offset = 0;
1677
1678         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1679         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1680         btrfs_set_dir_data_len(leaf, dir_item, 0);
1681         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
1682         name_ptr = (unsigned long)(dir_item + 1);
1683         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
1684         btrfs_mark_buffer_dirty(leaf);
1685         btrfs_free_path(path);
1686         btrfs_commit_transaction(trans, root);
1687
1688         backref->found_dir_index = 1;
1689         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
1690         if (!dir_rec)
1691                 return 0;
1692         dir_rec->found_size += backref->namelen;
1693         if (dir_rec->found_size == dir_rec->isize &&
1694             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
1695                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1696         if (dir_rec->found_size != dir_rec->isize)
1697                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1698
1699         return 0;
1700 }
1701
1702 static int delete_dir_index(struct btrfs_root *root,
1703                             struct cache_tree *inode_cache,
1704                             struct inode_record *rec,
1705                             struct inode_backref *backref)
1706 {
1707         struct btrfs_trans_handle *trans;
1708         struct btrfs_dir_item *di;
1709         struct btrfs_path *path;
1710         int ret = 0;
1711
1712         path = btrfs_alloc_path();
1713         if (!path)
1714                 return -ENOMEM;
1715
1716         trans = btrfs_start_transaction(root, 1);
1717         if (IS_ERR(trans)) {
1718                 btrfs_free_path(path);
1719                 return PTR_ERR(trans);
1720         }
1721
1722
1723         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1724                 (unsigned long long)backref->dir,
1725                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
1726                 (unsigned long long)root->objectid);
1727
1728         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
1729                                     backref->name, backref->namelen,
1730                                     backref->index, -1);
1731         if (IS_ERR(di)) {
1732                 ret = PTR_ERR(di);
1733                 btrfs_free_path(path);
1734                 btrfs_commit_transaction(trans, root);
1735                 if (ret == -ENOENT)
1736                         return 0;
1737                 return ret;
1738         }
1739
1740         if (!di)
1741                 ret = btrfs_del_item(trans, root, path);
1742         else
1743                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
1744         BUG_ON(ret);
1745         btrfs_free_path(path);
1746         btrfs_commit_transaction(trans, root);
1747         return ret;
1748 }
1749
1750 static int create_inode_item(struct btrfs_root *root,
1751                              struct inode_record *rec,
1752                              struct inode_backref *backref, int root_dir)
1753 {
1754         struct btrfs_trans_handle *trans;
1755         struct btrfs_inode_item inode_item;
1756         time_t now = time(NULL);
1757         int ret;
1758
1759         trans = btrfs_start_transaction(root, 1);
1760         if (IS_ERR(trans)) {
1761                 ret = PTR_ERR(trans);
1762                 return ret;
1763         }
1764
1765         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
1766                 "be incomplete, please check permissions and content after "
1767                 "the fsck completes.\n", (unsigned long long)root->objectid,
1768                 (unsigned long long)rec->ino);
1769
1770         memset(&inode_item, 0, sizeof(inode_item));
1771         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
1772         if (root_dir)
1773                 btrfs_set_stack_inode_nlink(&inode_item, 1);
1774         else
1775                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
1776         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
1777         if (rec->found_dir_item) {
1778                 if (rec->found_file_extent)
1779                         fprintf(stderr, "root %llu inode %llu has both a dir "
1780                                 "item and extents, unsure if it is a dir or a "
1781                                 "regular file so setting it as a directory\n",
1782                                 (unsigned long long)root->objectid,
1783                                 (unsigned long long)rec->ino);
1784                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
1785                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
1786         } else if (!rec->found_dir_item) {
1787                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
1788                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
1789         }
1790         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
1791         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
1792         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
1793         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
1794         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
1795         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
1796         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
1797         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
1798
1799         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
1800         BUG_ON(ret);
1801         btrfs_commit_transaction(trans, root);
1802         return 0;
1803 }
1804
1805 static int repair_inode_backrefs(struct btrfs_root *root,
1806                                  struct inode_record *rec,
1807                                  struct cache_tree *inode_cache,
1808                                  int delete)
1809 {
1810         struct inode_backref *tmp, *backref;
1811         u64 root_dirid = btrfs_root_dirid(&root->root_item);
1812         int ret = 0;
1813         int repaired = 0;
1814
1815         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1816                 if (!delete && rec->ino == root_dirid) {
1817                         if (!rec->found_inode_item) {
1818                                 ret = create_inode_item(root, rec, backref, 1);
1819                                 if (ret)
1820                                         break;
1821                                 repaired++;
1822                         }
1823                 }
1824
1825                 /* Index 0 for root dir's are special, don't mess with it */
1826                 if (rec->ino == root_dirid && backref->index == 0)
1827                         continue;
1828
1829                 if (delete &&
1830                     ((backref->found_dir_index && !backref->found_inode_ref) ||
1831                      (backref->found_dir_index && backref->found_inode_ref &&
1832                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
1833                         ret = delete_dir_index(root, inode_cache, rec, backref);
1834                         if (ret)
1835                                 break;
1836                         repaired++;
1837                         list_del(&backref->list);
1838                         free(backref);
1839                 }
1840
1841                 if (!delete && !backref->found_dir_index &&
1842                     backref->found_dir_item && backref->found_inode_ref) {
1843                         ret = add_missing_dir_index(root, inode_cache, rec,
1844                                                     backref);
1845                         if (ret)
1846                                 break;
1847                         repaired++;
1848                         if (backref->found_dir_item &&
1849                             backref->found_dir_index &&
1850                             backref->found_dir_index) {
1851                                 if (!backref->errors &&
1852                                     backref->found_inode_ref) {
1853                                         list_del(&backref->list);
1854                                         free(backref);
1855                                 }
1856                         }
1857                 }
1858
1859                 if (!delete && (!backref->found_dir_index &&
1860                                 !backref->found_dir_item &&
1861                                 backref->found_inode_ref)) {
1862                         struct btrfs_trans_handle *trans;
1863                         struct btrfs_key location;
1864
1865                         ret = check_dir_conflict(root, backref->name,
1866                                                  backref->namelen,
1867                                                  backref->dir,
1868                                                  backref->index);
1869                         if (ret) {
1870                                 /*
1871                                  * let nlink fixing routine to handle it,
1872                                  * which can do it better.
1873                                  */
1874                                 ret = 0;
1875                                 break;
1876                         }
1877                         location.objectid = rec->ino;
1878                         location.type = BTRFS_INODE_ITEM_KEY;
1879                         location.offset = 0;
1880
1881                         trans = btrfs_start_transaction(root, 1);
1882                         if (IS_ERR(trans)) {
1883                                 ret = PTR_ERR(trans);
1884                                 break;
1885                         }
1886                         fprintf(stderr, "adding missing dir index/item pair "
1887                                 "for inode %llu\n",
1888                                 (unsigned long long)rec->ino);
1889                         ret = btrfs_insert_dir_item(trans, root, backref->name,
1890                                                     backref->namelen,
1891                                                     backref->dir, &location,
1892                                                     imode_to_type(rec->imode),
1893                                                     backref->index);
1894                         BUG_ON(ret);
1895                         btrfs_commit_transaction(trans, root);
1896                         repaired++;
1897                 }
1898
1899                 if (!delete && (backref->found_inode_ref &&
1900                                 backref->found_dir_index &&
1901                                 backref->found_dir_item &&
1902                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
1903                                 !rec->found_inode_item)) {
1904                         ret = create_inode_item(root, rec, backref, 0);
1905                         if (ret)
1906                                 break;
1907                         repaired++;
1908                 }
1909
1910         }
1911         return ret ? ret : repaired;
1912 }
1913
1914 /*
1915  * To determine the file type for nlink/inode_item repair
1916  *
1917  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
1918  * Return -ENOENT if file type is not found.
1919  */
1920 static int find_file_type(struct inode_record *rec, u8 *type)
1921 {
1922         struct inode_backref *backref;
1923
1924         /* For inode item recovered case */
1925         if (rec->found_inode_item) {
1926                 *type = imode_to_type(rec->imode);
1927                 return 0;
1928         }
1929
1930         list_for_each_entry(backref, &rec->backrefs, list) {
1931                 if (backref->found_dir_index || backref->found_dir_item) {
1932                         *type = backref->filetype;
1933                         return 0;
1934                 }
1935         }
1936         return -ENOENT;
1937 }
1938
1939 /*
1940  * To determine the file name for nlink repair
1941  *
1942  * Return 0 if file name is found, set name and namelen.
1943  * Return -ENOENT if file name is not found.
1944  */
1945 static int find_file_name(struct inode_record *rec,
1946                           char *name, int *namelen)
1947 {
1948         struct inode_backref *backref;
1949
1950         list_for_each_entry(backref, &rec->backrefs, list) {
1951                 if (backref->found_dir_index || backref->found_dir_item ||
1952                     backref->found_inode_ref) {
1953                         memcpy(name, backref->name, backref->namelen);
1954                         *namelen = backref->namelen;
1955                         return 0;
1956                 }
1957         }
1958         return -ENOENT;
1959 }
1960
1961 /* Reset the nlink of the inode to the correct one */
1962 static int reset_nlink(struct btrfs_trans_handle *trans,
1963                        struct btrfs_root *root,
1964                        struct btrfs_path *path,
1965                        struct inode_record *rec)
1966 {
1967         struct inode_backref *backref;
1968         struct inode_backref *tmp;
1969         struct btrfs_key key;
1970         struct btrfs_inode_item *inode_item;
1971         int ret = 0;
1972
1973         /* We don't believe this either, reset it and iterate backref */
1974         rec->found_link = 0;
1975
1976         /* Remove all backref including the valid ones */
1977         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1978                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
1979                                    backref->index, backref->name,
1980                                    backref->namelen, 0);
1981                 if (ret < 0)
1982                         goto out;
1983
1984                 /* remove invalid backref, so it won't be added back */
1985                 if (!(backref->found_dir_index &&
1986                       backref->found_dir_item &&
1987                       backref->found_inode_ref)) {
1988                         list_del(&backref->list);
1989                         free(backref);
1990                 } else {
1991                         rec->found_link++;
1992                 }
1993         }
1994
1995         /* Set nlink to 0 */
1996         key.objectid = rec->ino;
1997         key.type = BTRFS_INODE_ITEM_KEY;
1998         key.offset = 0;
1999         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2000         if (ret < 0)
2001                 goto out;
2002         if (ret > 0) {
2003                 ret = -ENOENT;
2004                 goto out;
2005         }
2006         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2007                                     struct btrfs_inode_item);
2008         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2009         btrfs_mark_buffer_dirty(path->nodes[0]);
2010         btrfs_release_path(path);
2011
2012         /*
2013          * Add back valid inode_ref/dir_item/dir_index,
2014          * add_link() will handle the nlink inc, so new nlink must be correct
2015          */
2016         list_for_each_entry(backref, &rec->backrefs, list) {
2017                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2018                                      backref->name, backref->namelen,
2019                                      backref->ref_type, &backref->index, 1);
2020                 if (ret < 0)
2021                         goto out;
2022         }
2023 out:
2024         btrfs_release_path(path);
2025         return ret;
2026 }
2027
2028 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2029                                struct btrfs_root *root,
2030                                struct btrfs_path *path,
2031                                struct inode_record *rec)
2032 {
2033         char *dir_name = "lost+found";
2034         char namebuf[BTRFS_NAME_LEN] = {0};
2035         u64 lost_found_ino;
2036         u32 mode = 0700;
2037         u8 type = 0;
2038         int namelen = 0;
2039         int name_recovered = 0;
2040         int type_recovered = 0;
2041         int ret = 0;
2042
2043         /*
2044          * Get file name and type first before these invalid inode ref
2045          * are deleted by remove_all_invalid_backref()
2046          */
2047         name_recovered = !find_file_name(rec, namebuf, &namelen);
2048         type_recovered = !find_file_type(rec, &type);
2049
2050         if (!name_recovered) {
2051                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2052                        rec->ino, rec->ino);
2053                 namelen = count_digits(rec->ino);
2054                 sprintf(namebuf, "%llu", rec->ino);
2055                 name_recovered = 1;
2056         }
2057         if (!type_recovered) {
2058                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2059                        rec->ino);
2060                 type = BTRFS_FT_REG_FILE;
2061                 type_recovered = 1;
2062         }
2063
2064         ret = reset_nlink(trans, root, path, rec);
2065         if (ret < 0) {
2066                 fprintf(stderr,
2067                         "Failed to reset nlink for inode %llu: %s\n",
2068                         rec->ino, strerror(-ret));
2069                 goto out;
2070         }
2071
2072         if (rec->found_link == 0) {
2073                 lost_found_ino = root->highest_inode;
2074                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2075                         ret = -EOVERFLOW;
2076                         goto out;
2077                 }
2078                 lost_found_ino++;
2079                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2080                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2081                                   mode);
2082                 if (ret < 0) {
2083                         fprintf(stderr, "Failed to create '%s' dir: %s",
2084                                 dir_name, strerror(-ret));
2085                         goto out;
2086                 }
2087                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2088                                      namebuf, namelen, type, NULL, 1);
2089                 if (ret == -EEXIST) {
2090                         /*
2091                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2092                          */
2093                         if (namelen + count_digits(rec->ino) + 1 >
2094                             BTRFS_NAME_LEN) {
2095                                 ret = -EFBIG;
2096                                 goto out;
2097                         }
2098                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2099                                  ".%llu", rec->ino);
2100                         namelen += count_digits(rec->ino) + 1;
2101                         ret = btrfs_add_link(trans, root, rec->ino,
2102                                              lost_found_ino, namebuf,
2103                                              namelen, type, NULL, 1);
2104                 }
2105                 if (ret < 0) {
2106                         fprintf(stderr,
2107                                 "Failed to link the inode %llu to %s dir: %s",
2108                                 rec->ino, dir_name, strerror(-ret));
2109                         goto out;
2110                 }
2111                 /*
2112                  * Just increase the found_link, don't actually add the
2113                  * backref. This will make things easier and this inode
2114                  * record will be freed after the repair is done.
2115                  * So fsck will not report problem about this inode.
2116                  */
2117                 rec->found_link++;
2118                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2119                        namelen, namebuf, dir_name);
2120         }
2121         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2122         printf("Fixed the nlink of inode %llu\n", rec->ino);
2123 out:
2124         btrfs_release_path(path);
2125         return ret;
2126 }
2127
2128 /*
2129  * Check if there is any normal(reg or prealloc) file extent for given
2130  * ino.
2131  * This is used to determine the file type when neither its dir_index/item or
2132  * inode_item exists.
2133  *
2134  * This will *NOT* report error, if any error happens, just consider it does
2135  * not have any normal file extent.
2136  */
2137 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2138 {
2139         struct btrfs_path *path;
2140         struct btrfs_key key;
2141         struct btrfs_key found_key;
2142         struct btrfs_file_extent_item *fi;
2143         u8 type;
2144         int ret = 0;
2145
2146         path = btrfs_alloc_path();
2147         if (!path)
2148                 goto out;
2149         key.objectid = ino;
2150         key.type = BTRFS_EXTENT_DATA_KEY;
2151         key.offset = 0;
2152
2153         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2154         if (ret < 0) {
2155                 ret = 0;
2156                 goto out;
2157         }
2158         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2159                 ret = btrfs_next_leaf(root, path);
2160                 if (ret) {
2161                         ret = 0;
2162                         goto out;
2163                 }
2164         }
2165         while (1) {
2166                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2167                                       path->slots[0]);
2168                 if (found_key.objectid != ino ||
2169                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2170                         break;
2171                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2172                                     struct btrfs_file_extent_item);
2173                 type = btrfs_file_extent_type(path->nodes[0], fi);
2174                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2175                         ret = 1;
2176                         goto out;
2177                 }
2178         }
2179 out:
2180         btrfs_free_path(path);
2181         return ret;
2182 }
2183
2184 static u32 btrfs_type_to_imode(u8 type)
2185 {
2186         static u32 imode_by_btrfs_type[] = {
2187                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2188                 [BTRFS_FT_DIR]          = S_IFDIR,
2189                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2190                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2191                 [BTRFS_FT_FIFO]         = S_IFIFO,
2192                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2193                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2194         };
2195
2196         return imode_by_btrfs_type[(type)];
2197 }
2198
2199 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2200                                 struct btrfs_root *root,
2201                                 struct btrfs_path *path,
2202                                 struct inode_record *rec)
2203 {
2204         u8 filetype;
2205         u32 mode = 0700;
2206         int type_recovered = 0;
2207         int ret = 0;
2208
2209         /*
2210          * TODO:
2211          * 1. salvage data from existing file extent and
2212          *    punch hole to keep fi ext consistent.
2213          * 2. salvage data from extent tree
2214          */
2215         printf("Trying to rebuild inode:%llu\n", rec->ino);
2216
2217         type_recovered = !find_file_type(rec, &filetype);
2218
2219         /*
2220          * Try to determine inode type if type not found.
2221          *
2222          * For found regular file extent, it must be FILE.
2223          * For found dir_item/index, it must be DIR.
2224          *
2225          * For undetermined one, use FILE as fallback.
2226          *
2227          * TODO:
2228          * 1. If found extent belong to it in extent tree, it must be FILE
2229          *    Need extra hook in extent tree scan.
2230          * 2. If found backref(inode_index/item is already handled) to it,
2231          *    it must be DIR.
2232          *    Need new inode-inode ref structure to allow search for that.
2233          */
2234         if (!type_recovered) {
2235                 if (rec->found_file_extent &&
2236                     find_normal_file_extent(root, rec->ino)) {
2237                         type_recovered = 1;
2238                         filetype = BTRFS_FT_REG_FILE;
2239                 } else if (rec->found_dir_item) {
2240                         type_recovered = 1;
2241                         filetype = BTRFS_FT_DIR;
2242                 } else {
2243                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2244                                rec->ino);
2245                         type_recovered = 1;
2246                         filetype = BTRFS_FT_REG_FILE;
2247                 }
2248         }
2249
2250         ret = btrfs_new_inode(trans, root, rec->ino,
2251                               mode | btrfs_type_to_imode(filetype));
2252         if (ret < 0)
2253                 goto out;
2254
2255         /*
2256          * Here inode rebuild is done, we only rebuild the inode item,
2257          * don't repair the nlink(like move to lost+found).
2258          * That is the job of nlink repair.
2259          *
2260          * We just fill the record and return
2261          */
2262         rec->found_dir_item = 1;
2263         rec->imode = mode | btrfs_type_to_imode(filetype);
2264         rec->nlink = 0;
2265         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2266         /* Ensure the inode_nlinks repair function will be called */
2267         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2268 out:
2269         return ret;
2270 }
2271
2272 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2273 {
2274         struct btrfs_trans_handle *trans;
2275         struct btrfs_path *path;
2276         int ret = 0;
2277
2278         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2279                              I_ERR_NO_ORPHAN_ITEM |
2280                              I_ERR_LINK_COUNT_WRONG |
2281                              I_ERR_NO_INODE_ITEM)))
2282                 return rec->errors;
2283
2284         path = btrfs_alloc_path();
2285         if (!path)
2286                 return -ENOMEM;
2287
2288         /*
2289          * For nlink repair, it may create a dir and add link, so
2290          * 2 for parent(256)'s dir_index and dir_item
2291          * 2 for lost+found dir's inode_item and inode_ref
2292          * 1 for the new inode_ref of the file
2293          * 2 for lost+found dir's dir_index and dir_item for the file
2294          */
2295         trans = btrfs_start_transaction(root, 7);
2296         if (IS_ERR(trans)) {
2297                 btrfs_free_path(path);
2298                 return PTR_ERR(trans);
2299         }
2300
2301         if (rec->errors & I_ERR_NO_INODE_ITEM)
2302                 ret = repair_inode_no_item(trans, root, path, rec);
2303         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2304                 ret = repair_inode_isize(trans, root, path, rec);
2305         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2306                 ret = repair_inode_orphan_item(trans, root, path, rec);
2307         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2308                 ret = repair_inode_nlinks(trans, root, path, rec);
2309         btrfs_commit_transaction(trans, root);
2310         btrfs_free_path(path);
2311         return ret;
2312 }
2313
2314 static int check_inode_recs(struct btrfs_root *root,
2315                             struct cache_tree *inode_cache)
2316 {
2317         struct cache_extent *cache;
2318         struct ptr_node *node;
2319         struct inode_record *rec;
2320         struct inode_backref *backref;
2321         int stage = 0;
2322         int ret = 0;
2323         int err = 0;
2324         u64 error = 0;
2325         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2326
2327         if (btrfs_root_refs(&root->root_item) == 0) {
2328                 if (!cache_tree_empty(inode_cache))
2329                         fprintf(stderr, "warning line %d\n", __LINE__);
2330                 return 0;
2331         }
2332
2333         /*
2334          * We need to record the highest inode number for later 'lost+found'
2335          * dir creation.
2336          * We must select a ino not used/refered by any existing inode, or
2337          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2338          * this may cause 'lost+found' dir has wrong nlinks.
2339          */
2340         cache = last_cache_extent(inode_cache);
2341         if (cache) {
2342                 node = container_of(cache, struct ptr_node, cache);
2343                 rec = node->data;
2344                 if (rec->ino > root->highest_inode)
2345                         root->highest_inode = rec->ino;
2346         }
2347
2348         /*
2349          * We need to repair backrefs first because we could change some of the
2350          * errors in the inode recs.
2351          *
2352          * We also need to go through and delete invalid backrefs first and then
2353          * add the correct ones second.  We do this because we may get EEXIST
2354          * when adding back the correct index because we hadn't yet deleted the
2355          * invalid index.
2356          *
2357          * For example, if we were missing a dir index then the directories
2358          * isize would be wrong, so if we fixed the isize to what we thought it
2359          * would be and then fixed the backref we'd still have a invalid fs, so
2360          * we need to add back the dir index and then check to see if the isize
2361          * is still wrong.
2362          */
2363         while (stage < 3) {
2364                 stage++;
2365                 if (stage == 3 && !err)
2366                         break;
2367
2368                 cache = search_cache_extent(inode_cache, 0);
2369                 while (repair && cache) {
2370                         node = container_of(cache, struct ptr_node, cache);
2371                         rec = node->data;
2372                         cache = next_cache_extent(cache);
2373
2374                         /* Need to free everything up and rescan */
2375                         if (stage == 3) {
2376                                 remove_cache_extent(inode_cache, &node->cache);
2377                                 free(node);
2378                                 free_inode_rec(rec);
2379                                 continue;
2380                         }
2381
2382                         if (list_empty(&rec->backrefs))
2383                                 continue;
2384
2385                         ret = repair_inode_backrefs(root, rec, inode_cache,
2386                                                     stage == 1);
2387                         if (ret < 0) {
2388                                 err = ret;
2389                                 stage = 2;
2390                                 break;
2391                         } if (ret > 0) {
2392                                 err = -EAGAIN;
2393                         }
2394                 }
2395         }
2396         if (err)
2397                 return err;
2398
2399         rec = get_inode_rec(inode_cache, root_dirid, 0);
2400         if (rec) {
2401                 ret = check_root_dir(rec);
2402                 if (ret) {
2403                         fprintf(stderr, "root %llu root dir %llu error\n",
2404                                 (unsigned long long)root->root_key.objectid,
2405                                 (unsigned long long)root_dirid);
2406                         print_inode_error(root, rec);
2407                         error++;
2408                 }
2409         } else {
2410                 if (repair) {
2411                         struct btrfs_trans_handle *trans;
2412
2413                         trans = btrfs_start_transaction(root, 1);
2414                         if (IS_ERR(trans)) {
2415                                 err = PTR_ERR(trans);
2416                                 return err;
2417                         }
2418
2419                         fprintf(stderr,
2420                                 "root %llu missing its root dir, recreating\n",
2421                                 (unsigned long long)root->objectid);
2422
2423                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2424                         BUG_ON(ret);
2425
2426                         btrfs_commit_transaction(trans, root);
2427                         return -EAGAIN;
2428                 }
2429
2430                 fprintf(stderr, "root %llu root dir %llu not found\n",
2431                         (unsigned long long)root->root_key.objectid,
2432                         (unsigned long long)root_dirid);
2433         }
2434
2435         while (1) {
2436                 cache = search_cache_extent(inode_cache, 0);
2437                 if (!cache)
2438                         break;
2439                 node = container_of(cache, struct ptr_node, cache);
2440                 rec = node->data;
2441                 remove_cache_extent(inode_cache, &node->cache);
2442                 free(node);
2443                 if (rec->ino == root_dirid ||
2444                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2445                         free_inode_rec(rec);
2446                         continue;
2447                 }
2448
2449                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2450                         ret = check_orphan_item(root, rec->ino);
2451                         if (ret == 0)
2452                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2453                         if (can_free_inode_rec(rec)) {
2454                                 free_inode_rec(rec);
2455                                 continue;
2456                         }
2457                 }
2458
2459                 if (!rec->found_inode_item)
2460                         rec->errors |= I_ERR_NO_INODE_ITEM;
2461                 if (rec->found_link != rec->nlink)
2462                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2463                 if (repair) {
2464                         ret = try_repair_inode(root, rec);
2465                         if (ret == 0 && can_free_inode_rec(rec)) {
2466                                 free_inode_rec(rec);
2467                                 continue;
2468                         }
2469                         ret = 0;
2470                 }
2471
2472                 if (!(repair && ret == 0))
2473                         error++;
2474                 print_inode_error(root, rec);
2475                 list_for_each_entry(backref, &rec->backrefs, list) {
2476                         if (!backref->found_dir_item)
2477                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2478                         if (!backref->found_dir_index)
2479                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2480                         if (!backref->found_inode_ref)
2481                                 backref->errors |= REF_ERR_NO_INODE_REF;
2482                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2483                                 " namelen %u name %s filetype %d errors %x",
2484                                 (unsigned long long)backref->dir,
2485                                 (unsigned long long)backref->index,
2486                                 backref->namelen, backref->name,
2487                                 backref->filetype, backref->errors);
2488                         print_ref_error(backref->errors);
2489                 }
2490                 free_inode_rec(rec);
2491         }
2492         return (error > 0) ? -1 : 0;
2493 }
2494
2495 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2496                                         u64 objectid)
2497 {
2498         struct cache_extent *cache;
2499         struct root_record *rec = NULL;
2500         int ret;
2501
2502         cache = lookup_cache_extent(root_cache, objectid, 1);
2503         if (cache) {
2504                 rec = container_of(cache, struct root_record, cache);
2505         } else {
2506                 rec = calloc(1, sizeof(*rec));
2507                 rec->objectid = objectid;
2508                 INIT_LIST_HEAD(&rec->backrefs);
2509                 rec->cache.start = objectid;
2510                 rec->cache.size = 1;
2511
2512                 ret = insert_cache_extent(root_cache, &rec->cache);
2513                 BUG_ON(ret);
2514         }
2515         return rec;
2516 }
2517
2518 static struct root_backref *get_root_backref(struct root_record *rec,
2519                                              u64 ref_root, u64 dir, u64 index,
2520                                              const char *name, int namelen)
2521 {
2522         struct root_backref *backref;
2523
2524         list_for_each_entry(backref, &rec->backrefs, list) {
2525                 if (backref->ref_root != ref_root || backref->dir != dir ||
2526                     backref->namelen != namelen)
2527                         continue;
2528                 if (memcmp(name, backref->name, namelen))
2529                         continue;
2530                 return backref;
2531         }
2532
2533         backref = malloc(sizeof(*backref) + namelen + 1);
2534         memset(backref, 0, sizeof(*backref));
2535         backref->ref_root = ref_root;
2536         backref->dir = dir;
2537         backref->index = index;
2538         backref->namelen = namelen;
2539         memcpy(backref->name, name, namelen);
2540         backref->name[namelen] = '\0';
2541         list_add_tail(&backref->list, &rec->backrefs);
2542         return backref;
2543 }
2544
2545 static void free_root_record(struct cache_extent *cache)
2546 {
2547         struct root_record *rec;
2548         struct root_backref *backref;
2549
2550         rec = container_of(cache, struct root_record, cache);
2551         while (!list_empty(&rec->backrefs)) {
2552                 backref = list_entry(rec->backrefs.next,
2553                                      struct root_backref, list);
2554                 list_del(&backref->list);
2555                 free(backref);
2556         }
2557
2558         kfree(rec);
2559 }
2560
2561 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2562
2563 static int add_root_backref(struct cache_tree *root_cache,
2564                             u64 root_id, u64 ref_root, u64 dir, u64 index,
2565                             const char *name, int namelen,
2566                             int item_type, int errors)
2567 {
2568         struct root_record *rec;
2569         struct root_backref *backref;
2570
2571         rec = get_root_rec(root_cache, root_id);
2572         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2573
2574         backref->errors |= errors;
2575
2576         if (item_type != BTRFS_DIR_ITEM_KEY) {
2577                 if (backref->found_dir_index || backref->found_back_ref ||
2578                     backref->found_forward_ref) {
2579                         if (backref->index != index)
2580                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
2581                 } else {
2582                         backref->index = index;
2583                 }
2584         }
2585
2586         if (item_type == BTRFS_DIR_ITEM_KEY) {
2587                 if (backref->found_forward_ref)
2588                         rec->found_ref++;
2589                 backref->found_dir_item = 1;
2590         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2591                 backref->found_dir_index = 1;
2592         } else if (item_type == BTRFS_ROOT_REF_KEY) {
2593                 if (backref->found_forward_ref)
2594                         backref->errors |= REF_ERR_DUP_ROOT_REF;
2595                 else if (backref->found_dir_item)
2596                         rec->found_ref++;
2597                 backref->found_forward_ref = 1;
2598         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2599                 if (backref->found_back_ref)
2600                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2601                 backref->found_back_ref = 1;
2602         } else {
2603                 BUG_ON(1);
2604         }
2605
2606         if (backref->found_forward_ref && backref->found_dir_item)
2607                 backref->reachable = 1;
2608         return 0;
2609 }
2610
2611 static int merge_root_recs(struct btrfs_root *root,
2612                            struct cache_tree *src_cache,
2613                            struct cache_tree *dst_cache)
2614 {
2615         struct cache_extent *cache;
2616         struct ptr_node *node;
2617         struct inode_record *rec;
2618         struct inode_backref *backref;
2619         int ret = 0;
2620
2621         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2622                 free_inode_recs_tree(src_cache);
2623                 return 0;
2624         }
2625
2626         while (1) {
2627                 cache = search_cache_extent(src_cache, 0);
2628                 if (!cache)
2629                         break;
2630                 node = container_of(cache, struct ptr_node, cache);
2631                 rec = node->data;
2632                 remove_cache_extent(src_cache, &node->cache);
2633                 free(node);
2634
2635                 ret = is_child_root(root, root->objectid, rec->ino);
2636                 if (ret < 0)
2637                         break;
2638                 else if (ret == 0)
2639                         goto skip;
2640
2641                 list_for_each_entry(backref, &rec->backrefs, list) {
2642                         BUG_ON(backref->found_inode_ref);
2643                         if (backref->found_dir_item)
2644                                 add_root_backref(dst_cache, rec->ino,
2645                                         root->root_key.objectid, backref->dir,
2646                                         backref->index, backref->name,
2647                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
2648                                         backref->errors);
2649                         if (backref->found_dir_index)
2650                                 add_root_backref(dst_cache, rec->ino,
2651                                         root->root_key.objectid, backref->dir,
2652                                         backref->index, backref->name,
2653                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
2654                                         backref->errors);
2655                 }
2656 skip:
2657                 free_inode_rec(rec);
2658         }
2659         if (ret < 0)
2660                 return ret;
2661         return 0;
2662 }
2663
2664 static int check_root_refs(struct btrfs_root *root,
2665                            struct cache_tree *root_cache)
2666 {
2667         struct root_record *rec;
2668         struct root_record *ref_root;
2669         struct root_backref *backref;
2670         struct cache_extent *cache;
2671         int loop = 1;
2672         int ret;
2673         int error;
2674         int errors = 0;
2675
2676         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
2677         rec->found_ref = 1;
2678
2679         /* fixme: this can not detect circular references */
2680         while (loop) {
2681                 loop = 0;
2682                 cache = search_cache_extent(root_cache, 0);
2683                 while (1) {
2684                         if (!cache)
2685                                 break;
2686                         rec = container_of(cache, struct root_record, cache);
2687                         cache = next_cache_extent(cache);
2688
2689                         if (rec->found_ref == 0)
2690                                 continue;
2691
2692                         list_for_each_entry(backref, &rec->backrefs, list) {
2693                                 if (!backref->reachable)
2694                                         continue;
2695
2696                                 ref_root = get_root_rec(root_cache,
2697                                                         backref->ref_root);
2698                                 if (ref_root->found_ref > 0)
2699                                         continue;
2700
2701                                 backref->reachable = 0;
2702                                 rec->found_ref--;
2703                                 if (rec->found_ref == 0)
2704                                         loop = 1;
2705                         }
2706                 }
2707         }
2708
2709         cache = search_cache_extent(root_cache, 0);
2710         while (1) {
2711                 if (!cache)
2712                         break;
2713                 rec = container_of(cache, struct root_record, cache);
2714                 cache = next_cache_extent(cache);
2715
2716                 if (rec->found_ref == 0 &&
2717                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
2718                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
2719                         ret = check_orphan_item(root->fs_info->tree_root,
2720                                                 rec->objectid);
2721                         if (ret == 0)
2722                                 continue;
2723
2724                         /*
2725                          * If we don't have a root item then we likely just have
2726                          * a dir item in a snapshot for this root but no actual
2727                          * ref key or anything so it's meaningless.
2728                          */
2729                         if (!rec->found_root_item)
2730                                 continue;
2731                         errors++;
2732                         fprintf(stderr, "fs tree %llu not referenced\n",
2733                                 (unsigned long long)rec->objectid);
2734                 }
2735
2736                 error = 0;
2737                 if (rec->found_ref > 0 && !rec->found_root_item)
2738                         error = 1;
2739                 list_for_each_entry(backref, &rec->backrefs, list) {
2740                         if (!backref->found_dir_item)
2741                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2742                         if (!backref->found_dir_index)
2743                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2744                         if (!backref->found_back_ref)
2745                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
2746                         if (!backref->found_forward_ref)
2747                                 backref->errors |= REF_ERR_NO_ROOT_REF;
2748                         if (backref->reachable && backref->errors)
2749                                 error = 1;
2750                 }
2751                 if (!error)
2752                         continue;
2753
2754                 errors++;
2755                 fprintf(stderr, "fs tree %llu refs %u %s\n",
2756                         (unsigned long long)rec->objectid, rec->found_ref,
2757                          rec->found_root_item ? "" : "not found");
2758
2759                 list_for_each_entry(backref, &rec->backrefs, list) {
2760                         if (!backref->reachable)
2761                                 continue;
2762                         if (!backref->errors && rec->found_root_item)
2763                                 continue;
2764                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
2765                                 " index %llu namelen %u name %s errors %x\n",
2766                                 (unsigned long long)backref->ref_root,
2767                                 (unsigned long long)backref->dir,
2768                                 (unsigned long long)backref->index,
2769                                 backref->namelen, backref->name,
2770                                 backref->errors);
2771                         print_ref_error(backref->errors);
2772                 }
2773         }
2774         return errors > 0 ? 1 : 0;
2775 }
2776
2777 static int process_root_ref(struct extent_buffer *eb, int slot,
2778                             struct btrfs_key *key,
2779                             struct cache_tree *root_cache)
2780 {
2781         u64 dirid;
2782         u64 index;
2783         u32 len;
2784         u32 name_len;
2785         struct btrfs_root_ref *ref;
2786         char namebuf[BTRFS_NAME_LEN];
2787         int error;
2788
2789         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
2790
2791         dirid = btrfs_root_ref_dirid(eb, ref);
2792         index = btrfs_root_ref_sequence(eb, ref);
2793         name_len = btrfs_root_ref_name_len(eb, ref);
2794
2795         if (name_len <= BTRFS_NAME_LEN) {
2796                 len = name_len;
2797                 error = 0;
2798         } else {
2799                 len = BTRFS_NAME_LEN;
2800                 error = REF_ERR_NAME_TOO_LONG;
2801         }
2802         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
2803
2804         if (key->type == BTRFS_ROOT_REF_KEY) {
2805                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
2806                                  index, namebuf, len, key->type, error);
2807         } else {
2808                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
2809                                  index, namebuf, len, key->type, error);
2810         }
2811         return 0;
2812 }
2813
2814 static void free_corrupt_block(struct cache_extent *cache)
2815 {
2816         struct btrfs_corrupt_block *corrupt;
2817
2818         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
2819         free(corrupt);
2820 }
2821
2822 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
2823
2824 /*
2825  * Repair the btree of the given root.
2826  *
2827  * The fix is to remove the node key in corrupt_blocks cache_tree.
2828  * and rebalance the tree.
2829  * After the fix, the btree should be writeable.
2830  */
2831 static int repair_btree(struct btrfs_root *root,
2832                         struct cache_tree *corrupt_blocks)
2833 {
2834         struct btrfs_trans_handle *trans;
2835         struct btrfs_path *path;
2836         struct btrfs_corrupt_block *corrupt;
2837         struct cache_extent *cache;
2838         struct btrfs_key key;
2839         u64 offset;
2840         int level;
2841         int ret = 0;
2842
2843         if (cache_tree_empty(corrupt_blocks))
2844                 return 0;
2845
2846         path = btrfs_alloc_path();
2847         if (!path)
2848                 return -ENOMEM;
2849
2850         trans = btrfs_start_transaction(root, 1);
2851         if (IS_ERR(trans)) {
2852                 ret = PTR_ERR(trans);
2853                 fprintf(stderr, "Error starting transaction: %s\n",
2854                         strerror(-ret));
2855                 goto out_free_path;
2856         }
2857         cache = first_cache_extent(corrupt_blocks);
2858         while (cache) {
2859                 corrupt = container_of(cache, struct btrfs_corrupt_block,
2860                                        cache);
2861                 level = corrupt->level;
2862                 path->lowest_level = level;
2863                 key.objectid = corrupt->key.objectid;
2864                 key.type = corrupt->key.type;
2865                 key.offset = corrupt->key.offset;
2866
2867                 /*
2868                  * Here we don't want to do any tree balance, since it may
2869                  * cause a balance with corrupted brother leaf/node,
2870                  * so ins_len set to 0 here.
2871                  * Balance will be done after all corrupt node/leaf is deleted.
2872                  */
2873                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2874                 if (ret < 0)
2875                         goto out;
2876                 offset = btrfs_node_blockptr(path->nodes[level],
2877                                              path->slots[level]);
2878
2879                 /* Remove the ptr */
2880                 ret = btrfs_del_ptr(trans, root, path, level,
2881                                     path->slots[level]);
2882                 if (ret < 0)
2883                         goto out;
2884                 /*
2885                  * Remove the corresponding extent
2886                  * return value is not concerned.
2887                  */
2888                 btrfs_release_path(path);
2889                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
2890                                         0, root->root_key.objectid,
2891                                         level - 1, 0);
2892                 cache = next_cache_extent(cache);
2893         }
2894
2895         /* Balance the btree using btrfs_search_slot() */
2896         cache = first_cache_extent(corrupt_blocks);
2897         while (cache) {
2898                 corrupt = container_of(cache, struct btrfs_corrupt_block,
2899                                        cache);
2900                 memcpy(&key, &corrupt->key, sizeof(key));
2901                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2902                 if (ret < 0)
2903                         goto out;
2904                 /* return will always >0 since it won't find the item */
2905                 ret = 0;
2906                 btrfs_release_path(path);
2907                 cache = next_cache_extent(cache);
2908         }
2909 out:
2910         btrfs_commit_transaction(trans, root);
2911 out_free_path:
2912         btrfs_free_path(path);
2913         return ret;
2914 }
2915
2916 static void print_orphan_data_extents(struct list_head *orphan_extents,
2917                                       u64 objectid)
2918 {
2919         struct orphan_data_extent *orphan;
2920
2921         if (list_empty(orphan_extents))
2922                 return;
2923         printf("The following data extent is lost in tree %llu:\n",
2924                objectid);
2925         list_for_each_entry(orphan, orphan_extents, list) {
2926                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
2927                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
2928                        orphan->disk_len);
2929         }
2930 }
2931
2932 static void free_orphan_data_extents(struct list_head *orphan_extents)
2933 {
2934         struct orphan_data_extent *orphan;
2935
2936         while (!list_empty(orphan_extents)) {
2937                 orphan = list_entry(orphan_extents->next,
2938                                     struct orphan_data_extent, list);
2939                 list_del(&orphan->list);
2940                 free(orphan);
2941         }
2942 }
2943
2944 static int check_fs_root(struct btrfs_root *root,
2945                          struct cache_tree *root_cache,
2946                          struct walk_control *wc)
2947 {
2948         int ret = 0;
2949         int err = 0;
2950         int wret;
2951         int level;
2952         struct btrfs_path path;
2953         struct shared_node root_node;
2954         struct root_record *rec;
2955         struct btrfs_root_item *root_item = &root->root_item;
2956         struct cache_tree corrupt_blocks;
2957         enum btrfs_tree_block_status status;
2958
2959         /*
2960          * Reuse the corrupt_block cache tree to record corrupted tree block
2961          *
2962          * Unlike the usage in extent tree check, here we do it in a per
2963          * fs/subvol tree base.
2964          */
2965         cache_tree_init(&corrupt_blocks);
2966         root->fs_info->corrupt_blocks = &corrupt_blocks;
2967         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2968                 rec = get_root_rec(root_cache, root->root_key.objectid);
2969                 if (btrfs_root_refs(root_item) > 0)
2970                         rec->found_root_item = 1;
2971         }
2972
2973         btrfs_init_path(&path);
2974         memset(&root_node, 0, sizeof(root_node));
2975         cache_tree_init(&root_node.root_cache);
2976         cache_tree_init(&root_node.inode_cache);
2977
2978         level = btrfs_header_level(root->node);
2979         memset(wc->nodes, 0, sizeof(wc->nodes));
2980         wc->nodes[level] = &root_node;
2981         wc->active_node = level;
2982         wc->root_level = level;
2983
2984         /* We may not have checked the root block, lets do that now */
2985         if (btrfs_is_leaf(root->node))
2986                 status = btrfs_check_leaf(root, NULL, root->node);
2987         else
2988                 status = btrfs_check_node(root, NULL, root->node);
2989         if (status != BTRFS_TREE_BLOCK_CLEAN)
2990                 return -EIO;
2991
2992         if (btrfs_root_refs(root_item) > 0 ||
2993             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2994                 path.nodes[level] = root->node;
2995                 extent_buffer_get(root->node);
2996                 path.slots[level] = 0;
2997         } else {
2998                 struct btrfs_key key;
2999                 struct btrfs_disk_key found_key;
3000
3001                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3002                 level = root_item->drop_level;
3003                 path.lowest_level = level;
3004                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3005                 if (wret < 0)
3006                         goto skip_walking;
3007                 btrfs_node_key(path.nodes[level], &found_key,
3008                                 path.slots[level]);
3009                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3010                                         sizeof(found_key)));
3011         }
3012
3013         while (1) {
3014                 wret = walk_down_tree(root, &path, wc, &level);
3015                 if (wret < 0)
3016                         ret = wret;
3017                 if (wret != 0)
3018                         break;
3019
3020                 wret = walk_up_tree(root, &path, wc, &level);
3021                 if (wret < 0)
3022                         ret = wret;
3023                 if (wret != 0)
3024                         break;
3025         }
3026 skip_walking:
3027         btrfs_release_path(&path);
3028
3029         if (!cache_tree_empty(&corrupt_blocks)) {
3030                 struct cache_extent *cache;
3031                 struct btrfs_corrupt_block *corrupt;
3032
3033                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3034                        root->root_key.objectid);
3035                 cache = first_cache_extent(&corrupt_blocks);
3036                 while (cache) {
3037                         corrupt = container_of(cache,
3038                                                struct btrfs_corrupt_block,
3039                                                cache);
3040                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3041                                cache->start, corrupt->level,
3042                                corrupt->key.objectid, corrupt->key.type,
3043                                corrupt->key.offset);
3044                         cache = next_cache_extent(cache);
3045                 }
3046                 if (repair) {
3047                         printf("Try to repair the btree for root %llu\n",
3048                                root->root_key.objectid);
3049                         ret = repair_btree(root, &corrupt_blocks);
3050                         if (ret < 0)
3051                                 fprintf(stderr, "Failed to repair btree: %s\n",
3052                                         strerror(-ret));
3053                         if (!ret)
3054                                 printf("Btree for root %llu is fixed\n",
3055                                        root->root_key.objectid);
3056                 }
3057         }
3058
3059         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3060         if (err < 0)
3061                 ret = err;
3062
3063         if (root_node.current) {
3064                 root_node.current->checked = 1;
3065                 maybe_free_inode_rec(&root_node.inode_cache,
3066                                 root_node.current);
3067         }
3068
3069         err = check_inode_recs(root, &root_node.inode_cache);
3070         if (!ret)
3071                 ret = err;
3072
3073         free_corrupt_blocks_tree(&corrupt_blocks);
3074         root->fs_info->corrupt_blocks = NULL;
3075         print_orphan_data_extents(&root->orphan_data_extents, root->objectid);
3076         free_orphan_data_extents(&root->orphan_data_extents);
3077         return ret;
3078 }
3079
3080 static int fs_root_objectid(u64 objectid)
3081 {
3082         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3083             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3084                 return 1;
3085         return is_fstree(objectid);
3086 }
3087
3088 static int check_fs_roots(struct btrfs_root *root,
3089                           struct cache_tree *root_cache)
3090 {
3091         struct btrfs_path path;
3092         struct btrfs_key key;
3093         struct walk_control wc;
3094         struct extent_buffer *leaf, *tree_node;
3095         struct btrfs_root *tmp_root;
3096         struct btrfs_root *tree_root = root->fs_info->tree_root;
3097         int ret;
3098         int err = 0;
3099
3100         /*
3101          * Just in case we made any changes to the extent tree that weren't
3102          * reflected into the free space cache yet.
3103          */
3104         if (repair)
3105                 reset_cached_block_groups(root->fs_info);
3106         memset(&wc, 0, sizeof(wc));
3107         cache_tree_init(&wc.shared);
3108         btrfs_init_path(&path);
3109
3110 again:
3111         key.offset = 0;
3112         key.objectid = 0;
3113         key.type = BTRFS_ROOT_ITEM_KEY;
3114         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3115         if (ret < 0) {
3116                 err = 1;
3117                 goto out;
3118         }
3119         tree_node = tree_root->node;
3120         while (1) {
3121                 if (tree_node != tree_root->node) {
3122                         free_root_recs_tree(root_cache);
3123                         btrfs_release_path(&path);
3124                         goto again;
3125                 }
3126                 leaf = path.nodes[0];
3127                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3128                         ret = btrfs_next_leaf(tree_root, &path);
3129                         if (ret) {
3130                                 if (ret < 0)
3131                                         err = 1;
3132                                 break;
3133                         }
3134                         leaf = path.nodes[0];
3135                 }
3136                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3137                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3138                     fs_root_objectid(key.objectid)) {
3139                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3140                                 tmp_root = btrfs_read_fs_root_no_cache(
3141                                                 root->fs_info, &key);
3142                         } else {
3143                                 key.offset = (u64)-1;
3144                                 tmp_root = btrfs_read_fs_root(
3145                                                 root->fs_info, &key);
3146                         }
3147                         if (IS_ERR(tmp_root)) {
3148                                 err = 1;
3149                                 goto next;
3150                         }
3151                         ret = check_fs_root(tmp_root, root_cache, &wc);
3152                         if (ret == -EAGAIN) {
3153                                 free_root_recs_tree(root_cache);
3154                                 btrfs_release_path(&path);
3155                                 goto again;
3156                         }
3157                         if (ret)
3158                                 err = 1;
3159                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3160                                 btrfs_free_fs_root(tmp_root);
3161                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3162                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3163                         process_root_ref(leaf, path.slots[0], &key,
3164                                          root_cache);
3165                 }
3166 next:
3167                 path.slots[0]++;
3168         }
3169 out:
3170         btrfs_release_path(&path);
3171         if (err)
3172                 free_extent_cache_tree(&wc.shared);
3173         if (!cache_tree_empty(&wc.shared))
3174                 fprintf(stderr, "warning line %d\n", __LINE__);
3175
3176         return err;
3177 }
3178
3179 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3180 {
3181         struct list_head *cur = rec->backrefs.next;
3182         struct extent_backref *back;
3183         struct tree_backref *tback;
3184         struct data_backref *dback;
3185         u64 found = 0;
3186         int err = 0;
3187
3188         while(cur != &rec->backrefs) {
3189                 back = list_entry(cur, struct extent_backref, list);
3190                 cur = cur->next;
3191                 if (!back->found_extent_tree) {
3192                         err = 1;
3193                         if (!print_errs)
3194                                 goto out;
3195                         if (back->is_data) {
3196                                 dback = (struct data_backref *)back;
3197                                 fprintf(stderr, "Backref %llu %s %llu"
3198                                         " owner %llu offset %llu num_refs %lu"
3199                                         " not found in extent tree\n",
3200                                         (unsigned long long)rec->start,
3201                                         back->full_backref ?
3202                                         "parent" : "root",
3203                                         back->full_backref ?
3204                                         (unsigned long long)dback->parent:
3205                                         (unsigned long long)dback->root,
3206                                         (unsigned long long)dback->owner,
3207                                         (unsigned long long)dback->offset,
3208                                         (unsigned long)dback->num_refs);
3209                         } else {
3210                                 tback = (struct tree_backref *)back;
3211                                 fprintf(stderr, "Backref %llu parent %llu"
3212                                         " root %llu not found in extent tree\n",
3213                                         (unsigned long long)rec->start,
3214                                         (unsigned long long)tback->parent,
3215                                         (unsigned long long)tback->root);
3216                         }
3217                 }
3218                 if (!back->is_data && !back->found_ref) {
3219                         err = 1;
3220                         if (!print_errs)
3221                                 goto out;
3222                         tback = (struct tree_backref *)back;
3223                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3224                                 (unsigned long long)rec->start,
3225                                 back->full_backref ? "parent" : "root",
3226                                 back->full_backref ?
3227                                 (unsigned long long)tback->parent :
3228                                 (unsigned long long)tback->root, back);
3229                 }
3230                 if (back->is_data) {
3231                         dback = (struct data_backref *)back;
3232                         if (dback->found_ref != dback->num_refs) {
3233                                 err = 1;
3234                                 if (!print_errs)
3235                                         goto out;
3236                                 fprintf(stderr, "Incorrect local backref count"
3237                                         " on %llu %s %llu owner %llu"
3238                                         " offset %llu found %u wanted %u back %p\n",
3239                                         (unsigned long long)rec->start,
3240                                         back->full_backref ?
3241                                         "parent" : "root",
3242                                         back->full_backref ?
3243                                         (unsigned long long)dback->parent:
3244                                         (unsigned long long)dback->root,
3245                                         (unsigned long long)dback->owner,
3246                                         (unsigned long long)dback->offset,
3247                                         dback->found_ref, dback->num_refs, back);
3248                         }
3249                         if (dback->disk_bytenr != rec->start) {
3250                                 err = 1;
3251                                 if (!print_errs)
3252                                         goto out;
3253                                 fprintf(stderr, "Backref disk bytenr does not"
3254                                         " match extent record, bytenr=%llu, "
3255                                         "ref bytenr=%llu\n",
3256                                         (unsigned long long)rec->start,
3257                                         (unsigned long long)dback->disk_bytenr);
3258                         }
3259
3260                         if (dback->bytes != rec->nr) {
3261                                 err = 1;
3262                                 if (!print_errs)
3263                                         goto out;
3264                                 fprintf(stderr, "Backref bytes do not match "
3265                                         "extent backref, bytenr=%llu, ref "
3266                                         "bytes=%llu, backref bytes=%llu\n",
3267                                         (unsigned long long)rec->start,
3268                                         (unsigned long long)rec->nr,
3269                                         (unsigned long long)dback->bytes);
3270                         }
3271                 }
3272                 if (!back->is_data) {
3273                         found += 1;
3274                 } else {
3275                         dback = (struct data_backref *)back;
3276                         found += dback->found_ref;
3277                 }
3278         }
3279         if (found != rec->refs) {
3280                 err = 1;
3281                 if (!print_errs)
3282                         goto out;
3283                 fprintf(stderr, "Incorrect global backref count "
3284                         "on %llu found %llu wanted %llu\n",
3285                         (unsigned long long)rec->start,
3286                         (unsigned long long)found,
3287                         (unsigned long long)rec->refs);
3288         }
3289 out:
3290         return err;
3291 }
3292
3293 static int free_all_extent_backrefs(struct extent_record *rec)
3294 {
3295         struct extent_backref *back;
3296         struct list_head *cur;
3297         while (!list_empty(&rec->backrefs)) {
3298                 cur = rec->backrefs.next;
3299                 back = list_entry(cur, struct extent_backref, list);
3300                 list_del(cur);
3301                 free(back);
3302         }
3303         return 0;
3304 }
3305
3306 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3307                                      struct cache_tree *extent_cache)
3308 {
3309         struct cache_extent *cache;
3310         struct extent_record *rec;
3311
3312         while (1) {
3313                 cache = first_cache_extent(extent_cache);
3314                 if (!cache)
3315                         break;
3316                 rec = container_of(cache, struct extent_record, cache);
3317                 btrfs_unpin_extent(fs_info, rec->start, rec->max_size);
3318                 remove_cache_extent(extent_cache, cache);
3319                 free_all_extent_backrefs(rec);
3320                 free(rec);
3321         }
3322 }
3323
3324 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3325                                  struct extent_record *rec)
3326 {
3327         if (rec->content_checked && rec->owner_ref_checked &&
3328             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3329             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) {
3330                 remove_cache_extent(extent_cache, &rec->cache);
3331                 free_all_extent_backrefs(rec);
3332                 list_del_init(&rec->list);
3333                 free(rec);
3334         }
3335         return 0;
3336 }
3337
3338 static int check_owner_ref(struct btrfs_root *root,
3339                             struct extent_record *rec,
3340                             struct extent_buffer *buf)
3341 {
3342         struct extent_backref *node;
3343         struct tree_backref *back;
3344         struct btrfs_root *ref_root;
3345         struct btrfs_key key;
3346         struct btrfs_path path;
3347         struct extent_buffer *parent;
3348         int level;
3349         int found = 0;
3350         int ret;
3351
3352         list_for_each_entry(node, &rec->backrefs, list) {
3353                 if (node->is_data)
3354                         continue;
3355                 if (!node->found_ref)
3356                         continue;
3357                 if (node->full_backref)
3358                         continue;
3359                 back = (struct tree_backref *)node;
3360                 if (btrfs_header_owner(buf) == back->root)
3361                         return 0;
3362         }
3363         BUG_ON(rec->is_root);
3364
3365         /* try to find the block by search corresponding fs tree */
3366         key.objectid = btrfs_header_owner(buf);
3367         key.type = BTRFS_ROOT_ITEM_KEY;
3368         key.offset = (u64)-1;
3369
3370         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3371         if (IS_ERR(ref_root))
3372                 return 1;
3373
3374         level = btrfs_header_level(buf);
3375         if (level == 0)
3376                 btrfs_item_key_to_cpu(buf, &key, 0);
3377         else
3378                 btrfs_node_key_to_cpu(buf, &key, 0);
3379
3380         btrfs_init_path(&path);
3381         path.lowest_level = level + 1;
3382         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3383         if (ret < 0)
3384                 return 0;
3385
3386         parent = path.nodes[level + 1];
3387         if (parent && buf->start == btrfs_node_blockptr(parent,
3388                                                         path.slots[level + 1]))
3389                 found = 1;
3390
3391         btrfs_release_path(&path);
3392         return found ? 0 : 1;
3393 }
3394
3395 static int is_extent_tree_record(struct extent_record *rec)
3396 {
3397         struct list_head *cur = rec->backrefs.next;
3398         struct extent_backref *node;
3399         struct tree_backref *back;
3400         int is_extent = 0;
3401
3402         while(cur != &rec->backrefs) {
3403                 node = list_entry(cur, struct extent_backref, list);
3404                 cur = cur->next;
3405                 if (node->is_data)
3406                         return 0;
3407                 back = (struct tree_backref *)node;
3408                 if (node->full_backref)
3409                         return 0;
3410                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3411                         is_extent = 1;
3412         }
3413         return is_extent;
3414 }
3415
3416
3417 static int record_bad_block_io(struct btrfs_fs_info *info,
3418                                struct cache_tree *extent_cache,
3419                                u64 start, u64 len)
3420 {
3421         struct extent_record *rec;
3422         struct cache_extent *cache;
3423         struct btrfs_key key;
3424
3425         cache = lookup_cache_extent(extent_cache, start, len);
3426         if (!cache)
3427                 return 0;
3428
3429         rec = container_of(cache, struct extent_record, cache);
3430         if (!is_extent_tree_record(rec))
3431                 return 0;
3432
3433         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3434         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3435 }
3436
3437 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3438                        struct extent_buffer *buf, int slot)
3439 {
3440         if (btrfs_header_level(buf)) {
3441                 struct btrfs_key_ptr ptr1, ptr2;
3442
3443                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3444                                    sizeof(struct btrfs_key_ptr));
3445                 read_extent_buffer(buf, &ptr2,
3446                                    btrfs_node_key_ptr_offset(slot + 1),
3447                                    sizeof(struct btrfs_key_ptr));
3448                 write_extent_buffer(buf, &ptr1,
3449                                     btrfs_node_key_ptr_offset(slot + 1),
3450                                     sizeof(struct btrfs_key_ptr));
3451                 write_extent_buffer(buf, &ptr2,
3452                                     btrfs_node_key_ptr_offset(slot),
3453                                     sizeof(struct btrfs_key_ptr));
3454                 if (slot == 0) {
3455                         struct btrfs_disk_key key;
3456                         btrfs_node_key(buf, &key, 0);
3457                         btrfs_fixup_low_keys(root, path, &key,
3458                                              btrfs_header_level(buf) + 1);
3459                 }
3460         } else {
3461                 struct btrfs_item *item1, *item2;
3462                 struct btrfs_key k1, k2;
3463                 char *item1_data, *item2_data;
3464                 u32 item1_offset, item2_offset, item1_size, item2_size;
3465
3466                 item1 = btrfs_item_nr(slot);
3467                 item2 = btrfs_item_nr(slot + 1);
3468                 btrfs_item_key_to_cpu(buf, &k1, slot);
3469                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3470                 item1_offset = btrfs_item_offset(buf, item1);
3471                 item2_offset = btrfs_item_offset(buf, item2);
3472                 item1_size = btrfs_item_size(buf, item1);
3473                 item2_size = btrfs_item_size(buf, item2);
3474
3475                 item1_data = malloc(item1_size);
3476                 if (!item1_data)
3477                         return -ENOMEM;
3478                 item2_data = malloc(item2_size);
3479                 if (!item2_data) {
3480                         free(item1_data);
3481                         return -ENOMEM;
3482                 }
3483
3484                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3485                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3486
3487                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3488                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3489                 free(item1_data);
3490                 free(item2_data);
3491
3492                 btrfs_set_item_offset(buf, item1, item2_offset);
3493                 btrfs_set_item_offset(buf, item2, item1_offset);
3494                 btrfs_set_item_size(buf, item1, item2_size);
3495                 btrfs_set_item_size(buf, item2, item1_size);
3496
3497                 path->slots[0] = slot;
3498                 btrfs_set_item_key_unsafe(root, path, &k2);
3499                 path->slots[0] = slot + 1;
3500                 btrfs_set_item_key_unsafe(root, path, &k1);
3501         }
3502         return 0;
3503 }
3504
3505 static int fix_key_order(struct btrfs_trans_handle *trans,
3506                          struct btrfs_root *root,
3507                          struct btrfs_path *path)
3508 {
3509         struct extent_buffer *buf;
3510         struct btrfs_key k1, k2;
3511         int i;
3512         int level = path->lowest_level;
3513         int ret = -EIO;
3514
3515         buf = path->nodes[level];
3516         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3517                 if (level) {
3518                         btrfs_node_key_to_cpu(buf, &k1, i);
3519                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
3520                 } else {
3521                         btrfs_item_key_to_cpu(buf, &k1, i);
3522                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
3523                 }
3524                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
3525                         continue;
3526                 ret = swap_values(root, path, buf, i);
3527                 if (ret)
3528                         break;
3529                 btrfs_mark_buffer_dirty(buf);
3530                 i = 0;
3531         }
3532         return ret;
3533 }
3534
3535 static int delete_bogus_item(struct btrfs_trans_handle *trans,
3536                              struct btrfs_root *root,
3537                              struct btrfs_path *path,
3538                              struct extent_buffer *buf, int slot)
3539 {
3540         struct btrfs_key key;
3541         int nritems = btrfs_header_nritems(buf);
3542
3543         btrfs_item_key_to_cpu(buf, &key, slot);
3544
3545         /* These are all the keys we can deal with missing. */
3546         if (key.type != BTRFS_DIR_INDEX_KEY &&
3547             key.type != BTRFS_EXTENT_ITEM_KEY &&
3548             key.type != BTRFS_METADATA_ITEM_KEY &&
3549             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3550             key.type != BTRFS_EXTENT_DATA_REF_KEY)
3551                 return -1;
3552
3553         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3554                (unsigned long long)key.objectid, key.type,
3555                (unsigned long long)key.offset, slot, buf->start);
3556         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
3557                               btrfs_item_nr_offset(slot + 1),
3558                               sizeof(struct btrfs_item) *
3559                               (nritems - slot - 1));
3560         btrfs_set_header_nritems(buf, nritems - 1);
3561         if (slot == 0) {
3562                 struct btrfs_disk_key disk_key;
3563
3564                 btrfs_item_key(buf, &disk_key, 0);
3565                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
3566         }
3567         btrfs_mark_buffer_dirty(buf);
3568         return 0;
3569 }
3570
3571 static int fix_item_offset(struct btrfs_trans_handle *trans,
3572                            struct btrfs_root *root,
3573                            struct btrfs_path *path)
3574 {
3575         struct extent_buffer *buf;
3576         int i;
3577         int ret = 0;
3578
3579         /* We should only get this for leaves */
3580         BUG_ON(path->lowest_level);
3581         buf = path->nodes[0];
3582 again:
3583         for (i = 0; i < btrfs_header_nritems(buf); i++) {
3584                 unsigned int shift = 0, offset;
3585
3586                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
3587                     BTRFS_LEAF_DATA_SIZE(root)) {
3588                         if (btrfs_item_end_nr(buf, i) >
3589                             BTRFS_LEAF_DATA_SIZE(root)) {
3590                                 ret = delete_bogus_item(trans, root, path,
3591                                                         buf, i);
3592                                 if (!ret)
3593                                         goto again;
3594                                 fprintf(stderr, "item is off the end of the "
3595                                         "leaf, can't fix\n");
3596                                 ret = -EIO;
3597                                 break;
3598                         }
3599                         shift = BTRFS_LEAF_DATA_SIZE(root) -
3600                                 btrfs_item_end_nr(buf, i);
3601                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
3602                            btrfs_item_offset_nr(buf, i - 1)) {
3603                         if (btrfs_item_end_nr(buf, i) >
3604                             btrfs_item_offset_nr(buf, i - 1)) {
3605                                 ret = delete_bogus_item(trans, root, path,
3606                                                         buf, i);
3607                                 if (!ret)
3608                                         goto again;
3609                                 fprintf(stderr, "items overlap, can't fix\n");
3610                                 ret = -EIO;
3611                                 break;
3612                         }
3613                         shift = btrfs_item_offset_nr(buf, i - 1) -
3614                                 btrfs_item_end_nr(buf, i);
3615                 }
3616                 if (!shift)
3617                         continue;
3618
3619                 printf("Shifting item nr %d by %u bytes in block %llu\n",
3620                        i, shift, (unsigned long long)buf->start);
3621                 offset = btrfs_item_offset_nr(buf, i);
3622                 memmove_extent_buffer(buf,
3623                                       btrfs_leaf_data(buf) + offset + shift,
3624                                       btrfs_leaf_data(buf) + offset,
3625                                       btrfs_item_size_nr(buf, i));
3626                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
3627                                       offset + shift);
3628                 btrfs_mark_buffer_dirty(buf);
3629         }
3630
3631         /*
3632          * We may have moved things, in which case we want to exit so we don't
3633          * write those changes out.  Once we have proper abort functionality in
3634          * progs this can be changed to something nicer.
3635          */
3636         BUG_ON(ret);
3637         return ret;
3638 }
3639
3640 /*
3641  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
3642  * then just return -EIO.
3643  */
3644 static int try_to_fix_bad_block(struct btrfs_trans_handle *trans,
3645                                 struct btrfs_root *root,
3646                                 struct extent_buffer *buf,
3647                                 enum btrfs_tree_block_status status)
3648 {
3649         struct ulist *roots;
3650         struct ulist_node *node;
3651         struct btrfs_root *search_root;
3652         struct btrfs_path *path;
3653         struct ulist_iterator iter;
3654         struct btrfs_key root_key, key;
3655         int ret;
3656
3657         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
3658             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3659                 return -EIO;
3660
3661         path = btrfs_alloc_path();
3662         if (!path)
3663                 return -EIO;
3664
3665         ret = btrfs_find_all_roots(trans, root->fs_info, buf->start,
3666                                    0, &roots);
3667         if (ret) {
3668                 btrfs_free_path(path);
3669                 return -EIO;
3670         }
3671
3672         ULIST_ITER_INIT(&iter);
3673         while ((node = ulist_next(roots, &iter))) {
3674                 root_key.objectid = node->val;
3675                 root_key.type = BTRFS_ROOT_ITEM_KEY;
3676                 root_key.offset = (u64)-1;
3677
3678                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
3679                 if (IS_ERR(root)) {
3680                         ret = -EIO;
3681                         break;
3682                 }
3683
3684                 record_root_in_trans(trans, search_root);
3685
3686                 path->lowest_level = btrfs_header_level(buf);
3687                 path->skip_check_block = 1;
3688                 if (path->lowest_level)
3689                         btrfs_node_key_to_cpu(buf, &key, 0);
3690                 else
3691                         btrfs_item_key_to_cpu(buf, &key, 0);
3692                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
3693                 if (ret) {
3694                         ret = -EIO;
3695                         break;
3696                 }
3697                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
3698                         ret = fix_key_order(trans, search_root, path);
3699                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
3700                         ret = fix_item_offset(trans, search_root, path);
3701                 if (ret)
3702                         break;
3703                 btrfs_release_path(path);
3704         }
3705         ulist_free(roots);
3706         btrfs_free_path(path);
3707         return ret;
3708 }
3709
3710 static int check_block(struct btrfs_trans_handle *trans,
3711                        struct btrfs_root *root,
3712                        struct cache_tree *extent_cache,
3713                        struct extent_buffer *buf, u64 flags)
3714 {
3715         struct extent_record *rec;
3716         struct cache_extent *cache;
3717         struct btrfs_key key;
3718         enum btrfs_tree_block_status status;
3719         int ret = 0;
3720         int level;
3721
3722         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
3723         if (!cache)
3724                 return 1;
3725         rec = container_of(cache, struct extent_record, cache);
3726         rec->generation = btrfs_header_generation(buf);
3727
3728         level = btrfs_header_level(buf);
3729         if (btrfs_header_nritems(buf) > 0) {
3730
3731                 if (level == 0)
3732                         btrfs_item_key_to_cpu(buf, &key, 0);
3733                 else
3734                         btrfs_node_key_to_cpu(buf, &key, 0);
3735
3736                 rec->info_objectid = key.objectid;
3737         }
3738         rec->info_level = level;
3739
3740         if (btrfs_is_leaf(buf))
3741                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
3742         else
3743                 status = btrfs_check_node(root, &rec->parent_key, buf);
3744
3745         if (status != BTRFS_TREE_BLOCK_CLEAN) {
3746                 if (repair)
3747                         status = try_to_fix_bad_block(trans, root, buf,
3748                                                       status);
3749                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
3750                         ret = -EIO;
3751                         fprintf(stderr, "bad block %llu\n",
3752                                 (unsigned long long)buf->start);
3753                 } else {
3754                         /*
3755                          * Signal to callers we need to start the scan over
3756                          * again since we'll have cow'ed blocks.
3757                          */
3758                         ret = -EAGAIN;
3759                 }
3760         } else {
3761                 rec->content_checked = 1;
3762                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
3763                         rec->owner_ref_checked = 1;
3764                 else {
3765                         ret = check_owner_ref(root, rec, buf);
3766                         if (!ret)
3767                                 rec->owner_ref_checked = 1;
3768                 }
3769         }
3770         if (!ret)
3771                 maybe_free_extent_rec(extent_cache, rec);
3772         return ret;
3773 }
3774
3775 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3776                                                 u64 parent, u64 root)
3777 {
3778         struct list_head *cur = rec->backrefs.next;
3779         struct extent_backref *node;
3780         struct tree_backref *back;
3781
3782         while(cur != &rec->backrefs) {
3783                 node = list_entry(cur, struct extent_backref, list);
3784                 cur = cur->next;
3785                 if (node->is_data)
3786                         continue;
3787                 back = (struct tree_backref *)node;
3788                 if (parent > 0) {
3789                         if (!node->full_backref)
3790                                 continue;
3791                         if (parent == back->parent)
3792                                 return back;
3793                 } else {
3794                         if (node->full_backref)
3795                                 continue;
3796                         if (back->root == root)
3797                                 return back;
3798                 }
3799         }
3800         return NULL;
3801 }
3802
3803 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
3804                                                 u64 parent, u64 root)
3805 {
3806         struct tree_backref *ref = malloc(sizeof(*ref));
3807         memset(&ref->node, 0, sizeof(ref->node));
3808         if (parent > 0) {
3809                 ref->parent = parent;
3810                 ref->node.full_backref = 1;
3811         } else {
3812                 ref->root = root;
3813                 ref->node.full_backref = 0;
3814         }
3815         list_add_tail(&ref->node.list, &rec->backrefs);
3816
3817         return ref;
3818 }
3819
3820 static struct data_backref *find_data_backref(struct extent_record *rec,
3821                                                 u64 parent, u64 root,
3822                                                 u64 owner, u64 offset,
3823                                                 int found_ref,
3824                                                 u64 disk_bytenr, u64 bytes)
3825 {
3826         struct list_head *cur = rec->backrefs.next;
3827         struct extent_backref *node;
3828         struct data_backref *back;
3829
3830         while(cur != &rec->backrefs) {
3831                 node = list_entry(cur, struct extent_backref, list);
3832                 cur = cur->next;
3833                 if (!node->is_data)
3834                         continue;
3835                 back = (struct data_backref *)node;
3836                 if (parent > 0) {
3837                         if (!node->full_backref)
3838                                 continue;
3839                         if (parent == back->parent)
3840                                 return back;
3841                 } else {
3842                         if (node->full_backref)
3843                                 continue;
3844                         if (back->root == root && back->owner == owner &&
3845                             back->offset == offset) {
3846                                 if (found_ref && node->found_ref &&
3847                                     (back->bytes != bytes ||
3848                                     back->disk_bytenr != disk_bytenr))
3849                                         continue;
3850                                 return back;
3851                         }
3852                 }
3853         }
3854         return NULL;
3855 }
3856
3857 static struct data_backref *alloc_data_backref(struct extent_record *rec,
3858                                                 u64 parent, u64 root,
3859                                                 u64 owner, u64 offset,
3860                                                 u64 max_size)
3861 {
3862         struct data_backref *ref = malloc(sizeof(*ref));
3863         memset(&ref->node, 0, sizeof(ref->node));
3864         ref->node.is_data = 1;
3865
3866         if (parent > 0) {
3867                 ref->parent = parent;
3868                 ref->owner = 0;
3869                 ref->offset = 0;
3870                 ref->node.full_backref = 1;
3871         } else {
3872                 ref->root = root;
3873                 ref->owner = owner;
3874                 ref->offset = offset;
3875                 ref->node.full_backref = 0;
3876         }
3877         ref->bytes = max_size;
3878         ref->found_ref = 0;
3879         ref->num_refs = 0;
3880         list_add_tail(&ref->node.list, &rec->backrefs);
3881         if (max_size > rec->max_size)
3882                 rec->max_size = max_size;
3883         return ref;
3884 }
3885
3886 static int add_extent_rec(struct cache_tree *extent_cache,
3887                           struct btrfs_key *parent_key, u64 parent_gen,
3888                           u64 start, u64 nr, u64 extent_item_refs,
3889                           int is_root, int inc_ref, int set_checked,
3890                           int metadata, int extent_rec, u64 max_size)
3891 {
3892         struct extent_record *rec;
3893         struct cache_extent *cache;
3894         int ret = 0;
3895         int dup = 0;
3896
3897         cache = lookup_cache_extent(extent_cache, start, nr);
3898         if (cache) {
3899                 rec = container_of(cache, struct extent_record, cache);
3900                 if (inc_ref)
3901                         rec->refs++;
3902                 if (rec->nr == 1)
3903                         rec->nr = max(nr, max_size);
3904
3905                 /*
3906                  * We need to make sure to reset nr to whatever the extent
3907                  * record says was the real size, this way we can compare it to
3908                  * the backrefs.
3909                  */
3910                 if (extent_rec) {
3911                         if (start != rec->start || rec->found_rec) {
3912                                 struct extent_record *tmp;
3913
3914                                 dup = 1;
3915                                 if (list_empty(&rec->list))
3916                                         list_add_tail(&rec->list,
3917                                                       &duplicate_extents);
3918
3919                                 /*
3920                                  * We have to do this song and dance in case we
3921                                  * find an extent record that falls inside of
3922                                  * our current extent record but does not have
3923                                  * the same objectid.
3924                                  */
3925                                 tmp = malloc(sizeof(*tmp));
3926                                 if (!tmp)
3927                                         return -ENOMEM;
3928                                 tmp->start = start;
3929                                 tmp->max_size = max_size;
3930                                 tmp->nr = nr;
3931                                 tmp->found_rec = 1;
3932                                 tmp->metadata = metadata;
3933                                 tmp->extent_item_refs = extent_item_refs;
3934                                 INIT_LIST_HEAD(&tmp->list);
3935                                 list_add_tail(&tmp->list, &rec->dups);
3936                                 rec->num_duplicates++;
3937                         } else {
3938                                 rec->nr = nr;
3939                                 rec->found_rec = 1;
3940                         }
3941                 }
3942
3943                 if (extent_item_refs && !dup) {
3944                         if (rec->extent_item_refs) {
3945                                 fprintf(stderr, "block %llu rec "
3946                                         "extent_item_refs %llu, passed %llu\n",
3947                                         (unsigned long long)start,
3948                                         (unsigned long long)
3949                                                         rec->extent_item_refs,
3950                                         (unsigned long long)extent_item_refs);
3951                         }
3952                         rec->extent_item_refs = extent_item_refs;
3953                 }
3954                 if (is_root)
3955                         rec->is_root = 1;
3956                 if (set_checked) {
3957                         rec->content_checked = 1;
3958                         rec->owner_ref_checked = 1;
3959                 }
3960
3961                 if (parent_key)
3962                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
3963                 if (parent_gen)
3964                         rec->parent_generation = parent_gen;
3965
3966                 if (rec->max_size < max_size)
3967                         rec->max_size = max_size;
3968
3969                 maybe_free_extent_rec(extent_cache, rec);
3970                 return ret;
3971         }
3972         rec = malloc(sizeof(*rec));
3973         rec->start = start;
3974         rec->max_size = max_size;
3975         rec->nr = max(nr, max_size);
3976         rec->found_rec = !!extent_rec;
3977         rec->content_checked = 0;
3978         rec->owner_ref_checked = 0;
3979         rec->num_duplicates = 0;
3980         rec->metadata = metadata;
3981         INIT_LIST_HEAD(&rec->backrefs);
3982         INIT_LIST_HEAD(&rec->dups);
3983         INIT_LIST_HEAD(&rec->list);
3984
3985         if (is_root)
3986                 rec->is_root = 1;
3987         else
3988                 rec->is_root = 0;
3989
3990         if (inc_ref)
3991                 rec->refs = 1;
3992         else
3993                 rec->refs = 0;
3994
3995         if (extent_item_refs)
3996                 rec->extent_item_refs = extent_item_refs;
3997         else
3998                 rec->extent_item_refs = 0;
3999
4000         if (parent_key)
4001                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4002         else
4003                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4004
4005         if (parent_gen)
4006                 rec->parent_generation = parent_gen;
4007         else
4008                 rec->parent_generation = 0;
4009
4010         rec->cache.start = start;
4011         rec->cache.size = nr;
4012         ret = insert_cache_extent(extent_cache, &rec->cache);
4013         BUG_ON(ret);
4014         bytes_used += nr;
4015         if (set_checked) {
4016                 rec->content_checked = 1;
4017                 rec->owner_ref_checked = 1;
4018         }
4019         return ret;
4020 }
4021
4022 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4023                             u64 parent, u64 root, int found_ref)
4024 {
4025         struct extent_record *rec;
4026         struct tree_backref *back;
4027         struct cache_extent *cache;
4028
4029         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4030         if (!cache) {
4031                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4032                                1, 0, 0, 0, 0, 1, 0, 0);
4033                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4034                 if (!cache)
4035                         abort();
4036         }
4037
4038         rec = container_of(cache, struct extent_record, cache);
4039         if (rec->start != bytenr) {
4040                 abort();
4041         }
4042
4043         back = find_tree_backref(rec, parent, root);
4044         if (!back)
4045                 back = alloc_tree_backref(rec, parent, root);
4046
4047         if (found_ref) {
4048                 if (back->node.found_ref) {
4049                         fprintf(stderr, "Extent back ref already exists "
4050                                 "for %llu parent %llu root %llu \n",
4051                                 (unsigned long long)bytenr,
4052                                 (unsigned long long)parent,
4053                                 (unsigned long long)root);
4054                 }
4055                 back->node.found_ref = 1;
4056         } else {
4057                 if (back->node.found_extent_tree) {
4058                         fprintf(stderr, "Extent back ref already exists "
4059                                 "for %llu parent %llu root %llu \n",
4060                                 (unsigned long long)bytenr,
4061                                 (unsigned long long)parent,
4062                                 (unsigned long long)root);
4063                 }
4064                 back->node.found_extent_tree = 1;
4065         }
4066         maybe_free_extent_rec(extent_cache, rec);
4067         return 0;
4068 }
4069
4070 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4071                             u64 parent, u64 root, u64 owner, u64 offset,
4072                             u32 num_refs, int found_ref, u64 max_size)
4073 {
4074         struct extent_record *rec;
4075         struct data_backref *back;
4076         struct cache_extent *cache;
4077
4078         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4079         if (!cache) {
4080                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4081                                0, 0, max_size);
4082                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4083                 if (!cache)
4084                         abort();
4085         }
4086
4087         rec = container_of(cache, struct extent_record, cache);
4088         if (rec->max_size < max_size)
4089                 rec->max_size = max_size;
4090
4091         /*
4092          * If found_ref is set then max_size is the real size and must match the
4093          * existing refs.  So if we have already found a ref then we need to
4094          * make sure that this ref matches the existing one, otherwise we need
4095          * to add a new backref so we can notice that the backrefs don't match
4096          * and we need to figure out who is telling the truth.  This is to
4097          * account for that awful fsync bug I introduced where we'd end up with
4098          * a btrfs_file_extent_item that would have its length include multiple
4099          * prealloc extents or point inside of a prealloc extent.
4100          */
4101         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4102                                  bytenr, max_size);
4103         if (!back)
4104                 back = alloc_data_backref(rec, parent, root, owner, offset,
4105                                           max_size);
4106
4107         if (found_ref) {
4108                 BUG_ON(num_refs != 1);
4109                 if (back->node.found_ref)
4110                         BUG_ON(back->bytes != max_size);
4111                 back->node.found_ref = 1;
4112                 back->found_ref += 1;
4113                 back->bytes = max_size;
4114                 back->disk_bytenr = bytenr;
4115                 rec->refs += 1;
4116                 rec->content_checked = 1;
4117                 rec->owner_ref_checked = 1;
4118         } else {
4119                 if (back->node.found_extent_tree) {
4120                         fprintf(stderr, "Extent back ref already exists "
4121                                 "for %llu parent %llu root %llu "
4122                                 "owner %llu offset %llu num_refs %lu\n",
4123                                 (unsigned long long)bytenr,
4124                                 (unsigned long long)parent,
4125                                 (unsigned long long)root,
4126                                 (unsigned long long)owner,
4127                                 (unsigned long long)offset,
4128                                 (unsigned long)num_refs);
4129                 }
4130                 back->num_refs = num_refs;
4131                 back->node.found_extent_tree = 1;
4132         }
4133         maybe_free_extent_rec(extent_cache, rec);
4134         return 0;
4135 }
4136
4137 static int add_pending(struct cache_tree *pending,
4138                        struct cache_tree *seen, u64 bytenr, u32 size)
4139 {
4140         int ret;
4141         ret = add_cache_extent(seen, bytenr, size);
4142         if (ret)
4143                 return ret;
4144         add_cache_extent(pending, bytenr, size);
4145         return 0;
4146 }
4147
4148 static int pick_next_pending(struct cache_tree *pending,
4149                         struct cache_tree *reada,
4150                         struct cache_tree *nodes,
4151                         u64 last, struct block_info *bits, int bits_nr,
4152                         int *reada_bits)
4153 {
4154         unsigned long node_start = last;
4155         struct cache_extent *cache;
4156         int ret;
4157
4158         cache = search_cache_extent(reada, 0);
4159         if (cache) {
4160                 bits[0].start = cache->start;
4161                 bits[0].size = cache->size;
4162                 *reada_bits = 1;
4163                 return 1;
4164         }
4165         *reada_bits = 0;
4166         if (node_start > 32768)
4167                 node_start -= 32768;
4168
4169         cache = search_cache_extent(nodes, node_start);
4170         if (!cache)
4171                 cache = search_cache_extent(nodes, 0);
4172
4173         if (!cache) {
4174                  cache = search_cache_extent(pending, 0);
4175                  if (!cache)
4176                          return 0;
4177                  ret = 0;
4178                  do {
4179                          bits[ret].start = cache->start;
4180                          bits[ret].size = cache->size;
4181                          cache = next_cache_extent(cache);
4182                          ret++;
4183                  } while (cache && ret < bits_nr);
4184                  return ret;
4185         }
4186
4187         ret = 0;
4188         do {
4189                 bits[ret].start = cache->start;
4190                 bits[ret].size = cache->size;
4191                 cache = next_cache_extent(cache);
4192                 ret++;
4193         } while (cache && ret < bits_nr);
4194
4195         if (bits_nr - ret > 8) {
4196                 u64 lookup = bits[0].start + bits[0].size;
4197                 struct cache_extent *next;
4198                 next = search_cache_extent(pending, lookup);
4199                 while(next) {
4200                         if (next->start - lookup > 32768)
4201                                 break;
4202                         bits[ret].start = next->start;
4203                         bits[ret].size = next->size;
4204                         lookup = next->start + next->size;
4205                         ret++;
4206                         if (ret == bits_nr)
4207                                 break;
4208                         next = next_cache_extent(next);
4209                         if (!next)
4210                                 break;
4211                 }
4212         }
4213         return ret;
4214 }
4215
4216 static void free_chunk_record(struct cache_extent *cache)
4217 {
4218         struct chunk_record *rec;
4219
4220         rec = container_of(cache, struct chunk_record, cache);
4221         list_del_init(&rec->list);
4222         list_del_init(&rec->dextents);
4223         free(rec);
4224 }
4225
4226 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4227 {
4228         cache_tree_free_extents(chunk_cache, free_chunk_record);
4229 }
4230
4231 static void free_device_record(struct rb_node *node)
4232 {
4233         struct device_record *rec;
4234
4235         rec = container_of(node, struct device_record, node);
4236         free(rec);
4237 }
4238
4239 FREE_RB_BASED_TREE(device_cache, free_device_record);
4240
4241 int insert_block_group_record(struct block_group_tree *tree,
4242                               struct block_group_record *bg_rec)
4243 {
4244         int ret;
4245
4246         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4247         if (ret)
4248                 return ret;
4249
4250         list_add_tail(&bg_rec->list, &tree->block_groups);
4251         return 0;
4252 }
4253
4254 static void free_block_group_record(struct cache_extent *cache)
4255 {
4256         struct block_group_record *rec;
4257
4258         rec = container_of(cache, struct block_group_record, cache);
4259         list_del_init(&rec->list);
4260         free(rec);
4261 }
4262
4263 void free_block_group_tree(struct block_group_tree *tree)
4264 {
4265         cache_tree_free_extents(&tree->tree, free_block_group_record);
4266 }
4267
4268 int insert_device_extent_record(struct device_extent_tree *tree,
4269                                 struct device_extent_record *de_rec)
4270 {
4271         int ret;
4272
4273         /*
4274          * Device extent is a bit different from the other extents, because
4275          * the extents which belong to the different devices may have the
4276          * same start and size, so we need use the special extent cache
4277          * search/insert functions.
4278          */
4279         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4280         if (ret)
4281                 return ret;
4282
4283         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4284         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4285         return 0;
4286 }
4287
4288 static void free_device_extent_record(struct cache_extent *cache)
4289 {
4290         struct device_extent_record *rec;
4291
4292         rec = container_of(cache, struct device_extent_record, cache);
4293         if (!list_empty(&rec->chunk_list))
4294                 list_del_init(&rec->chunk_list);
4295         if (!list_empty(&rec->device_list))
4296                 list_del_init(&rec->device_list);
4297         free(rec);
4298 }
4299
4300 void free_device_extent_tree(struct device_extent_tree *tree)
4301 {
4302         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4303 }
4304
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of
 * @leaf.
 *
 * The item key's objectid is used as the extent bytenr and its offset as
 * the parent.  Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are
 * recorded as tree backrefs, everything else as data backrefs carrying
 * the item's ref count.
 *
 * Returns the result of add_tree_backref()/add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset, 0,
				0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
4323
4324 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4325                                             struct btrfs_key *key,
4326                                             int slot)
4327 {
4328         struct btrfs_chunk *ptr;
4329         struct chunk_record *rec;
4330         int num_stripes, i;
4331
4332         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4333         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4334
4335         rec = malloc(btrfs_chunk_record_size(num_stripes));
4336         if (!rec) {
4337                 fprintf(stderr, "memory allocation failed\n");
4338                 exit(-1);
4339         }
4340
4341         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
4342
4343         INIT_LIST_HEAD(&rec->list);
4344         INIT_LIST_HEAD(&rec->dextents);
4345         rec->bg_rec = NULL;
4346
4347         rec->cache.start = key->offset;
4348         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4349
4350         rec->generation = btrfs_header_generation(leaf);
4351
4352         rec->objectid = key->objectid;
4353         rec->type = key->type;
4354         rec->offset = key->offset;
4355
4356         rec->length = rec->cache.size;
4357         rec->owner = btrfs_chunk_owner(leaf, ptr);
4358         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4359         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4360         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4361         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4362         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4363         rec->num_stripes = num_stripes;
4364         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4365
4366         for (i = 0; i < rec->num_stripes; ++i) {
4367                 rec->stripes[i].devid =
4368                         btrfs_stripe_devid_nr(leaf, ptr, i);
4369                 rec->stripes[i].offset =
4370                         btrfs_stripe_offset_nr(leaf, ptr, i);
4371                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4372                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4373                                 BTRFS_UUID_SIZE);
4374         }
4375
4376         return rec;
4377 }
4378
4379 static int process_chunk_item(struct cache_tree *chunk_cache,
4380                               struct btrfs_key *key, struct extent_buffer *eb,
4381                               int slot)
4382 {
4383         struct chunk_record *rec;
4384         int ret = 0;
4385
4386         rec = btrfs_new_chunk_record(eb, key, slot);
4387         ret = insert_cache_extent(chunk_cache, &rec->cache);
4388         if (ret) {
4389                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4390                         rec->offset, rec->length);
4391                 free(rec);
4392         }
4393
4394         return ret;
4395 }
4396
4397 static int process_device_item(struct rb_root *dev_cache,
4398                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4399 {
4400         struct btrfs_dev_item *ptr;
4401         struct device_record *rec;
4402         int ret = 0;
4403
4404         ptr = btrfs_item_ptr(eb,
4405                 slot, struct btrfs_dev_item);
4406
4407         rec = malloc(sizeof(*rec));
4408         if (!rec) {
4409                 fprintf(stderr, "memory allocation failed\n");
4410                 return -ENOMEM;
4411         }
4412
4413         rec->devid = key->offset;
4414         rec->generation = btrfs_header_generation(eb);
4415
4416         rec->objectid = key->objectid;
4417         rec->type = key->type;
4418         rec->offset = key->offset;
4419
4420         rec->devid = btrfs_device_id(eb, ptr);
4421         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4422         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4423
4424         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4425         if (ret) {
4426                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4427                 free(rec);
4428         }
4429
4430         return ret;
4431 }
4432
4433 struct block_group_record *
4434 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4435                              int slot)
4436 {
4437         struct btrfs_block_group_item *ptr;
4438         struct block_group_record *rec;
4439
4440         rec = malloc(sizeof(*rec));
4441         if (!rec) {
4442                 fprintf(stderr, "memory allocation failed\n");
4443                 exit(-1);
4444         }
4445         memset(rec, 0, sizeof(*rec));
4446
4447         rec->cache.start = key->objectid;
4448         rec->cache.size = key->offset;
4449
4450         rec->generation = btrfs_header_generation(leaf);
4451
4452         rec->objectid = key->objectid;
4453         rec->type = key->type;
4454         rec->offset = key->offset;
4455
4456         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4457         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4458
4459         INIT_LIST_HEAD(&rec->list);
4460
4461         return rec;
4462 }
4463
4464 static int process_block_group_item(struct block_group_tree *block_group_cache,
4465                                     struct btrfs_key *key,
4466                                     struct extent_buffer *eb, int slot)
4467 {
4468         struct block_group_record *rec;
4469         int ret = 0;
4470
4471         rec = btrfs_new_block_group_record(eb, key, slot);
4472         ret = insert_block_group_record(block_group_cache, rec);
4473         if (ret) {
4474                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4475                         rec->objectid, rec->offset);
4476                 free(rec);
4477         }
4478
4479         return ret;
4480 }
4481
4482 struct device_extent_record *
4483 btrfs_new_device_extent_record(struct extent_buffer *leaf,
4484                                struct btrfs_key *key, int slot)
4485 {
4486         struct device_extent_record *rec;
4487         struct btrfs_dev_extent *ptr;
4488
4489         rec = malloc(sizeof(*rec));
4490         if (!rec) {
4491                 fprintf(stderr, "memory allocation failed\n");
4492                 exit(-1);
4493         }
4494         memset(rec, 0, sizeof(*rec));
4495
4496         rec->cache.objectid = key->objectid;
4497         rec->cache.start = key->offset;
4498
4499         rec->generation = btrfs_header_generation(leaf);
4500
4501         rec->objectid = key->objectid;
4502         rec->type = key->type;
4503         rec->offset = key->offset;
4504
4505         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
4506         rec->chunk_objecteid =
4507                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
4508         rec->chunk_offset =
4509                 btrfs_dev_extent_chunk_offset(leaf, ptr);
4510         rec->length = btrfs_dev_extent_length(leaf, ptr);
4511         rec->cache.size = rec->length;
4512
4513         INIT_LIST_HEAD(&rec->chunk_list);
4514         INIT_LIST_HEAD(&rec->device_list);
4515
4516         return rec;
4517 }
4518
4519 static int
4520 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
4521                            struct btrfs_key *key, struct extent_buffer *eb,
4522                            int slot)
4523 {
4524         struct device_extent_record *rec;
4525         int ret;
4526
4527         rec = btrfs_new_device_extent_record(eb, key, slot);
4528         ret = insert_device_extent_record(dev_extent_cache, rec);
4529         if (ret) {
4530                 fprintf(stderr,
4531                         "Device extent[%llu, %llu, %llu] existed.\n",
4532                         rec->objectid, rec->offset, rec->length);
4533                 free(rec);
4534         }
4535
4536         return ret;
4537 }
4538
4539 static int process_extent_item(struct btrfs_root *root,
4540                                struct cache_tree *extent_cache,
4541                                struct extent_buffer *eb, int slot)
4542 {
4543         struct btrfs_extent_item *ei;
4544         struct btrfs_extent_inline_ref *iref;
4545         struct btrfs_extent_data_ref *dref;
4546         struct btrfs_shared_data_ref *sref;
4547         struct btrfs_key key;
4548         unsigned long end;
4549         unsigned long ptr;
4550         int type;
4551         u32 item_size = btrfs_item_size_nr(eb, slot);
4552         u64 refs = 0;
4553         u64 offset;
4554         u64 num_bytes;
4555         int metadata = 0;
4556
4557         btrfs_item_key_to_cpu(eb, &key, slot);
4558
4559         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4560                 metadata = 1;
4561                 num_bytes = root->leafsize;
4562         } else {
4563                 num_bytes = key.offset;
4564         }
4565
4566         if (item_size < sizeof(*ei)) {
4567 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4568                 struct btrfs_extent_item_v0 *ei0;
4569                 BUG_ON(item_size != sizeof(*ei0));
4570                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
4571                 refs = btrfs_extent_refs_v0(eb, ei0);
4572 #else
4573                 BUG();
4574 #endif
4575                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
4576                                       num_bytes, refs, 0, 0, 0, metadata, 1,
4577                                       num_bytes);
4578         }
4579
4580         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
4581         refs = btrfs_extent_refs(eb, ei);
4582
4583         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
4584                        refs, 0, 0, 0, metadata, 1, num_bytes);
4585
4586         ptr = (unsigned long)(ei + 1);
4587         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
4588             key.type == BTRFS_EXTENT_ITEM_KEY)
4589                 ptr += sizeof(struct btrfs_tree_block_info);
4590
4591         end = (unsigned long)ei + item_size;
4592         while (ptr < end) {
4593                 iref = (struct btrfs_extent_inline_ref *)ptr;
4594                 type = btrfs_extent_inline_ref_type(eb, iref);
4595                 offset = btrfs_extent_inline_ref_offset(eb, iref);
4596                 switch (type) {
4597                 case BTRFS_TREE_BLOCK_REF_KEY:
4598                         add_tree_backref(extent_cache, key.objectid,
4599                                          0, offset, 0);
4600                         break;
4601                 case BTRFS_SHARED_BLOCK_REF_KEY:
4602                         add_tree_backref(extent_cache, key.objectid,
4603                                          offset, 0, 0);
4604                         break;
4605                 case BTRFS_EXTENT_DATA_REF_KEY:
4606                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
4607                         add_data_backref(extent_cache, key.objectid, 0,
4608                                         btrfs_extent_data_ref_root(eb, dref),
4609                                         btrfs_extent_data_ref_objectid(eb,
4610                                                                        dref),
4611                                         btrfs_extent_data_ref_offset(eb, dref),
4612                                         btrfs_extent_data_ref_count(eb, dref),
4613                                         0, num_bytes);
4614                         break;
4615                 case BTRFS_SHARED_DATA_REF_KEY:
4616                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
4617                         add_data_backref(extent_cache, key.objectid, offset,
4618                                         0, 0, 0,
4619                                         btrfs_shared_data_ref_count(eb, sref),
4620                                         0, num_bytes);
4621                         break;
4622                 default:
4623                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
4624                                 key.objectid, key.type, num_bytes);
4625                         goto out;
4626                 }
4627                 ptr += btrfs_extent_inline_ref_size(type);
4628         }
4629         WARN_ON(ptr > end);
4630 out:
4631         return 0;
4632 }
4633
4634 static int check_cache_range(struct btrfs_root *root,
4635                              struct btrfs_block_group_cache *cache,
4636                              u64 offset, u64 bytes)
4637 {
4638         struct btrfs_free_space *entry;
4639         u64 *logical;
4640         u64 bytenr;
4641         int stripe_len;
4642         int i, nr, ret;
4643
4644         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
4645                 bytenr = btrfs_sb_offset(i);
4646                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
4647                                        cache->key.objectid, bytenr, 0,
4648                                        &logical, &nr, &stripe_len);
4649                 if (ret)
4650                         return ret;
4651
4652                 while (nr--) {
4653                         if (logical[nr] + stripe_len <= offset)
4654                                 continue;
4655                         if (offset + bytes <= logical[nr])
4656                                 continue;
4657                         if (logical[nr] == offset) {
4658                                 if (stripe_len >= bytes) {
4659                                         kfree(logical);
4660                                         return 0;
4661                                 }
4662                                 bytes -= stripe_len;
4663                                 offset += stripe_len;
4664                         } else if (logical[nr] < offset) {
4665                                 if (logical[nr] + stripe_len >=
4666                                     offset + bytes) {
4667                                         kfree(logical);
4668                                         return 0;
4669                                 }
4670                                 bytes = (offset + bytes) -
4671                                         (logical[nr] + stripe_len);
4672                                 offset = logical[nr] + stripe_len;
4673                         } else {
4674                                 /*
4675                                  * Could be tricky, the super may land in the
4676                                  * middle of the area we're checking.  First
4677                                  * check the easiest case, it's at the end.
4678                                  */
4679                                 if (logical[nr] + stripe_len >=
4680                                     bytes + offset) {
4681                                         bytes = logical[nr] - offset;
4682                                         continue;
4683                                 }
4684
4685                                 /* Check the left side */
4686                                 ret = check_cache_range(root, cache,
4687                                                         offset,
4688                                                         logical[nr] - offset);
4689                                 if (ret) {
4690                                         kfree(logical);
4691                                         return ret;
4692                                 }
4693
4694                                 /* Now we continue with the right side */
4695                                 bytes = (offset + bytes) -
4696                                         (logical[nr] + stripe_len);
4697                                 offset = logical[nr] + stripe_len;
4698                         }
4699                 }
4700
4701                 kfree(logical);
4702         }
4703
4704         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
4705         if (!entry) {
4706                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
4707                         offset, offset+bytes);
4708                 return -EINVAL;
4709         }
4710
4711         if (entry->offset != offset) {
4712                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
4713                         entry->offset);
4714                 return -EINVAL;
4715         }
4716
4717         if (entry->bytes != bytes) {
4718                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
4719                         bytes, entry->bytes, offset);
4720                 return -EINVAL;
4721         }
4722
4723         unlink_free_space(cache->free_space_ctl, entry);
4724         free(entry);
4725         return 0;
4726 }
4727
4728 static int verify_space_cache(struct btrfs_root *root,
4729                               struct btrfs_block_group_cache *cache)
4730 {
4731         struct btrfs_path *path;
4732         struct extent_buffer *leaf;
4733         struct btrfs_key key;
4734         u64 last;
4735         int ret = 0;
4736
4737         path = btrfs_alloc_path();
4738         if (!path)
4739                 return -ENOMEM;
4740
4741         root = root->fs_info->extent_root;
4742
4743         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
4744
4745         key.objectid = last;
4746         key.offset = 0;
4747         key.type = BTRFS_EXTENT_ITEM_KEY;
4748
4749         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4750         if (ret < 0)
4751                 goto out;
4752         ret = 0;
4753         while (1) {
4754                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
4755                         ret = btrfs_next_leaf(root, path);
4756                         if (ret < 0)
4757                                 goto out;
4758                         if (ret > 0) {
4759                                 ret = 0;
4760                                 break;
4761                         }
4762                 }
4763                 leaf = path->nodes[0];
4764                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4765                 if (key.objectid >= cache->key.offset + cache->key.objectid)
4766                         break;
4767                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
4768                     key.type != BTRFS_METADATA_ITEM_KEY) {
4769                         path->slots[0]++;
4770                         continue;
4771                 }
4772
4773                 if (last == key.objectid) {
4774                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
4775                                 last = key.objectid + key.offset;
4776                         else
4777                                 last = key.objectid + root->leafsize;
4778                         path->slots[0]++;
4779                         continue;
4780                 }
4781
4782                 ret = check_cache_range(root, cache, last,
4783                                         key.objectid - last);
4784                 if (ret)
4785                         break;
4786                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
4787                         last = key.objectid + key.offset;
4788                 else
4789                         last = key.objectid + root->leafsize;
4790                 path->slots[0]++;
4791         }
4792
4793         if (last < cache->key.objectid + cache->key.offset)
4794                 ret = check_cache_range(root, cache, last,
4795                                         cache->key.objectid +
4796                                         cache->key.offset - last);
4797
4798 out:
4799         btrfs_free_path(path);
4800
4801         if (!ret &&
4802             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
4803                 fprintf(stderr, "There are still entries left in the space "
4804                         "cache\n");
4805                 ret = -EINVAL;
4806         }
4807
4808         return ret;
4809 }
4810
4811 static int check_space_cache(struct btrfs_root *root)
4812 {
4813         struct btrfs_block_group_cache *cache;
4814         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
4815         int ret;
4816         int error = 0;
4817
4818         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
4819             btrfs_super_generation(root->fs_info->super_copy) !=
4820             btrfs_super_cache_generation(root->fs_info->super_copy)) {
4821                 printf("cache and super generation don't match, space cache "
4822                        "will be invalidated\n");
4823                 return 0;
4824         }
4825
4826         while (1) {
4827                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
4828                 if (!cache)
4829                         break;
4830
4831                 start = cache->key.objectid + cache->key.offset;
4832                 if (!cache->free_space_ctl) {
4833                         if (btrfs_init_free_space_ctl(cache,
4834                                                       root->sectorsize)) {
4835                                 ret = -ENOMEM;
4836                                 break;
4837                         }
4838                 } else {
4839                         btrfs_remove_free_space_cache(cache);
4840                 }
4841
4842                 ret = load_free_space_cache(root->fs_info, cache);
4843                 if (!ret)
4844                         continue;
4845
4846                 ret = verify_space_cache(root, cache);
4847                 if (ret) {
4848                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
4849                                 cache->key.objectid);
4850                         error++;
4851                 }
4852         }
4853
4854         return error ? -EINVAL : 0;
4855 }
4856
4857 static int read_extent_data(struct btrfs_root *root, char *data,
4858                         u64 logical, u64 *len, int mirror)
4859 {
4860         u64 offset = 0;
4861         struct btrfs_multi_bio *multi = NULL;
4862         struct btrfs_fs_info *info = root->fs_info;
4863         struct btrfs_device *device;
4864         int ret = 0;
4865         u64 max_len = *len;
4866
4867         ret = btrfs_map_block(&info->mapping_tree, READ, logical, len,
4868                               &multi, mirror, NULL);
4869         if (ret) {
4870                 fprintf(stderr, "Couldn't map the block %llu\n",
4871                                 logical + offset);
4872                 goto err;
4873         }
4874         device = multi->stripes[0].dev;
4875
4876         if (device->fd == 0)
4877                 goto err;
4878         if (*len > max_len)
4879                 *len = max_len;
4880
4881         ret = pread64(device->fd, data, *len, multi->stripes[0].physical);
4882         if (ret != *len)
4883                 ret = -EIO;
4884         else
4885                 ret = 0;
4886 err:
4887         kfree(multi);
4888         return ret;
4889 }
4890
4891 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
4892                         u64 num_bytes, unsigned long leaf_offset,
4893                         struct extent_buffer *eb) {
4894
4895         u64 offset = 0;
4896         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
4897         char *data;
4898         unsigned long csum_offset;
4899         u32 csum;
4900         u32 csum_expected;
4901         u64 read_len;
4902         u64 data_checked = 0;
4903         u64 tmp;
4904         int ret = 0;
4905         int mirror;
4906         int num_copies;
4907
4908         if (num_bytes % root->sectorsize)
4909                 return -EINVAL;
4910
4911         data = malloc(num_bytes);
4912         if (!data)
4913                 return -ENOMEM;
4914
4915         while (offset < num_bytes) {
4916                 mirror = 0;
4917 again:
4918                 read_len = num_bytes - offset;
4919                 /* read as much space once a time */
4920                 ret = read_extent_data(root, data + offset,
4921                                 bytenr + offset, &read_len, mirror);
4922                 if (ret)
4923                         goto out;
4924                 data_checked = 0;
4925                 /* verify every 4k data's checksum */
4926                 while (data_checked < read_len) {
4927                         csum = ~(u32)0;
4928                         tmp = offset + data_checked;
4929
4930                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
4931                                                csum, root->sectorsize);
4932                         btrfs_csum_final(csum, (char *)&csum);
4933
4934                         csum_offset = leaf_offset +
4935                                  tmp / root->sectorsize * csum_size;
4936                         read_extent_buffer(eb, (char *)&csum_expected,
4937                                            csum_offset, csum_size);
4938                         /* try another mirror */
4939                         if (csum != csum_expected) {
4940                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
4941                                                 mirror, bytenr + tmp,
4942                                                 csum, csum_expected);
4943                                 num_copies = btrfs_num_copies(
4944                                                 &root->fs_info->mapping_tree,
4945                                                 bytenr, num_bytes);
4946                                 if (mirror < num_copies - 1) {
4947                                         mirror += 1;
4948                                         goto again;
4949                                 }
4950                         }
4951                         data_checked += root->sectorsize;
4952                 }
4953                 offset += read_len;
4954         }
4955 out:
4956         free(data);
4957         return ret;
4958 }
4959
4960 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
4961                                u64 num_bytes)
4962 {
4963         struct btrfs_path *path;
4964         struct extent_buffer *leaf;
4965         struct btrfs_key key;
4966         int ret;
4967
4968         path = btrfs_alloc_path();
4969         if (!path) {
4970                 fprintf(stderr, "Error allocing path\n");
4971                 return -ENOMEM;
4972         }
4973
4974         key.objectid = bytenr;
4975         key.type = BTRFS_EXTENT_ITEM_KEY;
4976         key.offset = (u64)-1;
4977
4978 again:
4979         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
4980                                 0, 0);
4981         if (ret < 0) {
4982                 fprintf(stderr, "Error looking up extent record %d\n", ret);
4983                 btrfs_free_path(path);
4984                 return ret;
4985         } else if (ret) {
4986                 if (path->slots[0] > 0) {
4987                         path->slots[0]--;
4988                 } else {
4989                         ret = btrfs_prev_leaf(root, path);
4990                         if (ret < 0) {
4991                                 goto out;
4992                         } else if (ret > 0) {
4993                                 ret = 0;
4994                                 goto out;
4995                         }
4996                 }
4997         }
4998
4999         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5000
5001         /*
5002          * Block group items come before extent items if they have the same
5003          * bytenr, so walk back one more just in case.  Dear future traveler,
5004          * first congrats on mastering time travel.  Now if it's not too much
5005          * trouble could you go back to 2006 and tell Chris to make the
5006          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5007          * EXTENT_ITEM_KEY please?
5008          */
5009         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5010                 if (path->slots[0] > 0) {
5011                         path->slots[0]--;
5012                 } else {
5013                         ret = btrfs_prev_leaf(root, path);
5014                         if (ret < 0) {
5015                                 goto out;
5016                         } else if (ret > 0) {
5017                                 ret = 0;
5018                                 goto out;
5019                         }
5020                 }
5021                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5022         }
5023
5024         while (num_bytes) {
5025                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5026                         ret = btrfs_next_leaf(root, path);
5027                         if (ret < 0) {
5028                                 fprintf(stderr, "Error going to next leaf "
5029                                         "%d\n", ret);
5030                                 btrfs_free_path(path);
5031                                 return ret;
5032                         } else if (ret) {
5033                                 break;
5034                         }
5035                 }
5036                 leaf = path->nodes[0];
5037                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5038                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5039                         path->slots[0]++;
5040                         continue;
5041                 }
5042                 if (key.objectid + key.offset < bytenr) {
5043                         path->slots[0]++;
5044                         continue;
5045                 }
5046                 if (key.objectid > bytenr + num_bytes)
5047                         break;
5048
5049                 if (key.objectid == bytenr) {
5050                         if (key.offset >= num_bytes) {
5051                                 num_bytes = 0;
5052                                 break;
5053                         }
5054                         num_bytes -= key.offset;
5055                         bytenr += key.offset;
5056                 } else if (key.objectid < bytenr) {
5057                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5058                                 num_bytes = 0;
5059                                 break;
5060                         }
5061                         num_bytes = (bytenr + num_bytes) -
5062                                 (key.objectid + key.offset);
5063                         bytenr = key.objectid + key.offset;
5064                 } else {
5065                         if (key.objectid + key.offset < bytenr + num_bytes) {
5066                                 u64 new_start = key.objectid + key.offset;
5067                                 u64 new_bytes = bytenr + num_bytes - new_start;
5068
5069                                 /*
5070                                  * Weird case, the extent is in the middle of
5071                                  * our range, we'll have to search one side
5072                                  * and then the other.  Not sure if this happens
5073                                  * in real life, but no harm in coding it up
5074                                  * anyway just in case.
5075                                  */
5076                                 btrfs_release_path(path);
5077                                 ret = check_extent_exists(root, new_start,
5078                                                           new_bytes);
5079                                 if (ret) {
5080                                         fprintf(stderr, "Right section didn't "
5081                                                 "have a record\n");
5082                                         break;
5083                                 }
5084                                 num_bytes = key.objectid - bytenr;
5085                                 goto again;
5086                         }
5087                         num_bytes = key.objectid - bytenr;
5088                 }
5089                 path->slots[0]++;
5090         }
5091         ret = 0;
5092
5093 out:
5094         if (num_bytes && !ret) {
5095                 fprintf(stderr, "There are no extents for csum range "
5096                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5097                 ret = 1;
5098         }
5099
5100         btrfs_free_path(path);
5101         return ret;
5102 }
5103
5104 static int check_csums(struct btrfs_root *root)
5105 {
5106         struct btrfs_path *path;
5107         struct extent_buffer *leaf;
5108         struct btrfs_key key;
5109         u64 offset = 0, num_bytes = 0;
5110         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5111         int errors = 0;
5112         int ret;
5113         u64 data_len;
5114         unsigned long leaf_offset;
5115
5116         root = root->fs_info->csum_root;
5117         if (!extent_buffer_uptodate(root->node)) {
5118                 fprintf(stderr, "No valid csum tree found\n");
5119                 return -ENOENT;
5120         }
5121
5122         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5123         key.type = BTRFS_EXTENT_CSUM_KEY;
5124         key.offset = 0;
5125
5126         path = btrfs_alloc_path();
5127         if (!path)
5128                 return -ENOMEM;
5129
5130         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5131         if (ret < 0) {
5132                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5133                 btrfs_free_path(path);
5134                 return ret;
5135         }
5136
5137         if (ret > 0 && path->slots[0])
5138                 path->slots[0]--;
5139         ret = 0;
5140
5141         while (1) {
5142                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5143                         ret = btrfs_next_leaf(root, path);
5144                         if (ret < 0) {
5145                                 fprintf(stderr, "Error going to next leaf "
5146                                         "%d\n", ret);
5147                                 break;
5148                         }
5149                         if (ret)
5150                                 break;
5151                 }
5152                 leaf = path->nodes[0];
5153
5154                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5155                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5156                         path->slots[0]++;
5157                         continue;
5158                 }
5159
5160                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5161                               csum_size) * root->sectorsize;
5162                 if (!check_data_csum)
5163                         goto skip_csum_check;
5164                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5165                 ret = check_extent_csums(root, key.offset, data_len,
5166                                          leaf_offset, leaf);
5167                 if (ret)
5168                         break;
5169 skip_csum_check:
5170                 if (!num_bytes) {
5171                         offset = key.offset;
5172                 } else if (key.offset != offset + num_bytes) {
5173                         ret = check_extent_exists(root, offset, num_bytes);
5174                         if (ret) {
5175                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5176                                         "there is no extent record\n",
5177                                         offset, offset+num_bytes);
5178                                 errors++;
5179                         }
5180                         offset = key.offset;
5181                         num_bytes = 0;
5182                 }
5183                 num_bytes += data_len;
5184                 path->slots[0]++;
5185         }
5186
5187         btrfs_free_path(path);
5188         return errors;
5189 }
5190
5191 static int is_dropped_key(struct btrfs_key *key,
5192                           struct btrfs_key *drop_key) {
5193         if (key->objectid < drop_key->objectid)
5194                 return 1;
5195         else if (key->objectid == drop_key->objectid) {
5196                 if (key->type < drop_key->type)
5197                         return 1;
5198                 else if (key->type == drop_key->type) {
5199                         if (key->offset < drop_key->offset)
5200                                 return 1;
5201                 }
5202         }
5203         return 0;
5204 }
5205
5206 static int calc_extent_flag(struct btrfs_root *root,
5207                            struct cache_tree *extent_cache,
5208                            struct extent_buffer *buf,
5209                            struct root_item_record *ri,
5210                            u64 *flags)
5211 {
5212         int i;
5213         int nritems = btrfs_header_nritems(buf);
5214         struct btrfs_key key;
5215         struct extent_record *rec;
5216         struct cache_extent *cache;
5217         struct data_backref *dback;
5218         struct tree_backref *tback;
5219         struct extent_buffer *new_buf;
5220         u64 owner = 0;
5221         u64 bytenr;
5222         u64 offset;
5223         u64 ptr;
5224         int size;
5225         int ret;
5226         u8 level;
5227
5228         /*
5229          * Except file/reloc tree, we can not have
5230          * FULL BACKREF MODE
5231          */
5232         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5233                 goto normal;
5234         /*
5235          * root node
5236          */
5237         if (buf->start == ri->bytenr)
5238                 goto normal;
5239         if (btrfs_is_leaf(buf)) {
5240                 /*
5241                  * we are searching from original root, world
5242                  * peace is achieved, we use normal backref.
5243                  */
5244                 owner = btrfs_header_owner(buf);
5245                 if (owner == ri->objectid)
5246                         goto normal;
5247                 /*
5248                  * we check every eb here, and if any of
5249                  * eb dosen't have original root refers
5250                  * to this eb, we set full backref flag for
5251                  * this extent, otherwise normal backref.
5252                  */
5253                 for (i = 0; i < nritems; i++) {
5254                         struct btrfs_file_extent_item *fi;
5255                         btrfs_item_key_to_cpu(buf, &key, i);
5256
5257                         if (key.type != BTRFS_EXTENT_DATA_KEY)
5258                                 continue;
5259                         fi = btrfs_item_ptr(buf, i,
5260                                             struct btrfs_file_extent_item);
5261                         if (btrfs_file_extent_type(buf, fi) ==
5262                             BTRFS_FILE_EXTENT_INLINE)
5263                                 continue;
5264                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
5265                                 continue;
5266                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
5267                         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5268                         if (!cache)
5269                                 goto full_backref;
5270                         offset = btrfs_file_extent_offset(buf, fi);
5271                         rec = container_of(cache, struct extent_record, cache);
5272                         dback = find_data_backref(rec, 0, ri->objectid, owner,
5273                                         key.offset - offset, 1, bytenr, bytenr);
5274                         if (!dback)
5275                                 goto full_backref;
5276                 }
5277                 goto full_backref;
5278         } else {
5279                 level = btrfs_header_level(buf);
5280                 for (i = 0; i < nritems; i++) {
5281                         ptr = btrfs_node_blockptr(buf, i);
5282                         size = btrfs_level_size(root, level);
5283                         if (i == 0) {
5284                                 new_buf = read_tree_block(root, ptr, size, 0);
5285                                 if (!extent_buffer_uptodate(new_buf)) {
5286                                         free_extent_buffer(new_buf);
5287                                         ret = -EIO;
5288                                         return ret;
5289                                 }
5290                                 /*
5291                                  * we are searching from origin root, world
5292                                  * peace is achieved, we use normal backref.
5293                                  */
5294                                 owner = btrfs_header_owner(new_buf);
5295                                 free_extent_buffer(new_buf);
5296                                 if (owner == ri->objectid)
5297                                         goto normal;
5298                         }
5299                         cache = lookup_cache_extent(extent_cache, ptr, size);
5300                         if (!cache)
5301                                 goto full_backref;
5302                         rec = container_of(cache, struct extent_record, cache);
5303                         tback = find_tree_backref(rec, 0, owner);
5304                         if (!tback)
5305                                 goto full_backref;
5306                 }
5307
5308         }
5309 normal:
5310         *flags = 0;
5311         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5312         /* we have added this extent before */
5313         BUG_ON(!cache);
5314         rec = container_of(cache, struct extent_record, cache);
5315         rec->flag_block_full_backref = 0;
5316         return 0;
5317 full_backref:
5318         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5319         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5320         /* we have added this extent before */
5321         BUG_ON(!cache);
5322         rec = container_of(cache, struct extent_record, cache);
5323         rec->flag_block_full_backref = 1;
5324         return 0;
5325 }
5326
5327 static int run_next_block(struct btrfs_trans_handle *trans,
5328                           struct btrfs_root *root,
5329                           struct block_info *bits,
5330                           int bits_nr,
5331                           u64 *last,
5332                           struct cache_tree *pending,
5333                           struct cache_tree *seen,
5334                           struct cache_tree *reada,
5335                           struct cache_tree *nodes,
5336                           struct cache_tree *extent_cache,
5337                           struct cache_tree *chunk_cache,
5338                           struct rb_root *dev_cache,
5339                           struct block_group_tree *block_group_cache,
5340                           struct device_extent_tree *dev_extent_cache,
5341                           struct root_item_record *ri)
5342 {
5343         struct extent_buffer *buf;
5344         u64 bytenr;
5345         u32 size;
5346         u64 parent;
5347         u64 owner;
5348         u64 flags;
5349         u64 ptr;
5350         u64 gen = 0;
5351         int ret = 0;
5352         int i;
5353         int nritems;
5354         struct btrfs_key key;
5355         struct cache_extent *cache;
5356         int reada_bits;
5357
5358         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5359                                     bits_nr, &reada_bits);
5360         if (nritems == 0)
5361                 return 1;
5362
5363         if (!reada_bits) {
5364                 for(i = 0; i < nritems; i++) {
5365                         ret = add_cache_extent(reada, bits[i].start,
5366                                                bits[i].size);
5367                         if (ret == -EEXIST)
5368                                 continue;
5369
5370                         /* fixme, get the parent transid */
5371                         readahead_tree_block(root, bits[i].start,
5372                                              bits[i].size, 0);
5373                 }
5374         }
5375         *last = bits[0].start;
5376         bytenr = bits[0].start;
5377         size = bits[0].size;
5378
5379         cache = lookup_cache_extent(pending, bytenr, size);
5380         if (cache) {
5381                 remove_cache_extent(pending, cache);
5382                 free(cache);
5383         }
5384         cache = lookup_cache_extent(reada, bytenr, size);
5385         if (cache) {
5386                 remove_cache_extent(reada, cache);
5387                 free(cache);
5388         }
5389         cache = lookup_cache_extent(nodes, bytenr, size);
5390         if (cache) {
5391                 remove_cache_extent(nodes, cache);
5392                 free(cache);
5393         }
5394         cache = lookup_cache_extent(extent_cache, bytenr, size);
5395         if (cache) {
5396                 struct extent_record *rec;
5397
5398                 rec = container_of(cache, struct extent_record, cache);
5399                 gen = rec->parent_generation;
5400         }
5401
5402         /* fixme, get the real parent transid */
5403         buf = read_tree_block(root, bytenr, size, gen);
5404         if (!extent_buffer_uptodate(buf)) {
5405                 record_bad_block_io(root->fs_info,
5406                                     extent_cache, bytenr, size);
5407                 goto out;
5408         }
5409
5410         nritems = btrfs_header_nritems(buf);
5411
5412         /*
5413          * FIXME, this only works only if we don't have any full
5414          * backref mode.
5415          */
5416         if (!init_extent_tree) {
5417                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5418                                        btrfs_header_level(buf), 1, NULL,
5419                                        &flags);
5420                 if (ret < 0)
5421                         goto out;
5422         } else {
5423                 flags = 0;
5424                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5425                 if (ret < 0)
5426                         goto out;
5427         }
5428
5429         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5430                 parent = bytenr;
5431                 owner = 0;
5432         } else {
5433                 parent = 0;
5434                 owner = btrfs_header_owner(buf);
5435         }
5436
5437         ret = check_block(trans, root, extent_cache, buf, flags);
5438         if (ret)
5439                 goto out;
5440
5441         if (btrfs_is_leaf(buf)) {
5442                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5443                 for (i = 0; i < nritems; i++) {
5444                         struct btrfs_file_extent_item *fi;
5445                         btrfs_item_key_to_cpu(buf, &key, i);
5446                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5447                                 process_extent_item(root, extent_cache, buf,
5448                                                     i);
5449                                 continue;
5450                         }
5451                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5452                                 process_extent_item(root, extent_cache, buf,
5453                                                     i);
5454                                 continue;
5455                         }
5456                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5457                                 total_csum_bytes +=
5458                                         btrfs_item_size_nr(buf, i);
5459                                 continue;
5460                         }
5461                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5462                                 process_chunk_item(chunk_cache, &key, buf, i);
5463                                 continue;
5464                         }
5465                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5466                                 process_device_item(dev_cache, &key, buf, i);
5467                                 continue;
5468                         }
5469                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5470                                 process_block_group_item(block_group_cache,
5471                                         &key, buf, i);
5472                                 continue;
5473                         }
5474                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
5475                                 process_device_extent_item(dev_extent_cache,
5476                                         &key, buf, i);
5477                                 continue;
5478
5479                         }
5480                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5481 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5482                                 process_extent_ref_v0(extent_cache, buf, i);
5483 #else
5484                                 BUG();
5485 #endif
5486                                 continue;
5487                         }
5488
5489                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5490                                 add_tree_backref(extent_cache, key.objectid, 0,
5491                                                  key.offset, 0);
5492                                 continue;
5493                         }
5494                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
5495                                 add_tree_backref(extent_cache, key.objectid,
5496                                                  key.offset, 0, 0);
5497                                 continue;
5498                         }
5499                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
5500                                 struct btrfs_extent_data_ref *ref;
5501                                 ref = btrfs_item_ptr(buf, i,
5502                                                 struct btrfs_extent_data_ref);
5503                                 add_data_backref(extent_cache,
5504                                         key.objectid, 0,
5505                                         btrfs_extent_data_ref_root(buf, ref),
5506                                         btrfs_extent_data_ref_objectid(buf,
5507                                                                        ref),
5508                                         btrfs_extent_data_ref_offset(buf, ref),
5509                                         btrfs_extent_data_ref_count(buf, ref),
5510                                         0, root->sectorsize);
5511                                 continue;
5512                         }
5513                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
5514                                 struct btrfs_shared_data_ref *ref;
5515                                 ref = btrfs_item_ptr(buf, i,
5516                                                 struct btrfs_shared_data_ref);
5517                                 add_data_backref(extent_cache,
5518                                         key.objectid, key.offset, 0, 0, 0,
5519                                         btrfs_shared_data_ref_count(buf, ref),
5520                                         0, root->sectorsize);
5521                                 continue;
5522                         }
5523                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
5524                                 struct bad_item *bad;
5525
5526                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
5527                                         continue;
5528                                 if (!owner)
5529                                         continue;
5530                                 bad = malloc(sizeof(struct bad_item));
5531                                 if (!bad)
5532                                         continue;
5533                                 INIT_LIST_HEAD(&bad->list);
5534                                 memcpy(&bad->key, &key,
5535                                        sizeof(struct btrfs_key));
5536                                 bad->root_id = owner;
5537                                 list_add_tail(&bad->list, &delete_items);
5538                                 continue;
5539                         }
5540                         if (key.type != BTRFS_EXTENT_DATA_KEY)
5541                                 continue;
5542                         fi = btrfs_item_ptr(buf, i,
5543                                             struct btrfs_file_extent_item);
5544                         if (btrfs_file_extent_type(buf, fi) ==
5545                             BTRFS_FILE_EXTENT_INLINE)
5546                                 continue;
5547                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
5548                                 continue;
5549
5550                         data_bytes_allocated +=
5551                                 btrfs_file_extent_disk_num_bytes(buf, fi);
5552                         if (data_bytes_allocated < root->sectorsize) {
5553                                 abort();
5554                         }
5555                         data_bytes_referenced +=
5556                                 btrfs_file_extent_num_bytes(buf, fi);
5557                         add_data_backref(extent_cache,
5558                                 btrfs_file_extent_disk_bytenr(buf, fi),
5559                                 parent, owner, key.objectid, key.offset -
5560                                 btrfs_file_extent_offset(buf, fi), 1, 1,
5561                                 btrfs_file_extent_disk_num_bytes(buf, fi));
5562                 }
5563         } else {
5564                 int level;
5565                 struct btrfs_key first_key;
5566
5567                 first_key.objectid = 0;
5568
5569                 if (nritems > 0)
5570                         btrfs_item_key_to_cpu(buf, &first_key, 0);
5571                 level = btrfs_header_level(buf);
5572                 for (i = 0; i < nritems; i++) {
5573                         ptr = btrfs_node_blockptr(buf, i);
5574                         size = btrfs_level_size(root, level - 1);
5575                         btrfs_node_key_to_cpu(buf, &key, i);
5576                         if (ri != NULL) {
5577                                 if ((level == ri->drop_level)
5578                                     && is_dropped_key(&key, &ri->drop_key)) {
5579                                         continue;
5580                                 }
5581                         }
5582                         ret = add_extent_rec(extent_cache, &key,
5583                                              btrfs_node_ptr_generation(buf, i),
5584                                              ptr, size, 0, 0, 1, 0, 1, 0,
5585                                              size);
5586                         BUG_ON(ret);
5587
5588                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
5589
5590                         if (level > 1) {
5591                                 add_pending(nodes, seen, ptr, size);
5592                         } else {
5593                                 add_pending(pending, seen, ptr, size);
5594                         }
5595                 }
5596                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
5597                                       nritems) * sizeof(struct btrfs_key_ptr);
5598         }
5599         total_btree_bytes += buf->len;
5600         if (fs_root_objectid(btrfs_header_owner(buf)))
5601                 total_fs_tree_bytes += buf->len;
5602         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
5603                 total_extent_tree_bytes += buf->len;
5604         if (!found_old_backref &&
5605             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
5606             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
5607             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5608                 found_old_backref = 1;
5609 out:
5610         free_extent_buffer(buf);
5611         return ret;
5612 }
5613
5614 static int add_root_to_pending(struct extent_buffer *buf,
5615                                struct cache_tree *extent_cache,
5616                                struct cache_tree *pending,
5617                                struct cache_tree *seen,
5618                                struct cache_tree *nodes,
5619                                u64 objectid)
5620 {
5621         if (btrfs_header_level(buf) > 0)
5622                 add_pending(nodes, seen, buf->start, buf->len);
5623         else
5624                 add_pending(pending, seen, buf->start, buf->len);
5625         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
5626                        0, 1, 1, 0, 1, 0, buf->len);
5627
5628         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
5629             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
5630                 add_tree_backref(extent_cache, buf->start, buf->start,
5631                                  0, 1);
5632         else
5633                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
5634         return 0;
5635 }
5636
5637 /* as we fix the tree, we might be deleting blocks that
5638  * we're tracking for repair.  This hook makes sure we
5639  * remove any backrefs for blocks as we are fixing them.
5640  */
5641 static int free_extent_hook(struct btrfs_trans_handle *trans,
5642                             struct btrfs_root *root,
5643                             u64 bytenr, u64 num_bytes, u64 parent,
5644                             u64 root_objectid, u64 owner, u64 offset,
5645                             int refs_to_drop)
5646 {
5647         struct extent_record *rec;
5648         struct cache_extent *cache;
5649         int is_data;
5650         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
5651
5652         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
5653         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
5654         if (!cache)
5655                 return 0;
5656
5657         rec = container_of(cache, struct extent_record, cache);
5658         if (is_data) {
5659                 struct data_backref *back;
5660                 back = find_data_backref(rec, parent, root_objectid, owner,
5661                                          offset, 1, bytenr, num_bytes);
5662                 if (!back)
5663                         goto out;
5664                 if (back->node.found_ref) {
5665                         back->found_ref -= refs_to_drop;
5666                         if (rec->refs)
5667                                 rec->refs -= refs_to_drop;
5668                 }
5669                 if (back->node.found_extent_tree) {
5670                         back->num_refs -= refs_to_drop;
5671                         if (rec->extent_item_refs)
5672                                 rec->extent_item_refs -= refs_to_drop;
5673                 }
5674                 if (back->found_ref == 0)
5675                         back->node.found_ref = 0;
5676                 if (back->num_refs == 0)
5677                         back->node.found_extent_tree = 0;
5678
5679                 if (!back->node.found_extent_tree && back->node.found_ref) {
5680                         list_del(&back->node.list);
5681                         free(back);
5682                 }
5683         } else {
5684                 struct tree_backref *back;
5685                 back = find_tree_backref(rec, parent, root_objectid);
5686                 if (!back)
5687                         goto out;
5688                 if (back->node.found_ref) {
5689                         if (rec->refs)
5690                                 rec->refs--;
5691                         back->node.found_ref = 0;
5692                 }
5693                 if (back->node.found_extent_tree) {
5694                         if (rec->extent_item_refs)
5695                                 rec->extent_item_refs--;
5696                         back->node.found_extent_tree = 0;
5697                 }
5698                 if (!back->node.found_extent_tree && back->node.found_ref) {
5699                         list_del(&back->node.list);
5700                         free(back);
5701                 }
5702         }
5703         maybe_free_extent_rec(extent_cache, rec);
5704 out:
5705         return 0;
5706 }
5707
5708 static int delete_extent_records(struct btrfs_trans_handle *trans,
5709                                  struct btrfs_root *root,
5710                                  struct btrfs_path *path,
5711                                  u64 bytenr, u64 new_len)
5712 {
5713         struct btrfs_key key;
5714         struct btrfs_key found_key;
5715         struct extent_buffer *leaf;
5716         int ret;
5717         int slot;
5718
5719
5720         key.objectid = bytenr;
5721         key.type = (u8)-1;
5722         key.offset = (u64)-1;
5723
5724         while(1) {
5725                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
5726                                         &key, path, 0, 1);
5727                 if (ret < 0)
5728                         break;
5729
5730                 if (ret > 0) {
5731                         ret = 0;
5732                         if (path->slots[0] == 0)
5733                                 break;
5734                         path->slots[0]--;
5735                 }
5736                 ret = 0;
5737
5738                 leaf = path->nodes[0];
5739                 slot = path->slots[0];
5740
5741                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5742                 if (found_key.objectid != bytenr)
5743                         break;
5744
5745                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
5746                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
5747                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5748                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
5749                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
5750                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
5751                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
5752                         btrfs_release_path(path);
5753                         if (found_key.type == 0) {
5754                                 if (found_key.offset == 0)
5755                                         break;
5756                                 key.offset = found_key.offset - 1;
5757                                 key.type = found_key.type;
5758                         }
5759                         key.type = found_key.type - 1;
5760                         key.offset = (u64)-1;
5761                         continue;
5762                 }
5763
5764                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
5765                         found_key.objectid, found_key.type, found_key.offset);
5766
5767                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
5768                 if (ret)
5769                         break;
5770                 btrfs_release_path(path);
5771
5772                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
5773                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
5774                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
5775                                 found_key.offset : root->leafsize;
5776
5777                         ret = btrfs_update_block_group(trans, root, bytenr,
5778                                                        bytes, 0, 0);
5779                         if (ret)
5780                                 break;
5781                 }
5782         }
5783
5784         btrfs_release_path(path);
5785         return ret;
5786 }
5787
5788 /*
5789  * for a single backref, this will allocate a new extent
5790  * and add the backref to it.
5791  */
5792 static int record_extent(struct btrfs_trans_handle *trans,
5793                          struct btrfs_fs_info *info,
5794                          struct btrfs_path *path,
5795                          struct extent_record *rec,
5796                          struct extent_backref *back,
5797                          int allocated, u64 flags)
5798 {
5799         int ret;
5800         struct btrfs_root *extent_root = info->extent_root;
5801         struct extent_buffer *leaf;
5802         struct btrfs_key ins_key;
5803         struct btrfs_extent_item *ei;
5804         struct tree_backref *tback;
5805         struct data_backref *dback;
5806         struct btrfs_tree_block_info *bi;
5807
5808         if (!back->is_data)
5809                 rec->max_size = max_t(u64, rec->max_size,
5810                                     info->extent_root->leafsize);
5811
5812         if (!allocated) {
5813                 u32 item_size = sizeof(*ei);
5814
5815                 if (!back->is_data)
5816                         item_size += sizeof(*bi);
5817
5818                 ins_key.objectid = rec->start;
5819                 ins_key.offset = rec->max_size;
5820                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
5821
5822                 ret = btrfs_insert_empty_item(trans, extent_root, path,
5823                                         &ins_key, item_size);
5824                 if (ret)
5825                         goto fail;
5826
5827                 leaf = path->nodes[0];
5828                 ei = btrfs_item_ptr(leaf, path->slots[0],
5829                                     struct btrfs_extent_item);
5830
5831                 btrfs_set_extent_refs(leaf, ei, 0);
5832                 btrfs_set_extent_generation(leaf, ei, rec->generation);
5833
5834                 if (back->is_data) {
5835                         btrfs_set_extent_flags(leaf, ei,
5836                                                BTRFS_EXTENT_FLAG_DATA);
5837                 } else {
5838                         struct btrfs_disk_key copy_key;;
5839
5840                         tback = (struct tree_backref *)back;
5841                         bi = (struct btrfs_tree_block_info *)(ei + 1);
5842                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
5843                                              sizeof(*bi));
5844
5845                         btrfs_set_disk_key_objectid(&copy_key,
5846                                                     rec->info_objectid);
5847                         btrfs_set_disk_key_type(&copy_key, 0);
5848                         btrfs_set_disk_key_offset(&copy_key, 0);
5849
5850                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
5851                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
5852
5853                         btrfs_set_extent_flags(leaf, ei,
5854                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
5855                 }
5856
5857                 btrfs_mark_buffer_dirty(leaf);
5858                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
5859                                                rec->max_size, 1, 0);
5860                 if (ret)
5861                         goto fail;
5862                 btrfs_release_path(path);
5863         }
5864
5865         if (back->is_data) {
5866                 u64 parent;
5867                 int i;
5868
5869                 dback = (struct data_backref *)back;
5870                 if (back->full_backref)
5871                         parent = dback->parent;
5872                 else
5873                         parent = 0;
5874
5875                 for (i = 0; i < dback->found_ref; i++) {
5876                         /* if parent != 0, we're doing a full backref
5877                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
5878                          * just makes the backref allocator create a data
5879                          * backref
5880                          */
5881                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
5882                                                    rec->start, rec->max_size,
5883                                                    parent,
5884                                                    dback->root,
5885                                                    parent ?
5886                                                    BTRFS_FIRST_FREE_OBJECTID :
5887                                                    dback->owner,
5888                                                    dback->offset);
5889                         if (ret)
5890                                 break;
5891                 }
5892                 fprintf(stderr, "adding new data backref"
5893                                 " on %llu %s %llu owner %llu"
5894                                 " offset %llu found %d\n",
5895                                 (unsigned long long)rec->start,
5896                                 back->full_backref ?
5897                                 "parent" : "root",
5898                                 back->full_backref ?
5899                                 (unsigned long long)parent :
5900                                 (unsigned long long)dback->root,
5901                                 (unsigned long long)dback->owner,
5902                                 (unsigned long long)dback->offset,
5903                                 dback->found_ref);
5904         } else {
5905                 u64 parent;
5906
5907                 tback = (struct tree_backref *)back;
5908                 if (back->full_backref)
5909                         parent = tback->parent;
5910                 else
5911                         parent = 0;
5912
5913                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
5914                                            rec->start, rec->max_size,
5915                                            parent, tback->root, 0, 0);
5916                 fprintf(stderr, "adding new tree backref on "
5917                         "start %llu len %llu parent %llu root %llu\n",
5918                         rec->start, rec->max_size, tback->parent, tback->root);
5919         }
5920         if (ret)
5921                 goto fail;
5922 fail:
5923         btrfs_release_path(path);
5924         return ret;
5925 }
5926
5927 struct extent_entry {
5928         u64 bytenr;
5929         u64 bytes;
5930         int count;
5931         int broken;
5932         struct list_head list;
5933 };
5934
5935 static struct extent_entry *find_entry(struct list_head *entries,
5936                                        u64 bytenr, u64 bytes)
5937 {
5938         struct extent_entry *entry = NULL;
5939
5940         list_for_each_entry(entry, entries, list) {
5941                 if (entry->bytenr == bytenr && entry->bytes == bytes)
5942                         return entry;
5943         }
5944
5945         return NULL;
5946 }
5947
5948 static struct extent_entry *find_most_right_entry(struct list_head *entries)
5949 {
5950         struct extent_entry *entry, *best = NULL, *prev = NULL;
5951
5952         list_for_each_entry(entry, entries, list) {
5953                 if (!prev) {
5954                         prev = entry;
5955                         continue;
5956                 }
5957
5958                 /*
5959                  * If there are as many broken entries as entries then we know
5960                  * not to trust this particular entry.
5961                  */
5962                 if (entry->broken == entry->count)
5963                         continue;
5964
5965                 /*
5966                  * If our current entry == best then we can't be sure our best
5967                  * is really the best, so we need to keep searching.
5968                  */
5969                 if (best && best->count == entry->count) {
5970                         prev = entry;
5971                         best = NULL;
5972                         continue;
5973                 }
5974
5975                 /* Prev == entry, not good enough, have to keep searching */
5976                 if (!prev->broken && prev->count == entry->count)
5977                         continue;
5978
5979                 if (!best)
5980                         best = (prev->count > entry->count) ? prev : entry;
5981                 else if (best->count < entry->count)
5982                         best = entry;
5983                 prev = entry;
5984         }
5985
5986         return best;
5987 }
5988
5989 static int repair_ref(struct btrfs_trans_handle *trans,
5990                       struct btrfs_fs_info *info, struct btrfs_path *path,
5991                       struct data_backref *dback, struct extent_entry *entry)
5992 {
5993         struct btrfs_root *root;
5994         struct btrfs_file_extent_item *fi;
5995         struct extent_buffer *leaf;
5996         struct btrfs_key key;
5997         u64 bytenr, bytes;
5998         int ret;
5999
6000         key.objectid = dback->root;
6001         key.type = BTRFS_ROOT_ITEM_KEY;
6002         key.offset = (u64)-1;
6003         root = btrfs_read_fs_root(info, &key);
6004         if (IS_ERR(root)) {
6005                 fprintf(stderr, "Couldn't find root for our ref\n");
6006                 return -EINVAL;
6007         }
6008
6009         /*
6010          * The backref points to the original offset of the extent if it was
6011          * split, so we need to search down to the offset we have and then walk
6012          * forward until we find the backref we're looking for.
6013          */
6014         key.objectid = dback->owner;
6015         key.type = BTRFS_EXTENT_DATA_KEY;
6016         key.offset = dback->offset;
6017         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6018         if (ret < 0) {
6019                 fprintf(stderr, "Error looking up ref %d\n", ret);
6020                 return ret;
6021         }
6022
6023         while (1) {
6024                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6025                         ret = btrfs_next_leaf(root, path);
6026                         if (ret) {
6027                                 fprintf(stderr, "Couldn't find our ref, next\n");
6028                                 return -EINVAL;
6029                         }
6030                 }
6031                 leaf = path->nodes[0];
6032                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6033                 if (key.objectid != dback->owner ||
6034                     key.type != BTRFS_EXTENT_DATA_KEY) {
6035                         fprintf(stderr, "Couldn't find our ref, search\n");
6036                         return -EINVAL;
6037                 }
6038                 fi = btrfs_item_ptr(leaf, path->slots[0],
6039                                     struct btrfs_file_extent_item);
6040                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6041                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6042
6043                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6044                         break;
6045                 path->slots[0]++;
6046         }
6047
6048         btrfs_release_path(path);
6049
6050         /*
6051          * Have to make sure that this root gets updated when we commit the
6052          * transaction
6053          */
6054         record_root_in_trans(trans, root);
6055
6056         /*
6057          * Ok we have the key of the file extent we want to fix, now we can cow
6058          * down to the thing and fix it.
6059          */
6060         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6061         if (ret < 0) {
6062                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6063                         key.objectid, key.type, key.offset, ret);
6064                 return ret;
6065         }
6066         if (ret > 0) {
6067                 fprintf(stderr, "Well that's odd, we just found this key "
6068                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6069                         key.offset);
6070                 return -EINVAL;
6071         }
6072         leaf = path->nodes[0];
6073         fi = btrfs_item_ptr(leaf, path->slots[0],
6074                             struct btrfs_file_extent_item);
6075
6076         if (btrfs_file_extent_compression(leaf, fi) &&
6077             dback->disk_bytenr != entry->bytenr) {
6078                 fprintf(stderr, "Ref doesn't match the record start and is "
6079                         "compressed, please take a btrfs-image of this file "
6080                         "system and send it to a btrfs developer so they can "
6081                         "complete this functionality for bytenr %Lu\n",
6082                         dback->disk_bytenr);
6083                 return -EINVAL;
6084         }
6085
6086         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6087                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6088         } else if (dback->disk_bytenr > entry->bytenr) {
6089                 u64 off_diff, offset;
6090
6091                 off_diff = dback->disk_bytenr - entry->bytenr;
6092                 offset = btrfs_file_extent_offset(leaf, fi);
6093                 if (dback->disk_bytenr + offset +
6094                     btrfs_file_extent_num_bytes(leaf, fi) >
6095                     entry->bytenr + entry->bytes) {
6096                         fprintf(stderr, "Ref is past the entry end, please "
6097                                 "take a btrfs-image of this file system and "
6098                                 "send it to a btrfs developer, ref %Lu\n",
6099                                 dback->disk_bytenr);
6100                         return -EINVAL;
6101                 }
6102                 offset += off_diff;
6103                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6104                 btrfs_set_file_extent_offset(leaf, fi, offset);
6105         } else if (dback->disk_bytenr < entry->bytenr) {
6106                 u64 offset;
6107
6108                 offset = btrfs_file_extent_offset(leaf, fi);
6109                 if (dback->disk_bytenr + offset < entry->bytenr) {
6110                         fprintf(stderr, "Ref is before the entry start, please"
6111                                 " take a btrfs-image of this file system and "
6112                                 "send it to a btrfs developer, ref %Lu\n",
6113                                 dback->disk_bytenr);
6114                         return -EINVAL;
6115                 }
6116
6117                 offset += dback->disk_bytenr;
6118                 offset -= entry->bytenr;
6119                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6120                 btrfs_set_file_extent_offset(leaf, fi, offset);
6121         }
6122
6123         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6124
6125         /*
6126          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6127          * only do this if we aren't using compression, otherwise it's a
6128          * trickier case.
6129          */
6130         if (!btrfs_file_extent_compression(leaf, fi))
6131                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6132         else
6133                 printf("ram bytes may be wrong?\n");
6134         btrfs_mark_buffer_dirty(leaf);
6135         btrfs_release_path(path);
6136         return 0;
6137 }
6138
6139 static int verify_backrefs(struct btrfs_trans_handle *trans,
6140                            struct btrfs_fs_info *info, struct btrfs_path *path,
6141                            struct extent_record *rec)
6142 {
6143         struct extent_backref *back;
6144         struct data_backref *dback;
6145         struct extent_entry *entry, *best = NULL;
6146         LIST_HEAD(entries);
6147         int nr_entries = 0;
6148         int broken_entries = 0;
6149         int ret = 0;
6150         short mismatch = 0;
6151
6152         /*
6153          * Metadata is easy and the backrefs should always agree on bytenr and
6154          * size, if not we've got bigger issues.
6155          */
6156         if (rec->metadata)
6157                 return 0;
6158
6159         list_for_each_entry(back, &rec->backrefs, list) {
6160                 if (back->full_backref || !back->is_data)
6161                         continue;
6162
6163                 dback = (struct data_backref *)back;
6164
6165                 /*
6166                  * We only pay attention to backrefs that we found a real
6167                  * backref for.
6168                  */
6169                 if (dback->found_ref == 0)
6170                         continue;
6171
6172                 /*
6173                  * For now we only catch when the bytes don't match, not the
6174                  * bytenr.  We can easily do this at the same time, but I want
6175                  * to have a fs image to test on before we just add repair
6176                  * functionality willy-nilly so we know we won't screw up the
6177                  * repair.
6178                  */
6179
6180                 entry = find_entry(&entries, dback->disk_bytenr,
6181                                    dback->bytes);
6182                 if (!entry) {
6183                         entry = malloc(sizeof(struct extent_entry));
6184                         if (!entry) {
6185                                 ret = -ENOMEM;
6186                                 goto out;
6187                         }
6188                         memset(entry, 0, sizeof(*entry));
6189                         entry->bytenr = dback->disk_bytenr;
6190                         entry->bytes = dback->bytes;
6191                         list_add_tail(&entry->list, &entries);
6192                         nr_entries++;
6193                 }
6194
6195                 /*
6196                  * If we only have on entry we may think the entries agree when
6197                  * in reality they don't so we have to do some extra checking.
6198                  */
6199                 if (dback->disk_bytenr != rec->start ||
6200                     dback->bytes != rec->nr || back->broken)
6201                         mismatch = 1;
6202
6203                 if (back->broken) {
6204                         entry->broken++;
6205                         broken_entries++;
6206                 }
6207
6208                 entry->count++;
6209         }
6210
6211         /* Yay all the backrefs agree, carry on good sir */
6212         if (nr_entries <= 1 && !mismatch)
6213                 goto out;
6214
6215         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6216                 "%Lu\n", rec->start);
6217
6218         /*
6219          * First we want to see if the backrefs can agree amongst themselves who
6220          * is right, so figure out which one of the entries has the highest
6221          * count.
6222          */
6223         best = find_most_right_entry(&entries);
6224
6225         /*
6226          * Ok so we may have an even split between what the backrefs think, so
6227          * this is where we use the extent ref to see what it thinks.
6228          */
6229         if (!best) {
6230                 entry = find_entry(&entries, rec->start, rec->nr);
6231                 if (!entry && (!broken_entries || !rec->found_rec)) {
6232                         fprintf(stderr, "Backrefs don't agree with each other "
6233                                 "and extent record doesn't agree with anybody,"
6234                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6235                                 rec->start, rec->nr);
6236                         ret = -EINVAL;
6237                         goto out;
6238                 } else if (!entry) {
6239                         /*
6240                          * Ok our backrefs were broken, we'll assume this is the
6241                          * correct value and add an entry for this range.
6242                          */
6243                         entry = malloc(sizeof(struct extent_entry));
6244                         if (!entry) {
6245                                 ret = -ENOMEM;
6246                                 goto out;
6247                         }
6248                         memset(entry, 0, sizeof(*entry));
6249                         entry->bytenr = rec->start;
6250                         entry->bytes = rec->nr;
6251                         list_add_tail(&entry->list, &entries);
6252                         nr_entries++;
6253                 }
6254                 entry->count++;
6255                 best = find_most_right_entry(&entries);
6256                 if (!best) {
6257                         fprintf(stderr, "Backrefs and extent record evenly "
6258                                 "split on who is right, this is going to "
6259                                 "require user input to fix bytenr %Lu bytes "
6260                                 "%Lu\n", rec->start, rec->nr);
6261                         ret = -EINVAL;
6262                         goto out;
6263                 }
6264         }
6265
6266         /*
6267          * I don't think this can happen currently as we'll abort() if we catch
6268          * this case higher up, but in case somebody removes that we still can't
6269          * deal with it properly here yet, so just bail out of that's the case.
6270          */
6271         if (best->bytenr != rec->start) {
6272                 fprintf(stderr, "Extent start and backref starts don't match, "
6273                         "please use btrfs-image on this file system and send "
6274                         "it to a btrfs developer so they can make fsck fix "
6275                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6276                         rec->start, rec->nr);
6277                 ret = -EINVAL;
6278                 goto out;
6279         }
6280
6281         /*
6282          * Ok great we all agreed on an extent record, let's go find the real
6283          * references and fix up the ones that don't match.
6284          */
6285         list_for_each_entry(back, &rec->backrefs, list) {
6286                 if (back->full_backref || !back->is_data)
6287                         continue;
6288
6289                 dback = (struct data_backref *)back;
6290
6291                 /*
6292                  * Still ignoring backrefs that don't have a real ref attached
6293                  * to them.
6294                  */
6295                 if (dback->found_ref == 0)
6296                         continue;
6297
6298                 if (dback->bytes == best->bytes &&
6299                     dback->disk_bytenr == best->bytenr)
6300                         continue;
6301
6302                 ret = repair_ref(trans, info, path, dback, best);
6303                 if (ret)
6304                         goto out;
6305         }
6306
6307         /*
6308          * Ok we messed with the actual refs, which means we need to drop our
6309          * entire cache and go back and rescan.  I know this is a huge pain and
6310          * adds a lot of extra work, but it's the only way to be safe.  Once all
6311          * the backrefs agree we may not need to do anything to the extent
6312          * record itself.
6313          */
6314         ret = -EAGAIN;
6315 out:
6316         while (!list_empty(&entries)) {
6317                 entry = list_entry(entries.next, struct extent_entry, list);
6318                 list_del_init(&entry->list);
6319                 free(entry);
6320         }
6321         return ret;
6322 }
6323
6324 static int process_duplicates(struct btrfs_root *root,
6325                               struct cache_tree *extent_cache,
6326                               struct extent_record *rec)
6327 {
6328         struct extent_record *good, *tmp;
6329         struct cache_extent *cache;
6330         int ret;
6331
6332         /*
6333          * If we found a extent record for this extent then return, or if we
6334          * have more than one duplicate we are likely going to need to delete
6335          * something.
6336          */
6337         if (rec->found_rec || rec->num_duplicates > 1)
6338                 return 0;
6339
6340         /* Shouldn't happen but just in case */
6341         BUG_ON(!rec->num_duplicates);
6342
6343         /*
6344          * So this happens if we end up with a backref that doesn't match the
6345          * actual extent entry.  So either the backref is bad or the extent
6346          * entry is bad.  Either way we want to have the extent_record actually
6347          * reflect what we found in the extent_tree, so we need to take the
6348          * duplicate out and use that as the extent_record since the only way we
6349          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6350          */
6351         remove_cache_extent(extent_cache, &rec->cache);
6352
6353         good = list_entry(rec->dups.next, struct extent_record, list);
6354         list_del_init(&good->list);
6355         INIT_LIST_HEAD(&good->backrefs);
6356         INIT_LIST_HEAD(&good->dups);
6357         good->cache.start = good->start;
6358         good->cache.size = good->nr;
6359         good->content_checked = 0;
6360         good->owner_ref_checked = 0;
6361         good->num_duplicates = 0;
6362         good->refs = rec->refs;
6363         list_splice_init(&rec->backrefs, &good->backrefs);
6364         while (1) {
6365                 cache = lookup_cache_extent(extent_cache, good->start,
6366                                             good->nr);
6367                 if (!cache)
6368                         break;
6369                 tmp = container_of(cache, struct extent_record, cache);
6370
6371                 /*
6372                  * If we find another overlapping extent and it's found_rec is
6373                  * set then it's a duplicate and we need to try and delete
6374                  * something.
6375                  */
6376                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6377                         if (list_empty(&good->list))
6378                                 list_add_tail(&good->list,
6379                                               &duplicate_extents);
6380                         good->num_duplicates += tmp->num_duplicates + 1;
6381                         list_splice_init(&tmp->dups, &good->dups);
6382                         list_del_init(&tmp->list);
6383                         list_add_tail(&tmp->list, &good->dups);
6384                         remove_cache_extent(extent_cache, &tmp->cache);
6385                         continue;
6386                 }
6387
6388                 /*
6389                  * Ok we have another non extent item backed extent rec, so lets
6390                  * just add it to this extent and carry on like we did above.
6391                  */
6392                 good->refs += tmp->refs;
6393                 list_splice_init(&tmp->backrefs, &good->backrefs);
6394                 remove_cache_extent(extent_cache, &tmp->cache);
6395                 free(tmp);
6396         }
6397         ret = insert_cache_extent(extent_cache, &good->cache);
6398         BUG_ON(ret);
6399         free(rec);
6400         return good->num_duplicates ? 0 : 1;
6401 }
6402
6403 static int delete_duplicate_records(struct btrfs_trans_handle *trans,
6404                                     struct btrfs_root *root,
6405                                     struct extent_record *rec)
6406 {
6407         LIST_HEAD(delete_list);
6408         struct btrfs_path *path;
6409         struct extent_record *tmp, *good, *n;
6410         int nr_del = 0;
6411         int ret = 0;
6412         struct btrfs_key key;
6413
6414         path = btrfs_alloc_path();
6415         if (!path) {
6416                 ret = -ENOMEM;
6417                 goto out;
6418         }
6419
6420         good = rec;
6421         /* Find the record that covers all of the duplicates. */
6422         list_for_each_entry(tmp, &rec->dups, list) {
6423                 if (good->start < tmp->start)
6424                         continue;
6425                 if (good->nr > tmp->nr)
6426                         continue;
6427
6428                 if (tmp->start + tmp->nr < good->start + good->nr) {
6429                         fprintf(stderr, "Ok we have overlapping extents that "
6430                                 "aren't completely covered by eachother, this "
6431                                 "is going to require more careful thought.  "
6432                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6433                                 tmp->start, tmp->nr, good->start, good->nr);
6434                         abort();
6435                 }
6436                 good = tmp;
6437         }
6438
6439         if (good != rec)
6440                 list_add_tail(&rec->list, &delete_list);
6441
6442         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6443                 if (tmp == good)
6444                         continue;
6445                 list_move_tail(&tmp->list, &delete_list);
6446         }
6447
6448         root = root->fs_info->extent_root;
6449         list_for_each_entry(tmp, &delete_list, list) {
6450                 if (tmp->found_rec == 0)
6451                         continue;
6452                 key.objectid = tmp->start;
6453                 key.type = BTRFS_EXTENT_ITEM_KEY;
6454                 key.offset = tmp->nr;
6455
6456                 /* Shouldn't happen but just in case */
6457                 if (tmp->metadata) {
6458                         fprintf(stderr, "Well this shouldn't happen, extent "
6459                                 "record overlaps but is metadata? "
6460                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6461                         abort();
6462                 }
6463
6464                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6465                 if (ret) {
6466                         if (ret > 0)
6467                                 ret = -EINVAL;
6468                         goto out;
6469                 }
6470                 ret = btrfs_del_item(trans, root, path);
6471                 if (ret)
6472                         goto out;
6473                 btrfs_release_path(path);
6474                 nr_del++;
6475         }
6476
6477 out:
6478         while (!list_empty(&delete_list)) {
6479                 tmp = list_entry(delete_list.next, struct extent_record, list);
6480                 list_del_init(&tmp->list);
6481                 if (tmp == rec)
6482                         continue;
6483                 free(tmp);
6484         }
6485
6486         while (!list_empty(&rec->dups)) {
6487                 tmp = list_entry(rec->dups.next, struct extent_record, list);
6488                 list_del_init(&tmp->list);
6489                 free(tmp);
6490         }
6491
6492         btrfs_free_path(path);
6493
6494         if (!ret && !nr_del)
6495                 rec->num_duplicates = 0;
6496
6497         return ret ? ret : nr_del;
6498 }
6499
6500 static int find_possible_backrefs(struct btrfs_trans_handle *trans,
6501                                   struct btrfs_fs_info *info,
6502                                   struct btrfs_path *path,
6503                                   struct cache_tree *extent_cache,
6504                                   struct extent_record *rec)
6505 {
6506         struct btrfs_root *root;
6507         struct extent_backref *back;
6508         struct data_backref *dback;
6509         struct cache_extent *cache;
6510         struct btrfs_file_extent_item *fi;
6511         struct btrfs_key key;
6512         u64 bytenr, bytes;
6513         int ret;
6514
6515         list_for_each_entry(back, &rec->backrefs, list) {
6516                 /* Don't care about full backrefs (poor unloved backrefs) */
6517                 if (back->full_backref || !back->is_data)
6518                         continue;
6519
6520                 dback = (struct data_backref *)back;
6521
6522                 /* We found this one, we don't need to do a lookup */
6523                 if (dback->found_ref)
6524                         continue;
6525
6526                 key.objectid = dback->root;
6527                 key.type = BTRFS_ROOT_ITEM_KEY;
6528                 key.offset = (u64)-1;
6529
6530                 root = btrfs_read_fs_root(info, &key);
6531
6532                 /* No root, definitely a bad ref, skip */
6533                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
6534                         continue;
6535                 /* Other err, exit */
6536                 if (IS_ERR(root))
6537                         return PTR_ERR(root);
6538
6539                 key.objectid = dback->owner;
6540                 key.type = BTRFS_EXTENT_DATA_KEY;
6541                 key.offset = dback->offset;
6542                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6543                 if (ret) {
6544                         btrfs_release_path(path);
6545                         if (ret < 0)
6546                                 return ret;
6547                         /* Didn't find it, we can carry on */
6548                         ret = 0;
6549                         continue;
6550                 }
6551
6552                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
6553                                     struct btrfs_file_extent_item);
6554                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
6555                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
6556                 btrfs_release_path(path);
6557                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6558                 if (cache) {
6559                         struct extent_record *tmp;
6560                         tmp = container_of(cache, struct extent_record, cache);
6561
6562                         /*
6563                          * If we found an extent record for the bytenr for this
6564                          * particular backref then we can't add it to our
6565                          * current extent record.  We only want to add backrefs
6566                          * that don't have a corresponding extent item in the
6567                          * extent tree since they likely belong to this record
6568                          * and we need to fix it if it doesn't match bytenrs.
6569                          */
6570                         if  (tmp->found_rec)
6571                                 continue;
6572                 }
6573
6574                 dback->found_ref += 1;
6575                 dback->disk_bytenr = bytenr;
6576                 dback->bytes = bytes;
6577
6578                 /*
6579                  * Set this so the verify backref code knows not to trust the
6580                  * values in this backref.
6581                  */
6582                 back->broken = 1;
6583         }
6584
6585         return 0;
6586 }
6587
6588 /*
6589  * Record orphan data ref into corresponding root.
6590  *
6591  * Return 0 if the extent item contains data ref and recorded.
6592  * Return 1 if the extent item contains no useful data ref
6593  *   On that case, it may contains only shared_dataref or metadata backref
6594  *   or the file extent exists(this should be handled by the extent bytenr
6595  *   recovery routine)
6596  * Return <0 if something goes wrong.
6597  */
6598 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
6599                                       struct extent_record *rec)
6600 {
6601         struct btrfs_key key;
6602         struct btrfs_root *dest_root;
6603         struct extent_backref *back;
6604         struct data_backref *dback;
6605         struct orphan_data_extent *orphan;
6606         struct btrfs_path *path;
6607         int recorded_data_ref = 0;
6608         int ret = 0;
6609
6610         if (rec->metadata)
6611                 return 1;
6612         path = btrfs_alloc_path();
6613         if (!path)
6614                 return -ENOMEM;
6615         list_for_each_entry(back, &rec->backrefs, list) {
6616                 if (back->full_backref || !back->is_data ||
6617                     !back->found_extent_tree)
6618                         continue;
6619                 dback = (struct data_backref *)back;
6620                 if (dback->found_ref)
6621                         continue;
6622                 key.objectid = dback->root;
6623                 key.type = BTRFS_ROOT_ITEM_KEY;
6624                 key.offset = (u64)-1;
6625
6626                 dest_root = btrfs_read_fs_root(fs_info, &key);
6627
6628                 /* For non-exist root we just skip it */
6629                 if (IS_ERR(dest_root) || !dest_root)
6630                         continue;
6631
6632                 key.objectid = dback->owner;
6633                 key.type = BTRFS_EXTENT_DATA_KEY;
6634                 key.offset = dback->offset;
6635
6636                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
6637                 /*
6638                  * For ret < 0, it's OK since the fs-tree may be corrupted,
6639                  * we need to record it for inode/file extent rebuild.
6640                  * For ret > 0, we record it only for file extent rebuild.
6641                  * For ret == 0, the file extent exists but only bytenr
6642                  * mismatch, let the original bytenr fix routine to handle,
6643                  * don't record it.
6644                  */
6645                 if (ret == 0)
6646                         continue;
6647                 ret = 0;
6648                 orphan = malloc(sizeof(*orphan));
6649                 if (!orphan) {
6650                         ret = -ENOMEM;
6651                         goto out;
6652                 }
6653                 INIT_LIST_HEAD(&orphan->list);
6654                 orphan->root = dback->root;
6655                 orphan->objectid = dback->owner;
6656                 orphan->offset = dback->offset;
6657                 orphan->disk_bytenr = rec->cache.start;
6658                 orphan->disk_len = rec->cache.size;
6659                 list_add(&dest_root->orphan_data_extents, &orphan->list);
6660                 recorded_data_ref = 1;
6661         }
6662 out:
6663         btrfs_free_path(path);
6664         if (!ret)
6665                 return !recorded_data_ref;
6666         else
6667                 return ret;
6668 }
6669
6670 /*
6671  * when an incorrect extent item is found, this will delete
6672  * all of the existing entries for it and recreate them
6673  * based on what the tree scan found.
6674  */
static int fixup_extent_refs(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *info,
			     struct cache_tree *extent_cache,
			     struct extent_record *rec)
{
	/*
	 * Returns 0 on success (including the case where the block is listed
	 * in info->corrupt_blocks and therefore gets no new references), or
	 * the first non-zero value returned by one of the helpers below.
	 */
	int ret;
	struct btrfs_path *path;
	/*
	 * The cursor is taken before any of the helpers below run.
	 * NOTE(review): this assumes find_possible_backrefs() and
	 * verify_backrefs() never unlink the first backref -- confirm.
	 */
	struct list_head *cur = rec->backrefs.next;
	struct cache_extent *cache;
	struct extent_backref *back;
	/* passed to record_extent(); 0 only for the first recreated ref */
	int allocated = 0;
	u64 flags = 0;

	/*
	 * remember our flags for recreating the extent.
	 * FIXME, if we have cleared extent tree, we can not
	 * lookup extent info in extent tree.
	 */
	if (!init_extent_tree) {
		ret = btrfs_lookup_extent_info(NULL, info->extent_root,
					rec->start, rec->max_size,
					rec->metadata, NULL, &flags);
		/* nothing allocated yet, so a plain return is fine here */
		if (ret < 0)
			return ret;
	} else {
		/*
		 * The extent tree is being rebuilt, so the only flag we can
		 * reconstruct is the one cached in the record itself.
		 */
		if (rec->flag_block_full_backref)
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (rec->refs != rec->extent_item_refs && !rec->metadata) {
		/*
		 * Sometimes the backrefs themselves are so broken they don't
		 * get attached to any meaningful rec, so first go back and
		 * check any of our backrefs that we couldn't find and throw
		 * them into the list if we find the backref so that
		 * verify_backrefs can figure out what to do.
		 */
		ret = find_possible_backrefs(trans, info, path, extent_cache,
					     rec);
		if (ret < 0)
			goto out;
	}

	/* step one, make sure all of the backrefs agree */
	ret = verify_backrefs(trans, info, path, rec);
	if (ret < 0)
		goto out;

	/* step two, delete all the existing records */
	ret = delete_extent_records(trans, info->extent_root, path,
				    rec->start, rec->max_size);

	if (ret < 0)
		goto out;

	/* was this block corrupt?  If so, don't add references to it */
	cache = lookup_cache_extent(info->corrupt_blocks,
				    rec->start, rec->max_size);
	if (cache) {
		ret = 0;
		goto out;
	}

	/* step three, recreate all the refs we did find */
	while(cur != &rec->backrefs) {
		back = list_entry(cur, struct extent_backref, list);
		/* advance first so the 'continue' below cannot loop forever */
		cur = cur->next;

		/*
		 * if we didn't find any references, don't create a
		 * new extent record
		 */
		if (!back->found_ref)
			continue;

		ret = record_extent(trans, info, path, rec, back, allocated, flags);
		/* set even on failure; we bail out right below in that case */
		allocated = 1;

		if (ret)
			goto out;
	}
out:
	btrfs_free_path(path);
	return ret;
}
6764
/*
 * Remove the parent's pointer to one corrupt block.
 *
 * right now we only prune from the extent allocation tree
 *
 * The search starts one level above the corrupt block and walks upwards
 * until a node containing a pointer to corrupt->cache.start is found or the
 * extent root node has been scanned.
 *
 * Returns the result of btrfs_del_ptr() when a pointer was found, -ENOENT
 * when no node at the searched levels points to the block, or a negative
 * value from btrfs_search_slot().
 */
static int prune_one_block(struct btrfs_trans_handle *trans,
			   struct btrfs_fs_info *info,
			   struct btrfs_corrupt_block *corrupt)
{
	int ret;
	struct btrfs_path path;
	struct extent_buffer *eb;
	u64 found;
	int slot;
	int nritems;
	/* level of the node expected to hold the pointer to the bad block */
	int level = corrupt->level + 1;

	btrfs_init_path(&path);
again:
	/* we want to stop at the parent to our busted block */
	path.lowest_level = level;

	ret = btrfs_search_slot(trans, info->extent_root,
				&corrupt->key, &path, -1, 1);

	if (ret < 0)
		goto out;

	eb = path.nodes[level];
	/* no node at this level: the requested level is above the tree */
	if (!eb) {
		ret = -ENOENT;
		goto out;
	}

	/*
	 * hopefully the search gave us the block we want to prune,
	 * lets try that first
	 */
	slot = path.slots[level];
	found =  btrfs_node_blockptr(eb, slot);
	if (found == corrupt->cache.start)
		goto del_ptr;

	nritems = btrfs_header_nritems(eb);

	/* the search failed, lets scan this node and hope we find it */
	for (slot = 0; slot < nritems; slot++) {
		found =  btrfs_node_blockptr(eb, slot);
		if (found == corrupt->cache.start)
			goto del_ptr;
	}
	/*
	 * we couldn't find the bad block.  TODO, search all the nodes for pointers
	 * to this block
	 */
	if (eb == info->extent_root->node) {
		/* already at the root node, nowhere higher to look */
		ret = -ENOENT;
		goto out;
	} else {
		/* retry the search one level closer to the root */
		level++;
		btrfs_release_path(&path);
		goto again;
	}

del_ptr:
	printk("deleting pointer to block %Lu\n", corrupt->cache.start);
	ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);

out:
	btrfs_release_path(&path);
	return ret;
}
6833
6834 static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
6835                                 struct btrfs_fs_info *info)
6836 {
6837         struct cache_extent *cache;
6838         struct btrfs_corrupt_block *corrupt;
6839
6840         cache = search_cache_extent(info->corrupt_blocks, 0);
6841         while (1) {
6842                 if (!cache)
6843                         break;
6844                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
6845                 prune_one_block(trans, info, corrupt);
6846                 cache = next_cache_extent(cache);
6847         }
6848         return 0;
6849 }
6850
6851 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
6852 {
6853         struct btrfs_block_group_cache *cache;
6854         u64 start, end;
6855         int ret;
6856
6857         while (1) {
6858                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
6859                                             &start, &end, EXTENT_DIRTY);
6860                 if (ret)
6861                         break;
6862                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
6863                                    GFP_NOFS);
6864         }
6865
6866         start = 0;
6867         while (1) {
6868                 cache = btrfs_lookup_first_block_group(fs_info, start);
6869                 if (!cache)
6870                         break;
6871                 if (cache->cached)
6872                         cache->cached = 0;
6873                 start = cache->key.objectid + cache->key.offset;
6874         }
6875 }
6876
6877 static int check_extent_refs(struct btrfs_trans_handle *trans,
6878                              struct btrfs_root *root,
6879                              struct cache_tree *extent_cache)
6880 {
6881         struct extent_record *rec;
6882         struct cache_extent *cache;
6883         int err = 0;
6884         int ret = 0;
6885         int fixed = 0;
6886         int had_dups = 0;
6887         int recorded = 0;
6888
6889         if (repair) {
6890                 /*
6891                  * if we're doing a repair, we have to make sure
6892                  * we don't allocate from the problem extents.
6893                  * In the worst case, this will be all the
6894                  * extents in the FS
6895                  */
6896                 cache = search_cache_extent(extent_cache, 0);
6897                 while(cache) {
6898                         rec = container_of(cache, struct extent_record, cache);
6899                         btrfs_pin_extent(root->fs_info,
6900                                          rec->start, rec->max_size);
6901                         cache = next_cache_extent(cache);
6902                 }
6903
6904                 /* pin down all the corrupted blocks too */
6905                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
6906                 while(cache) {
6907                         btrfs_pin_extent(root->fs_info,
6908                                          cache->start, cache->size);
6909                         cache = next_cache_extent(cache);
6910                 }
6911                 prune_corrupt_blocks(trans, root->fs_info);
6912                 reset_cached_block_groups(root->fs_info);
6913         }
6914
6915         /*
6916          * We need to delete any duplicate entries we find first otherwise we
6917          * could mess up the extent tree when we have backrefs that actually
6918          * belong to a different extent item and not the weird duplicate one.
6919          */
6920         while (repair && !list_empty(&duplicate_extents)) {
6921                 rec = list_entry(duplicate_extents.next, struct extent_record,
6922                                  list);
6923                 list_del_init(&rec->list);
6924
6925                 /* Sometimes we can find a backref before we find an actual
6926                  * extent, so we need to process it a little bit to see if there
6927                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
6928                  * if this is a backref screwup.  If we need to delete stuff
6929                  * process_duplicates() will return 0, otherwise it will return
6930                  * 1 and we
6931                  */
6932                 if (process_duplicates(root, extent_cache, rec))
6933                         continue;
6934                 ret = delete_duplicate_records(trans, root, rec);
6935                 if (ret < 0)
6936                         return ret;
6937                 /*
6938                  * delete_duplicate_records will return the number of entries
6939                  * deleted, so if it's greater than 0 then we know we actually
6940                  * did something and we need to remove.
6941                  */
6942                 if (ret)
6943                         had_dups = 1;
6944         }
6945
6946         if (had_dups)
6947                 return -EAGAIN;
6948
6949         while(1) {
6950                 fixed = 0;
6951                 recorded = 0;
6952                 cache = search_cache_extent(extent_cache, 0);
6953                 if (!cache)
6954                         break;
6955                 rec = container_of(cache, struct extent_record, cache);
6956                 if (rec->num_duplicates) {
6957                         fprintf(stderr, "extent item %llu has multiple extent "
6958                                 "items\n", (unsigned long long)rec->start);
6959                         err = 1;
6960                 }
6961
6962                 if (rec->refs != rec->extent_item_refs) {
6963                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
6964                                 (unsigned long long)rec->start,
6965                                 (unsigned long long)rec->nr);
6966                         fprintf(stderr, "extent item %llu, found %llu\n",
6967                                 (unsigned long long)rec->extent_item_refs,
6968                                 (unsigned long long)rec->refs);
6969                         ret = record_orphan_data_extents(root->fs_info, rec);
6970                         if (ret < 0)
6971                                 goto repair_abort;
6972                         if (ret == 0) {
6973                                 recorded = 1;
6974                         } else {
6975                                 /*
6976                                  * we can't use the extent to repair file
6977                                  * extent, let the fallback method handle it.
6978                                  */
6979                                 if (!fixed && repair) {
6980                                         ret = fixup_extent_refs(trans,
6981                                                         root->fs_info,
6982                                                         extent_cache, rec);
6983                                         if (ret)
6984                                                 goto repair_abort;
6985                                         fixed = 1;
6986                                 }
6987                         }
6988                         err = 1;
6989
6990                 }
6991                 if (all_backpointers_checked(rec, 1)) {
6992                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
6993                                 (unsigned long long)rec->start,
6994                                 (unsigned long long)rec->nr);
6995
6996                         if (!fixed && !recorded && repair) {
6997                                 ret = fixup_extent_refs(trans, root->fs_info,
6998                                                         extent_cache, rec);
6999                                 if (ret)
7000                                         goto repair_abort;
7001                                 fixed = 1;
7002                         }
7003                         err = 1;
7004                 }
7005                 if (!rec->owner_ref_checked) {
7006                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7007                                 (unsigned long long)rec->start,
7008                                 (unsigned long long)rec->nr);
7009                         if (!fixed && !recorded && repair) {
7010                                 ret = fixup_extent_refs(trans, root->fs_info,
7011                                                         extent_cache, rec);
7012                                 if (ret)
7013                                         goto repair_abort;
7014                                 fixed = 1;
7015                         }
7016                         err = 1;
7017                 }
7018
7019                 remove_cache_extent(extent_cache, cache);
7020                 free_all_extent_backrefs(rec);
7021                 free(rec);
7022         }
7023 repair_abort:
7024         if (repair) {
7025                 if (ret && ret != -EAGAIN) {
7026                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7027                         exit(1);
7028                 } else if (!ret) {
7029                         btrfs_fix_block_accounting(trans, root);
7030                 }
7031                 if (err)
7032                         fprintf(stderr, "repaired damaged extent references\n");
7033                 return ret;
7034         }
7035         return err;
7036 }
7037
7038 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7039 {
7040         u64 stripe_size;
7041
7042         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7043                 stripe_size = length;
7044                 stripe_size /= num_stripes;
7045         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7046                 stripe_size = length * 2;
7047                 stripe_size /= num_stripes;
7048         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7049                 stripe_size = length;
7050                 stripe_size /= (num_stripes - 1);
7051         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7052                 stripe_size = length;
7053                 stripe_size /= (num_stripes - 2);
7054         } else {
7055                 stripe_size = length;
7056         }
7057         return stripe_size;
7058 }
7059
7060 /*
7061  * Check the chunk with its block group/dev list ref:
7062  * Return 0 if all refs seems valid.
7063  * Return 1 if part of refs seems valid, need later check for rebuild ref
7064  * like missing block group and needs to search extent tree to rebuild them.
7065  * Return -1 if essential refs are missing and unable to rebuild.
7066  */
7067 static int check_chunk_refs(struct chunk_record *chunk_rec,
7068                             struct block_group_tree *block_group_cache,
7069                             struct device_extent_tree *dev_extent_cache,
7070                             int silent)
7071 {
7072         struct cache_extent *block_group_item;
7073         struct block_group_record *block_group_rec;
7074         struct cache_extent *dev_extent_item;
7075         struct device_extent_record *dev_extent_rec;
7076         u64 devid;
7077         u64 offset;
7078         u64 length;
7079         int i;
7080         int ret = 0;
7081
7082         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7083                                                chunk_rec->offset,
7084                                                chunk_rec->length);
7085         if (block_group_item) {
7086                 block_group_rec = container_of(block_group_item,
7087                                                struct block_group_record,
7088                                                cache);
7089                 if (chunk_rec->length != block_group_rec->offset ||
7090                     chunk_rec->offset != block_group_rec->objectid ||
7091                     chunk_rec->type_flags != block_group_rec->flags) {
7092                         if (!silent)
7093                                 fprintf(stderr,
7094                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7095                                         chunk_rec->objectid,
7096                                         chunk_rec->type,
7097                                         chunk_rec->offset,
7098                                         chunk_rec->length,
7099                                         chunk_rec->offset,
7100                                         chunk_rec->type_flags,
7101                                         block_group_rec->objectid,
7102                                         block_group_rec->type,
7103                                         block_group_rec->offset,
7104                                         block_group_rec->offset,
7105                                         block_group_rec->objectid,
7106                                         block_group_rec->flags);
7107                         ret = -1;
7108                 } else {
7109                         list_del_init(&block_group_rec->list);
7110                         chunk_rec->bg_rec = block_group_rec;
7111                 }
7112         } else {
7113                 if (!silent)
7114                         fprintf(stderr,
7115                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7116                                 chunk_rec->objectid,
7117                                 chunk_rec->type,
7118                                 chunk_rec->offset,
7119                                 chunk_rec->length,
7120                                 chunk_rec->offset,
7121                                 chunk_rec->type_flags);
7122                 ret = 1;
7123         }
7124
7125         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7126                                     chunk_rec->num_stripes);
7127         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7128                 devid = chunk_rec->stripes[i].devid;
7129                 offset = chunk_rec->stripes[i].offset;
7130                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7131                                                        devid, offset, length);
7132                 if (dev_extent_item) {
7133                         dev_extent_rec = container_of(dev_extent_item,
7134                                                 struct device_extent_record,
7135                                                 cache);
7136                         if (dev_extent_rec->objectid != devid ||
7137                             dev_extent_rec->offset != offset ||
7138                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7139                             dev_extent_rec->length != length) {
7140                                 if (!silent)
7141                                         fprintf(stderr,
7142                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7143                                                 chunk_rec->objectid,
7144                                                 chunk_rec->type,
7145                                                 chunk_rec->offset,
7146                                                 chunk_rec->stripes[i].devid,
7147                                                 chunk_rec->stripes[i].offset,
7148                                                 dev_extent_rec->objectid,
7149                                                 dev_extent_rec->offset,
7150                                                 dev_extent_rec->length);
7151                                 ret = -1;
7152                         } else {
7153                                 list_move(&dev_extent_rec->chunk_list,
7154                                           &chunk_rec->dextents);
7155                         }
7156                 } else {
7157                         if (!silent)
7158                                 fprintf(stderr,
7159                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7160                                         chunk_rec->objectid,
7161                                         chunk_rec->type,
7162                                         chunk_rec->offset,
7163                                         chunk_rec->stripes[i].devid,
7164                                         chunk_rec->stripes[i].offset);
7165                         ret = -1;
7166                 }
7167         }
7168         return ret;
7169 }
7170
7171 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7172 int check_chunks(struct cache_tree *chunk_cache,
7173                  struct block_group_tree *block_group_cache,
7174                  struct device_extent_tree *dev_extent_cache,
7175                  struct list_head *good, struct list_head *bad,
7176                  struct list_head *rebuild, int silent)
7177 {
7178         struct cache_extent *chunk_item;
7179         struct chunk_record *chunk_rec;
7180         struct block_group_record *bg_rec;
7181         struct device_extent_record *dext_rec;
7182         int err;
7183         int ret = 0;
7184
7185         chunk_item = first_cache_extent(chunk_cache);
7186         while (chunk_item) {
7187                 chunk_rec = container_of(chunk_item, struct chunk_record,
7188                                          cache);
7189                 err = check_chunk_refs(chunk_rec, block_group_cache,
7190                                        dev_extent_cache, silent);
7191                 if (err)
7192                         ret = err;
7193                 if (err == 0 && good)
7194                         list_add_tail(&chunk_rec->list, good);
7195                 if (err > 0 && rebuild)
7196                         list_add_tail(&chunk_rec->list, rebuild);
7197                 if (err < 0 && bad)
7198                         list_add_tail(&chunk_rec->list, bad);
7199                 chunk_item = next_cache_extent(chunk_item);
7200         }
7201
7202         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7203                 if (!silent)
7204                         fprintf(stderr,
7205                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7206                                 bg_rec->objectid,
7207                                 bg_rec->offset,
7208                                 bg_rec->flags);
7209                 if (!ret)
7210                         ret = 1;
7211         }
7212
7213         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7214                             chunk_list) {
7215                 if (!silent)
7216                         fprintf(stderr,
7217                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7218                                 dext_rec->objectid,
7219                                 dext_rec->offset,
7220                                 dext_rec->length);
7221                 if (!ret)
7222                         ret = 1;
7223         }
7224         return ret;
7225 }
7226
7227
7228 static int check_device_used(struct device_record *dev_rec,
7229                              struct device_extent_tree *dext_cache)
7230 {
7231         struct cache_extent *cache;
7232         struct device_extent_record *dev_extent_rec;
7233         u64 total_byte = 0;
7234
7235         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7236         while (cache) {
7237                 dev_extent_rec = container_of(cache,
7238                                               struct device_extent_record,
7239                                               cache);
7240                 if (dev_extent_rec->objectid != dev_rec->devid)
7241                         break;
7242
7243                 list_del_init(&dev_extent_rec->device_list);
7244                 total_byte += dev_extent_rec->length;
7245                 cache = next_cache_extent(cache);
7246         }
7247
7248         if (total_byte != dev_rec->byte_used) {
7249                 fprintf(stderr,
7250                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7251                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7252                         dev_rec->type, dev_rec->offset);
7253                 return -1;
7254         } else {
7255                 return 0;
7256         }
7257 }
7258
7259 /* check btrfs_dev_item -> btrfs_dev_extent */
7260 static int check_devices(struct rb_root *dev_cache,
7261                          struct device_extent_tree *dev_extent_cache)
7262 {
7263         struct rb_node *dev_node;
7264         struct device_record *dev_rec;
7265         struct device_extent_record *dext_rec;
7266         int err;
7267         int ret = 0;
7268
7269         dev_node = rb_first(dev_cache);
7270         while (dev_node) {
7271                 dev_rec = container_of(dev_node, struct device_record, node);
7272                 err = check_device_used(dev_rec, dev_extent_cache);
7273                 if (err)
7274                         ret = err;
7275
7276                 dev_node = rb_next(dev_node);
7277         }
7278         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7279                             device_list) {
7280                 fprintf(stderr,
7281                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7282                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7283                 if (!ret)
7284                         ret = 1;
7285         }
7286         return ret;
7287 }
7288
7289 static int add_root_item_to_list(struct list_head *head,
7290                                   u64 objectid, u64 bytenr,
7291                                   u8 level, u8 drop_level,
7292                                   int level_size, struct btrfs_key *drop_key)
7293 {
7294
7295         struct root_item_record *ri_rec;
7296         ri_rec = malloc(sizeof(*ri_rec));
7297         if (!ri_rec)
7298                 return -ENOMEM;
7299         ri_rec->bytenr = bytenr;
7300         ri_rec->objectid = objectid;
7301         ri_rec->level = level;
7302         ri_rec->level_size = level_size;
7303         ri_rec->drop_level = drop_level;
7304         if (drop_key)
7305                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7306         list_add_tail(&ri_rec->list, head);
7307
7308         return 0;
7309 }
7310
7311 static int deal_root_from_list(struct list_head *list,
7312                                struct btrfs_trans_handle *trans,
7313                                struct btrfs_root *root,
7314                                struct block_info *bits,
7315                                int bits_nr,
7316                                struct cache_tree *pending,
7317                                struct cache_tree *seen,
7318                                struct cache_tree *reada,
7319                                struct cache_tree *nodes,
7320                                struct cache_tree *extent_cache,
7321                                struct cache_tree *chunk_cache,
7322                                struct rb_root *dev_cache,
7323                                struct block_group_tree *block_group_cache,
7324                                struct device_extent_tree *dev_extent_cache)
7325 {
7326         int ret = 0;
7327         u64 last;
7328
7329         while (!list_empty(list)) {
7330                 struct root_item_record *rec;
7331                 struct extent_buffer *buf;
7332                 rec = list_entry(list->next,
7333                                  struct root_item_record, list);
7334                 last = 0;
7335                 buf = read_tree_block(root->fs_info->tree_root,
7336                                       rec->bytenr, rec->level_size, 0);
7337                 if (!extent_buffer_uptodate(buf)) {
7338                         free_extent_buffer(buf);
7339                         ret = -EIO;
7340                         break;
7341                 }
7342                 add_root_to_pending(buf, extent_cache, pending,
7343                                     seen, nodes, rec->objectid);
7344                 /*
7345                  * To rebuild extent tree, we need deal with snapshot
7346                  * one by one, otherwise we deal with node firstly which
7347                  * can maximize readahead.
7348                  */
7349                 if (!init_extent_tree && !rec->drop_level)
7350                         goto skip;
7351                 while (1) {
7352                         ret = run_next_block(trans, root, bits, bits_nr, &last,
7353                                              pending, seen, reada,
7354                                              nodes, extent_cache,
7355                                              chunk_cache, dev_cache,
7356                                              block_group_cache,
7357                                              dev_extent_cache, rec);
7358                         if (ret != 0)
7359                                 break;
7360                 }
7361 skip:
7362                 free_extent_buffer(buf);
7363                 list_del(&rec->list);
7364                 free(rec);
7365         }
7366         while (ret >= 0) {
7367                 ret = run_next_block(trans, root, bits, bits_nr, &last,
7368                                      pending, seen, reada,
7369                                      nodes, extent_cache,
7370                                      chunk_cache, dev_cache,
7371                                      block_group_cache,
7372                                      dev_extent_cache, NULL);
7373                 if (ret != 0) {
7374                         if (ret > 0)
7375                                 ret = 0;
7376                         break;
7377                 }
7378         }
7379         return ret;
7380 }
7381
7382 static int check_chunks_and_extents(struct btrfs_root *root)
7383 {
7384         struct rb_root dev_cache;
7385         struct cache_tree chunk_cache;
7386         struct block_group_tree block_group_cache;
7387         struct device_extent_tree dev_extent_cache;
7388         struct cache_tree extent_cache;
7389         struct cache_tree seen;
7390         struct cache_tree pending;
7391         struct cache_tree reada;
7392         struct cache_tree nodes;
7393         struct cache_tree corrupt_blocks;
7394         struct btrfs_path path;
7395         struct btrfs_key key;
7396         struct btrfs_key found_key;
7397         int ret, err = 0;
7398         struct block_info *bits;
7399         int bits_nr;
7400         struct extent_buffer *leaf;
7401         struct btrfs_trans_handle *trans = NULL;
7402         int slot;
7403         struct btrfs_root_item ri;
7404         struct list_head dropping_trees;
7405         struct list_head normal_trees;
7406         struct btrfs_root *root1;
7407         u64 objectid;
7408         u32 level_size;
7409         u8 level;
7410
7411         dev_cache = RB_ROOT;
7412         cache_tree_init(&chunk_cache);
7413         block_group_tree_init(&block_group_cache);
7414         device_extent_tree_init(&dev_extent_cache);
7415
7416         cache_tree_init(&extent_cache);
7417         cache_tree_init(&seen);
7418         cache_tree_init(&pending);
7419         cache_tree_init(&nodes);
7420         cache_tree_init(&reada);
7421         cache_tree_init(&corrupt_blocks);
7422         INIT_LIST_HEAD(&dropping_trees);
7423         INIT_LIST_HEAD(&normal_trees);
7424
7425         if (repair) {
7426                 trans = btrfs_start_transaction(root, 1);
7427                 if (IS_ERR(trans)) {
7428                         fprintf(stderr, "Error starting transaction\n");
7429                         return PTR_ERR(trans);
7430                 }
7431                 root->fs_info->fsck_extent_cache = &extent_cache;
7432                 root->fs_info->free_extent_hook = free_extent_hook;
7433                 root->fs_info->corrupt_blocks = &corrupt_blocks;
7434         }
7435
7436         bits_nr = 1024;
7437         bits = malloc(bits_nr * sizeof(struct block_info));
7438         if (!bits) {
7439                 perror("malloc");
7440                 exit(1);
7441         }
7442
7443 again:
7444         root1 = root->fs_info->tree_root;
7445         level = btrfs_header_level(root1->node);
7446         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
7447                                     root1->node->start, level, 0,
7448                                     btrfs_level_size(root1, level), NULL);
7449         if (ret < 0)
7450                 goto out;
7451         root1 = root->fs_info->chunk_root;
7452         level = btrfs_header_level(root1->node);
7453         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
7454                                     root1->node->start, level, 0,
7455                                     btrfs_level_size(root1, level), NULL);
7456         if (ret < 0)
7457                 goto out;
7458         btrfs_init_path(&path);
7459         key.offset = 0;
7460         key.objectid = 0;
7461         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
7462         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
7463                                         &key, &path, 0, 0);
7464         if (ret < 0)
7465                 goto out;
7466         while(1) {
7467                 leaf = path.nodes[0];
7468                 slot = path.slots[0];
7469                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
7470                         ret = btrfs_next_leaf(root, &path);
7471                         if (ret != 0)
7472                                 break;
7473                         leaf = path.nodes[0];
7474                         slot = path.slots[0];
7475                 }
7476                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
7477                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
7478                         unsigned long offset;
7479
7480                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7481                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
7482                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
7483                                 level = btrfs_root_level(&ri);
7484                                 level_size = btrfs_level_size(root, level);
7485                                 ret = add_root_item_to_list(&normal_trees,
7486                                                 found_key.objectid,
7487                                                 btrfs_root_bytenr(&ri), level,
7488                                                 0, level_size, NULL);
7489                                 if (ret < 0)
7490                                         goto out;
7491                         } else {
7492                                 level = btrfs_root_level(&ri);
7493                                 level_size = btrfs_level_size(root, level);
7494                                 objectid = found_key.objectid;
7495                                 btrfs_disk_key_to_cpu(&found_key,
7496                                                       &ri.drop_progress);
7497                                 ret = add_root_item_to_list(&dropping_trees,
7498                                                 objectid,
7499                                                 btrfs_root_bytenr(&ri),
7500                                                 level, ri.drop_level,
7501                                                 level_size, &found_key);
7502                                 if (ret < 0)
7503                                         goto out;
7504                         }
7505                 }
7506                 path.slots[0]++;
7507         }
7508         btrfs_release_path(&path);
7509         ret = deal_root_from_list(&normal_trees, trans, root,
7510                                   bits, bits_nr, &pending, &seen,
7511                                   &reada, &nodes, &extent_cache,
7512                                   &chunk_cache, &dev_cache, &block_group_cache,
7513                                   &dev_extent_cache);
7514         if (ret < 0)
7515                 goto out;
7516         ret = deal_root_from_list(&dropping_trees, trans, root,
7517                                   bits, bits_nr, &pending, &seen,
7518                                   &reada, &nodes, &extent_cache,
7519                                   &chunk_cache, &dev_cache, &block_group_cache,
7520                                   &dev_extent_cache);
7521         if (ret < 0)
7522                 goto out;
7523         if (ret >= 0)
7524                 ret = check_extent_refs(trans, root, &extent_cache);
7525         if (ret == -EAGAIN) {
7526                 ret = btrfs_commit_transaction(trans, root);
7527                 if (ret)
7528                         goto out;
7529
7530                 trans = btrfs_start_transaction(root, 1);
7531                 if (IS_ERR(trans)) {
7532                         ret = PTR_ERR(trans);
7533                         goto out;
7534                 }
7535
7536                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
7537                 free_extent_cache_tree(&seen);
7538                 free_extent_cache_tree(&pending);
7539                 free_extent_cache_tree(&reada);
7540                 free_extent_cache_tree(&nodes);
7541                 free_chunk_cache_tree(&chunk_cache);
7542                 free_block_group_tree(&block_group_cache);
7543                 free_device_cache_tree(&dev_cache);
7544                 free_device_extent_tree(&dev_extent_cache);
7545                 free_extent_record_cache(root->fs_info, &extent_cache);
7546                 goto again;
7547         }
7548
7549         err = check_chunks(&chunk_cache, &block_group_cache,
7550                            &dev_extent_cache, NULL, NULL, NULL, 0);
7551         if (err && !ret)
7552                 ret = err;
7553
7554         err = check_devices(&dev_cache, &dev_extent_cache);
7555         if (err && !ret)
7556                 ret = err;
7557
7558 out:
7559         if (trans) {
7560                 err = btrfs_commit_transaction(trans, root);
7561                 if (!ret)
7562                         ret = err;
7563         }
7564         if (repair) {
7565                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
7566                 root->fs_info->fsck_extent_cache = NULL;
7567                 root->fs_info->free_extent_hook = NULL;
7568                 root->fs_info->corrupt_blocks = NULL;
7569         }
7570         free(bits);
7571         free_chunk_cache_tree(&chunk_cache);
7572         free_device_cache_tree(&dev_cache);
7573         free_block_group_tree(&block_group_cache);
7574         free_device_extent_tree(&dev_extent_cache);
7575         free_extent_cache_tree(&seen);
7576         free_extent_cache_tree(&pending);
7577         free_extent_cache_tree(&reada);
7578         free_extent_cache_tree(&nodes);
7579         return ret;
7580 }
7581
7582 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
7583                            struct btrfs_root *root, int overwrite)
7584 {
7585         struct extent_buffer *c;
7586         struct extent_buffer *old = root->node;
7587         int level;
7588         int ret;
7589         struct btrfs_disk_key disk_key = {0,0,0};
7590
7591         level = 0;
7592
7593         if (overwrite) {
7594                 c = old;
7595                 extent_buffer_get(c);
7596                 goto init;
7597         }
7598         c = btrfs_alloc_free_block(trans, root,
7599                                    btrfs_level_size(root, 0),
7600                                    root->root_key.objectid,
7601                                    &disk_key, level, 0, 0);
7602         if (IS_ERR(c)) {
7603                 c = old;
7604                 extent_buffer_get(c);
7605                 overwrite = 1;
7606         }
7607 init:
7608         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
7609         btrfs_set_header_level(c, level);
7610         btrfs_set_header_bytenr(c, c->start);
7611         btrfs_set_header_generation(c, trans->transid);
7612         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
7613         btrfs_set_header_owner(c, root->root_key.objectid);
7614
7615         write_extent_buffer(c, root->fs_info->fsid,
7616                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
7617
7618         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
7619                             btrfs_header_chunk_tree_uuid(c),
7620                             BTRFS_UUID_SIZE);
7621
7622         btrfs_mark_buffer_dirty(c);
7623         /*
7624          * this case can happen in the following case:
7625          *
7626          * 1.overwrite previous root.
7627          *
7628          * 2.reinit reloc data root, this is because we skip pin
7629          * down reloc data tree before which means we can allocate
7630          * same block bytenr here.
7631          */
7632         if (old->start == c->start) {
7633                 btrfs_set_root_generation(&root->root_item,
7634                                           trans->transid);
7635                 root->root_item.level = btrfs_header_level(root->node);
7636                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
7637                                         &root->root_key, &root->root_item);
7638                 if (ret) {
7639                         free_extent_buffer(c);
7640                         return ret;
7641                 }
7642         }
7643         free_extent_buffer(old);
7644         root->node = c;
7645         add_root_to_dirty_list(root);
7646         return 0;
7647 }
7648
7649 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
7650                                 struct extent_buffer *eb, int tree_root)
7651 {
7652         struct extent_buffer *tmp;
7653         struct btrfs_root_item *ri;
7654         struct btrfs_key key;
7655         u64 bytenr;
7656         u32 leafsize;
7657         int level = btrfs_header_level(eb);
7658         int nritems;
7659         int ret;
7660         int i;
7661
7662         /*
7663          * If we have pinned this block before, don't pin it again.
7664          * This can not only avoid forever loop with broken filesystem
7665          * but also give us some speedups.
7666          */
7667         if (test_range_bit(&fs_info->pinned_extents, eb->start,
7668                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
7669                 return 0;
7670
7671         btrfs_pin_extent(fs_info, eb->start, eb->len);
7672
7673         leafsize = btrfs_super_leafsize(fs_info->super_copy);
7674         nritems = btrfs_header_nritems(eb);
7675         for (i = 0; i < nritems; i++) {
7676                 if (level == 0) {
7677                         btrfs_item_key_to_cpu(eb, &key, i);
7678                         if (key.type != BTRFS_ROOT_ITEM_KEY)
7679                                 continue;
7680                         /* Skip the extent root and reloc roots */
7681                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
7682                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
7683                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
7684                                 continue;
7685                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
7686                         bytenr = btrfs_disk_root_bytenr(eb, ri);
7687
7688                         /*
7689                          * If at any point we start needing the real root we
7690                          * will have to build a stump root for the root we are
7691                          * in, but for now this doesn't actually use the root so
7692                          * just pass in extent_root.
7693                          */
7694                         tmp = read_tree_block(fs_info->extent_root, bytenr,
7695                                               leafsize, 0);
7696                         if (!tmp) {
7697                                 fprintf(stderr, "Error reading root block\n");
7698                                 return -EIO;
7699                         }
7700                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
7701                         free_extent_buffer(tmp);
7702                         if (ret)
7703                                 return ret;
7704                 } else {
7705                         bytenr = btrfs_node_blockptr(eb, i);
7706
7707                         /* If we aren't the tree root don't read the block */
7708                         if (level == 1 && !tree_root) {
7709                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
7710                                 continue;
7711                         }
7712
7713                         tmp = read_tree_block(fs_info->extent_root, bytenr,
7714                                               leafsize, 0);
7715                         if (!tmp) {
7716                                 fprintf(stderr, "Error reading tree block\n");
7717                                 return -EIO;
7718                         }
7719                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
7720                         free_extent_buffer(tmp);
7721                         if (ret)
7722                                 return ret;
7723                 }
7724         }
7725
7726         return 0;
7727 }
7728
7729 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
7730 {
7731         int ret;
7732
7733         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
7734         if (ret)
7735                 return ret;
7736
7737         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
7738 }
7739
7740 static int reset_block_groups(struct btrfs_fs_info *fs_info)
7741 {
7742         struct btrfs_block_group_cache *cache;
7743         struct btrfs_path *path;
7744         struct extent_buffer *leaf;
7745         struct btrfs_chunk *chunk;
7746         struct btrfs_key key;
7747         int ret;
7748         u64 start;
7749
7750         path = btrfs_alloc_path();
7751         if (!path)
7752                 return -ENOMEM;
7753
7754         key.objectid = 0;
7755         key.type = BTRFS_CHUNK_ITEM_KEY;
7756         key.offset = 0;
7757
7758         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
7759         if (ret < 0) {
7760                 btrfs_free_path(path);
7761                 return ret;
7762         }
7763
7764         /*
7765          * We do this in case the block groups were screwed up and had alloc
7766          * bits that aren't actually set on the chunks.  This happens with
7767          * restored images every time and could happen in real life I guess.
7768          */
7769         fs_info->avail_data_alloc_bits = 0;
7770         fs_info->avail_metadata_alloc_bits = 0;
7771         fs_info->avail_system_alloc_bits = 0;
7772
7773         /* First we need to create the in-memory block groups */
7774         while (1) {
7775                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7776                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
7777                         if (ret < 0) {
7778                                 btrfs_free_path(path);
7779                                 return ret;
7780                         }
7781                         if (ret) {
7782                                 ret = 0;
7783                                 break;
7784                         }
7785                 }
7786                 leaf = path->nodes[0];
7787                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7788                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
7789                         path->slots[0]++;
7790                         continue;
7791                 }
7792
7793                 chunk = btrfs_item_ptr(leaf, path->slots[0],
7794                                        struct btrfs_chunk);
7795                 btrfs_add_block_group(fs_info, 0,
7796                                       btrfs_chunk_type(leaf, chunk),
7797                                       key.objectid, key.offset,
7798                                       btrfs_chunk_length(leaf, chunk));
7799                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
7800                                  key.offset + btrfs_chunk_length(leaf, chunk),
7801                                  GFP_NOFS);
7802                 path->slots[0]++;
7803         }
7804         start = 0;
7805         while (1) {
7806                 cache = btrfs_lookup_first_block_group(fs_info, start);
7807                 if (!cache)
7808                         break;
7809                 cache->cached = 1;
7810                 start = cache->key.objectid + cache->key.offset;
7811         }
7812
7813         btrfs_free_path(path);
7814         return 0;
7815 }
7816
7817 static int reset_balance(struct btrfs_trans_handle *trans,
7818                          struct btrfs_fs_info *fs_info)
7819 {
7820         struct btrfs_root *root = fs_info->tree_root;
7821         struct btrfs_path *path;
7822         struct extent_buffer *leaf;
7823         struct btrfs_key key;
7824         int del_slot, del_nr = 0;
7825         int ret;
7826         int found = 0;
7827
7828         path = btrfs_alloc_path();
7829         if (!path)
7830                 return -ENOMEM;
7831
7832         key.objectid = BTRFS_BALANCE_OBJECTID;
7833         key.type = BTRFS_BALANCE_ITEM_KEY;
7834         key.offset = 0;
7835
7836         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7837         if (ret) {
7838                 if (ret > 0)
7839                         ret = 0;
7840                 if (!ret)
7841                         goto reinit_data_reloc;
7842                 else
7843                         goto out;
7844         }
7845
7846         ret = btrfs_del_item(trans, root, path);
7847         if (ret)
7848                 goto out;
7849         btrfs_release_path(path);
7850
7851         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
7852         key.type = BTRFS_ROOT_ITEM_KEY;
7853         key.offset = 0;
7854
7855         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7856         if (ret < 0)
7857                 goto out;
7858         while (1) {
7859                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7860                         if (!found)
7861                                 break;
7862
7863                         if (del_nr) {
7864                                 ret = btrfs_del_items(trans, root, path,
7865                                                       del_slot, del_nr);
7866                                 del_nr = 0;
7867                                 if (ret)
7868                                         goto out;
7869                         }
7870                         key.offset++;
7871                         btrfs_release_path(path);
7872
7873                         found = 0;
7874                         ret = btrfs_search_slot(trans, root, &key, path,
7875                                                 -1, 1);
7876                         if (ret < 0)
7877                                 goto out;
7878                         continue;
7879                 }
7880                 found = 1;
7881                 leaf = path->nodes[0];
7882                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7883                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
7884                         break;
7885                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
7886                         path->slots[0]++;
7887                         continue;
7888                 }
7889                 if (!del_nr) {
7890                         del_slot = path->slots[0];
7891                         del_nr = 1;
7892                 } else {
7893                         del_nr++;
7894                 }
7895                 path->slots[0]++;
7896         }
7897
7898         if (del_nr) {
7899                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
7900                 if (ret)
7901                         goto out;
7902         }
7903         btrfs_release_path(path);
7904
7905 reinit_data_reloc:
7906         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
7907         key.type = BTRFS_ROOT_ITEM_KEY;
7908         key.offset = (u64)-1;
7909         root = btrfs_read_fs_root(fs_info, &key);
7910         if (IS_ERR(root)) {
7911                 fprintf(stderr, "Error reading data reloc tree\n");
7912                 ret = PTR_ERR(root);
7913                 goto out;
7914         }
7915         record_root_in_trans(trans, root);
7916         ret = btrfs_fsck_reinit_root(trans, root, 0);
7917         if (ret)
7918                 goto out;
7919         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
7920 out:
7921         btrfs_free_path(path);
7922         return ret;
7923 }
7924
7925 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
7926                               struct btrfs_fs_info *fs_info)
7927 {
7928         u64 start = 0;
7929         int ret;
7930
7931         /*
7932          * The only reason we don't do this is because right now we're just
7933          * walking the trees we find and pinning down their bytes, we don't look
7934          * at any of the leaves.  In order to do mixed groups we'd have to check
7935          * the leaves of any fs roots and pin down the bytes for any file
7936          * extents we find.  Not hard but why do it if we don't have to?
7937          */
7938         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
7939                 fprintf(stderr, "We don't support re-initing the extent tree "
7940                         "for mixed block groups yet, please notify a btrfs "
7941                         "developer you want to do this so they can add this "
7942                         "functionality.\n");
7943                 return -EINVAL;
7944         }
7945
7946         /*
7947          * first we need to walk all of the trees except the extent tree and pin
7948          * down the bytes that are in use so we don't overwrite any existing
7949          * metadata.
7950          */
7951         ret = pin_metadata_blocks(fs_info);
7952         if (ret) {
7953                 fprintf(stderr, "error pinning down used bytes\n");
7954                 return ret;
7955         }
7956
7957         /*
7958          * Need to drop all the block groups since we're going to recreate all
7959          * of them again.
7960          */
7961         btrfs_free_block_groups(fs_info);
7962         ret = reset_block_groups(fs_info);
7963         if (ret) {
7964                 fprintf(stderr, "error resetting the block groups\n");
7965                 return ret;
7966         }
7967
7968         /* Ok we can allocate now, reinit the extent root */
7969         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
7970         if (ret) {
7971                 fprintf(stderr, "extent root initialization failed\n");
7972                 /*
7973                  * When the transaction code is updated we should end the
7974                  * transaction, but for now progs only knows about commit so
7975                  * just return an error.
7976                  */
7977                 return ret;
7978         }
7979
7980         /*
7981          * Now we have all the in-memory block groups setup so we can make
7982          * allocations properly, and the metadata we care about is safe since we
7983          * pinned all of it above.
7984          */
7985         while (1) {
7986                 struct btrfs_block_group_cache *cache;
7987
7988                 cache = btrfs_lookup_first_block_group(fs_info, start);
7989                 if (!cache)
7990                         break;
7991                 start = cache->key.objectid + cache->key.offset;
7992                 ret = btrfs_insert_item(trans, fs_info->extent_root,
7993                                         &cache->key, &cache->item,
7994                                         sizeof(cache->item));
7995                 if (ret) {
7996                         fprintf(stderr, "Error adding block group\n");
7997                         return ret;
7998                 }
7999                 btrfs_extent_post_op(trans, fs_info->extent_root);
8000         }
8001
8002         ret = reset_balance(trans, fs_info);
8003         if (ret)
8004                 fprintf(stderr, "error reseting the pending balance\n");
8005
8006         return ret;
8007 }
8008
8009 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8010 {
8011         struct btrfs_path *path;
8012         struct btrfs_trans_handle *trans;
8013         struct btrfs_key key;
8014         int ret;
8015
8016         printf("Recowing metadata block %llu\n", eb->start);
8017         key.objectid = btrfs_header_owner(eb);
8018         key.type = BTRFS_ROOT_ITEM_KEY;
8019         key.offset = (u64)-1;
8020
8021         root = btrfs_read_fs_root(root->fs_info, &key);
8022         if (IS_ERR(root)) {
8023                 fprintf(stderr, "Couldn't find owner root %llu\n",
8024                         key.objectid);
8025                 return PTR_ERR(root);
8026         }
8027
8028         path = btrfs_alloc_path();
8029         if (!path)
8030                 return -ENOMEM;
8031
8032         trans = btrfs_start_transaction(root, 1);
8033         if (IS_ERR(trans)) {
8034                 btrfs_free_path(path);
8035                 return PTR_ERR(trans);
8036         }
8037
8038         path->lowest_level = btrfs_header_level(eb);
8039         if (path->lowest_level)
8040                 btrfs_node_key_to_cpu(eb, &key, 0);
8041         else
8042                 btrfs_item_key_to_cpu(eb, &key, 0);
8043
8044         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8045         btrfs_commit_transaction(trans, root);
8046         btrfs_free_path(path);
8047         return ret;
8048 }
8049
8050 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8051 {
8052         struct btrfs_path *path;
8053         struct btrfs_trans_handle *trans;
8054         struct btrfs_key key;
8055         int ret;
8056
8057         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8058                bad->key.type, bad->key.offset);
8059         key.objectid = bad->root_id;
8060         key.type = BTRFS_ROOT_ITEM_KEY;
8061         key.offset = (u64)-1;
8062
8063         root = btrfs_read_fs_root(root->fs_info, &key);
8064         if (IS_ERR(root)) {
8065                 fprintf(stderr, "Couldn't find owner root %llu\n",
8066                         key.objectid);
8067                 return PTR_ERR(root);
8068         }
8069
8070         path = btrfs_alloc_path();
8071         if (!path)
8072                 return -ENOMEM;
8073
8074         trans = btrfs_start_transaction(root, 1);
8075         if (IS_ERR(trans)) {
8076                 btrfs_free_path(path);
8077                 return PTR_ERR(trans);
8078         }
8079
8080         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8081         if (ret) {
8082                 if (ret > 0)
8083                         ret = 0;
8084                 goto out;
8085         }
8086         ret = btrfs_del_item(trans, root, path);
8087 out:
8088         btrfs_commit_transaction(trans, root);
8089         btrfs_free_path(path);
8090         return ret;
8091 }
8092
8093 static int zero_log_tree(struct btrfs_root *root)
8094 {
8095         struct btrfs_trans_handle *trans;
8096         int ret;
8097
8098         trans = btrfs_start_transaction(root, 1);
8099         if (IS_ERR(trans)) {
8100                 ret = PTR_ERR(trans);
8101                 return ret;
8102         }
8103         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8104         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8105         ret = btrfs_commit_transaction(trans, root);
8106         return ret;
8107 }
8108
8109 static int populate_csum(struct btrfs_trans_handle *trans,
8110                          struct btrfs_root *csum_root, char *buf, u64 start,
8111                          u64 len)
8112 {
8113         u64 offset = 0;
8114         u64 sectorsize;
8115         int ret = 0;
8116
8117         while (offset < len) {
8118                 sectorsize = csum_root->sectorsize;
8119                 ret = read_extent_data(csum_root, buf, start + offset,
8120                                        &sectorsize, 0);
8121                 if (ret)
8122                         break;
8123                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8124                                             start + offset, buf, sectorsize);
8125                 if (ret)
8126                         break;
8127                 offset += sectorsize;
8128         }
8129         return ret;
8130 }
8131
8132 static int fill_csum_tree(struct btrfs_trans_handle *trans,
8133                           struct btrfs_root *csum_root)
8134 {
8135         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8136         struct btrfs_path *path;
8137         struct btrfs_extent_item *ei;
8138         struct extent_buffer *leaf;
8139         char *buf;
8140         struct btrfs_key key;
8141         int ret;
8142
8143         path = btrfs_alloc_path();
8144         if (!path)
8145                 return -ENOMEM;
8146
8147         key.objectid = 0;
8148         key.type = BTRFS_EXTENT_ITEM_KEY;
8149         key.offset = 0;
8150
8151         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8152         if (ret < 0) {
8153                 btrfs_free_path(path);
8154                 return ret;
8155         }
8156
8157         buf = malloc(csum_root->sectorsize);
8158         if (!buf) {
8159                 btrfs_free_path(path);
8160                 return -ENOMEM;
8161         }
8162
8163         while (1) {
8164                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8165                         ret = btrfs_next_leaf(extent_root, path);
8166                         if (ret < 0)
8167                                 break;
8168                         if (ret) {
8169                                 ret = 0;
8170                                 break;
8171                         }
8172                 }
8173                 leaf = path->nodes[0];
8174
8175                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8176                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8177                         path->slots[0]++;
8178                         continue;
8179                 }
8180
8181                 ei = btrfs_item_ptr(leaf, path->slots[0],
8182                                     struct btrfs_extent_item);
8183                 if (!(btrfs_extent_flags(leaf, ei) &
8184                       BTRFS_EXTENT_FLAG_DATA)) {
8185                         path->slots[0]++;
8186                         continue;
8187                 }
8188
8189                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8190                                     key.offset);
8191                 if (ret)
8192                         break;
8193                 path->slots[0]++;
8194         }
8195
8196         btrfs_free_path(path);
8197         free(buf);
8198         return ret;
8199 }
8200
8201 struct root_item_info {
8202         /* level of the root */
8203         u8 level;
8204         /* number of nodes at this level, must be 1 for a root */
8205         int node_count;
8206         u64 bytenr;
8207         u64 gen;
8208         struct cache_extent cache_extent;
8209 };
8210
/*
 * Root id -> struct root_item_info lookup tree. Allocated on demand by
 * build_roots_info_cache() and torn down (and reset to NULL) by
 * free_roots_info_cache().
 */
static struct cache_tree *roots_info_cache = NULL;
8212
8213 static void free_roots_info_cache(void)
8214 {
8215         if (!roots_info_cache)
8216                 return;
8217
8218         while (!cache_tree_empty(roots_info_cache)) {
8219                 struct cache_extent *entry;
8220                 struct root_item_info *rii;
8221
8222                 entry = first_cache_extent(roots_info_cache);
8223                 if (!entry)
8224                         break;
8225                 remove_cache_extent(roots_info_cache, entry);
8226                 rii = container_of(entry, struct root_item_info, cache_extent);
8227                 free(rii);
8228         }
8229
8230         free(roots_info_cache);
8231         roots_info_cache = NULL;
8232 }
8233
8234 static int build_roots_info_cache(struct btrfs_fs_info *info)
8235 {
8236         int ret = 0;
8237         struct btrfs_key key;
8238         struct extent_buffer *leaf;
8239         struct btrfs_path *path;
8240
8241         if (!roots_info_cache) {
8242                 roots_info_cache = malloc(sizeof(*roots_info_cache));
8243                 if (!roots_info_cache)
8244                         return -ENOMEM;
8245                 cache_tree_init(roots_info_cache);
8246         }
8247
8248         path = btrfs_alloc_path();
8249         if (!path)
8250                 return -ENOMEM;
8251
8252         key.objectid = 0;
8253         key.type = BTRFS_EXTENT_ITEM_KEY;
8254         key.offset = 0;
8255
8256         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
8257         if (ret < 0)
8258                 goto out;
8259         leaf = path->nodes[0];
8260
8261         while (1) {
8262                 struct btrfs_key found_key;
8263                 struct btrfs_extent_item *ei;
8264                 struct btrfs_extent_inline_ref *iref;
8265                 int slot = path->slots[0];
8266                 int type;
8267                 u64 flags;
8268                 u64 root_id;
8269                 u8 level;
8270                 struct cache_extent *entry;
8271                 struct root_item_info *rii;
8272
8273                 if (slot >= btrfs_header_nritems(leaf)) {
8274                         ret = btrfs_next_leaf(info->extent_root, path);
8275                         if (ret < 0) {
8276                                 break;
8277                         } else if (ret) {
8278                                 ret = 0;
8279                                 break;
8280                         }
8281                         leaf = path->nodes[0];
8282                         slot = path->slots[0];
8283                 }
8284
8285                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8286
8287                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8288                     found_key.type != BTRFS_METADATA_ITEM_KEY)
8289                         goto next;
8290
8291                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8292                 flags = btrfs_extent_flags(leaf, ei);
8293
8294                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
8295                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
8296                         goto next;
8297
8298                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
8299                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8300                         level = found_key.offset;
8301                 } else {
8302                         struct btrfs_tree_block_info *info;
8303
8304                         info = (struct btrfs_tree_block_info *)(ei + 1);
8305                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
8306                         level = btrfs_tree_block_level(leaf, info);
8307                 }
8308
8309                 /*
8310                  * For a root extent, it must be of the following type and the
8311                  * first (and only one) iref in the item.
8312                  */
8313                 type = btrfs_extent_inline_ref_type(leaf, iref);
8314                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
8315                         goto next;
8316
8317                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
8318                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
8319                 if (!entry) {
8320                         rii = malloc(sizeof(struct root_item_info));
8321                         if (!rii) {
8322                                 ret = -ENOMEM;
8323                                 goto out;
8324                         }
8325                         rii->cache_extent.start = root_id;
8326                         rii->cache_extent.size = 1;
8327                         rii->level = (u8)-1;
8328                         entry = &rii->cache_extent;
8329                         ret = insert_cache_extent(roots_info_cache, entry);
8330                         ASSERT(ret == 0);
8331                 } else {
8332                         rii = container_of(entry, struct root_item_info,
8333                                            cache_extent);
8334                 }
8335
8336                 ASSERT(rii->cache_extent.start == root_id);
8337                 ASSERT(rii->cache_extent.size == 1);
8338
8339                 if (level > rii->level || rii->level == (u8)-1) {
8340                         rii->level = level;
8341                         rii->bytenr = found_key.objectid;
8342                         rii->gen = btrfs_extent_generation(leaf, ei);
8343                         rii->node_count = 1;
8344                 } else if (level == rii->level) {
8345                         rii->node_count++;
8346                 }
8347 next:
8348                 path->slots[0]++;
8349         }
8350
8351 out:
8352         btrfs_free_path(path);
8353
8354         return ret;
8355 }
8356
8357 static int maybe_repair_root_item(struct btrfs_fs_info *info,
8358                                   struct btrfs_path *path,
8359                                   const struct btrfs_key *root_key,
8360                                   const int read_only_mode)
8361 {
8362         const u64 root_id = root_key->objectid;
8363         struct cache_extent *entry;
8364         struct root_item_info *rii;
8365         struct btrfs_root_item ri;
8366         unsigned long offset;
8367
8368         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
8369         if (!entry) {
8370                 fprintf(stderr,
8371                         "Error: could not find extent items for root %llu\n",
8372                         root_key->objectid);
8373                 return -ENOENT;
8374         }
8375
8376         rii = container_of(entry, struct root_item_info, cache_extent);
8377         ASSERT(rii->cache_extent.start == root_id);
8378         ASSERT(rii->cache_extent.size == 1);
8379
8380         if (rii->node_count != 1) {
8381                 fprintf(stderr,
8382                         "Error: could not find btree root extent for root %llu\n",
8383                         root_id);
8384                 return -ENOENT;
8385         }
8386
8387         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
8388         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
8389
8390         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
8391             btrfs_root_level(&ri) != rii->level ||
8392             btrfs_root_generation(&ri) != rii->gen) {
8393
8394                 /*
8395                  * If we're in repair mode but our caller told us to not update
8396                  * the root item, i.e. just check if it needs to be updated, don't
8397                  * print this message, since the caller will call us again shortly
8398                  * for the same root item without read only mode (the caller will
8399                  * open a transaction first).
8400                  */
8401                 if (!(read_only_mode && repair))
8402                         fprintf(stderr,
8403                                 "%sroot item for root %llu,"
8404                                 " current bytenr %llu, current gen %llu, current level %u,"
8405                                 " new bytenr %llu, new gen %llu, new level %u\n",
8406                                 (read_only_mode ? "" : "fixing "),
8407                                 root_id,
8408                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
8409                                 btrfs_root_level(&ri),
8410                                 rii->bytenr, rii->gen, rii->level);
8411
8412                 if (btrfs_root_generation(&ri) > rii->gen) {
8413                         fprintf(stderr,
8414                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
8415                                 root_id, btrfs_root_generation(&ri), rii->gen);
8416                         return -EINVAL;
8417                 }
8418
8419                 if (!read_only_mode) {
8420                         btrfs_set_root_bytenr(&ri, rii->bytenr);
8421                         btrfs_set_root_level(&ri, rii->level);
8422                         btrfs_set_root_generation(&ri, rii->gen);
8423                         write_extent_buffer(path->nodes[0], &ri,
8424                                             offset, sizeof(ri));
8425                 }
8426
8427                 return 1;
8428         }
8429
8430         return 0;
8431 }
8432
8433 /*
8434  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
8435  * caused read-only snapshots to be corrupted if they were created at a moment
8436  * when the source subvolume/snapshot had orphan items. The issue was that the
8437  * on-disk root items became incorrect, referring to the pre orphan cleanup root
8438  * node instead of the post orphan cleanup root node.
8439  * So this function, and its callees, just detects and fixes those cases. Even
8440  * though the regression was for read-only snapshots, this function applies to
8441  * any snapshot/subvolume root.
8442  * This must be run before any other repair code - not doing it so, makes other
8443  * repair code delete or modify backrefs in the extent tree for example, which
8444  * will result in an inconsistent fs after repairing the root items.
8445  */
8446 static int repair_root_items(struct btrfs_fs_info *info)
8447 {
8448         struct btrfs_path *path = NULL;
8449         struct btrfs_key key;
8450         struct extent_buffer *leaf;
8451         struct btrfs_trans_handle *trans = NULL;
8452         int ret = 0;
8453         int bad_roots = 0;
8454         int need_trans = 0;
8455
8456         ret = build_roots_info_cache(info);
8457         if (ret)
8458                 goto out;
8459
8460         path = btrfs_alloc_path();
8461         if (!path) {
8462                 ret = -ENOMEM;
8463                 goto out;
8464         }
8465
8466         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
8467         key.type = BTRFS_ROOT_ITEM_KEY;
8468         key.offset = 0;
8469
8470 again:
8471         /*
8472          * Avoid opening and committing transactions if a leaf doesn't have
8473          * any root items that need to be fixed, so that we avoid rotating
8474          * backup roots unnecessarily.
8475          */
8476         if (need_trans) {
8477                 trans = btrfs_start_transaction(info->tree_root, 1);
8478                 if (IS_ERR(trans)) {
8479                         ret = PTR_ERR(trans);
8480                         goto out;
8481                 }
8482         }
8483
8484         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
8485                                 0, trans ? 1 : 0);
8486         if (ret < 0)
8487                 goto out;
8488         leaf = path->nodes[0];
8489
8490         while (1) {
8491                 struct btrfs_key found_key;
8492
8493                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
8494                         int no_more_keys = find_next_key(path, &key);
8495
8496                         btrfs_release_path(path);
8497                         if (trans) {
8498                                 ret = btrfs_commit_transaction(trans,
8499                                                                info->tree_root);
8500                                 trans = NULL;
8501                                 if (ret < 0)
8502                                         goto out;
8503                         }
8504                         need_trans = 0;
8505                         if (no_more_keys)
8506                                 break;
8507                         goto again;
8508                 }
8509
8510                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8511
8512                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
8513                         goto next;
8514
8515                 ret = maybe_repair_root_item(info, path, &found_key,
8516                                              trans ? 0 : 1);
8517                 if (ret < 0)
8518                         goto out;
8519                 if (ret) {
8520                         if (!trans && repair) {
8521                                 need_trans = 1;
8522                                 key = found_key;
8523                                 btrfs_release_path(path);
8524                                 goto again;
8525                         }
8526                         bad_roots++;
8527                 }
8528 next:
8529                 path->slots[0]++;
8530         }
8531         ret = 0;
8532 out:
8533         free_roots_info_cache();
8534         if (path)
8535                 btrfs_free_path(path);
8536         if (ret < 0)
8537                 return ret;
8538
8539         return bad_roots;
8540 }
8541
/*
 * Help text for "btrfs check": synopsis, one-line description, a blank
 * separator, then one line per option. NULL-terminated.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check an unmounted btrfs filesystem.",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"--qgroup-report             print a report on qgroup consistency",
	"--subvol-extents <subvolid> print subvolume extents and sharing state",
	"--tree-root <bytenr>        use the given bytenr for the tree root",
	NULL
};
8557
8558 int cmd_check(int argc, char **argv)
8559 {
8560         struct cache_tree root_cache;
8561         struct btrfs_root *root;
8562         struct btrfs_fs_info *info;
8563         u64 bytenr = 0;
8564         u64 subvolid = 0;
8565         u64 tree_root_bytenr = 0;
8566         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
8567         int ret;
8568         u64 num;
8569         int init_csum_tree = 0;
8570         int readonly = 0;
8571         int qgroup_report = 0;
8572         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
8573
8574         while(1) {
8575                 int c;
8576                 int option_index = 0;
8577                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
8578                         OPT_CHECK_CSUM, OPT_READONLY };
8579                 static const struct option long_options[] = {
8580                         { "super", 1, NULL, 's' },
8581                         { "repair", 0, NULL, OPT_REPAIR },
8582                         { "readonly", 0, NULL, OPT_READONLY },
8583                         { "init-csum-tree", 0, NULL, OPT_INIT_CSUM },
8584                         { "init-extent-tree", 0, NULL, OPT_INIT_EXTENT },
8585                         { "check-data-csum", 0, NULL, OPT_CHECK_CSUM },
8586                         { "backup", 0, NULL, 'b' },
8587                         { "subvol-extents", 1, NULL, 'E' },
8588                         { "qgroup-report", 0, NULL, 'Q' },
8589                         { "tree-root", 1, NULL, 'r' },
8590                         { NULL, 0, NULL, 0}
8591                 };
8592
8593                 c = getopt_long(argc, argv, "as:br:", long_options,
8594                                 &option_index);
8595                 if (c < 0)
8596                         break;
8597                 switch(c) {
8598                         case 'a': /* ignored */ break;
8599                         case 'b':
8600                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
8601                                 break;
8602                         case 's':
8603                                 num = arg_strtou64(optarg);
8604                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
8605                                         fprintf(stderr,
8606                                                 "ERROR: super mirror should be less than: %d\n",
8607                                                 BTRFS_SUPER_MIRROR_MAX);
8608                                         exit(1);
8609                                 }
8610                                 bytenr = btrfs_sb_offset(((int)num));
8611                                 printf("using SB copy %llu, bytenr %llu\n", num,
8612                                        (unsigned long long)bytenr);
8613                                 break;
8614                         case 'Q':
8615                                 qgroup_report = 1;
8616                                 break;
8617                         case 'E':
8618                                 subvolid = arg_strtou64(optarg);
8619                                 break;
8620                         case 'r':
8621                                 tree_root_bytenr = arg_strtou64(optarg);
8622                                 break;
8623                         case '?':
8624                         case 'h':
8625                                 usage(cmd_check_usage);
8626                         case OPT_REPAIR:
8627                                 printf("enabling repair mode\n");
8628                                 repair = 1;
8629                                 ctree_flags |= OPEN_CTREE_WRITES;
8630                                 break;
8631                         case OPT_READONLY:
8632                                 readonly = 1;
8633                                 break;
8634                         case OPT_INIT_CSUM:
8635                                 printf("Creating a new CRC tree\n");
8636                                 init_csum_tree = 1;
8637                                 repair = 1;
8638                                 ctree_flags |= OPEN_CTREE_WRITES;
8639                                 break;
8640                         case OPT_INIT_EXTENT:
8641                                 init_extent_tree = 1;
8642                                 ctree_flags |= (OPEN_CTREE_WRITES |
8643                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
8644                                 repair = 1;
8645                                 break;
8646                         case OPT_CHECK_CSUM:
8647                                 check_data_csum = 1;
8648                                 break;
8649                 }
8650         }
8651         argc = argc - optind;
8652
8653         if (check_argc_exact(argc, 1))
8654                 usage(cmd_check_usage);
8655
8656         /* This check is the only reason for --readonly to exist */
8657         if (readonly && repair) {
8658                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
8659                 exit(1);
8660         }
8661
8662         radix_tree_init();
8663         cache_tree_init(&root_cache);
8664
8665         if((ret = check_mounted(argv[optind])) < 0) {
8666                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
8667                 goto err_out;
8668         } else if(ret) {
8669                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
8670                 ret = -EBUSY;
8671                 goto err_out;
8672         }
8673
8674         /* only allow partial opening under repair mode */
8675         if (repair)
8676                 ctree_flags |= OPEN_CTREE_PARTIAL;
8677
8678         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
8679                                   ctree_flags);
8680         if (!info) {
8681                 fprintf(stderr, "Couldn't open file system\n");
8682                 ret = -EIO;
8683                 goto err_out;
8684         }
8685
8686         root = info->fs_root;
8687
8688         /*
8689          * repair mode will force us to commit transaction which
8690          * will make us fail to load log tree when mounting.
8691          */
8692         if (repair && btrfs_super_log_root(info->super_copy)) {
8693                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
8694                 if (!ret) {
8695                         ret = 1;
8696                         goto close_out;
8697                 }
8698                 ret = zero_log_tree(root);
8699                 if (ret) {
8700                         fprintf(stderr, "fail to zero log tree\n");
8701                         goto close_out;
8702                 }
8703         }
8704
8705         uuid_unparse(info->super_copy->fsid, uuidbuf);
8706         if (qgroup_report) {
8707                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
8708                        uuidbuf);
8709                 ret = qgroup_verify_all(info);
8710                 if (ret == 0)
8711                         print_qgroup_report(1);
8712                 goto close_out;
8713         }
8714         if (subvolid) {
8715                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
8716                        subvolid, argv[optind], uuidbuf);
8717                 ret = print_extent_state(info, subvolid);
8718                 goto close_out;
8719         }
8720         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
8721
8722         if (!extent_buffer_uptodate(info->tree_root->node) ||
8723             !extent_buffer_uptodate(info->dev_root->node) ||
8724             !extent_buffer_uptodate(info->chunk_root->node)) {
8725                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
8726                 ret = -EIO;
8727                 goto close_out;
8728         }
8729
8730         if (init_extent_tree || init_csum_tree) {
8731                 struct btrfs_trans_handle *trans;
8732
8733                 trans = btrfs_start_transaction(info->extent_root, 0);
8734                 if (IS_ERR(trans)) {
8735                         fprintf(stderr, "Error starting transaction\n");
8736                         ret = PTR_ERR(trans);
8737                         goto close_out;
8738                 }
8739
8740                 if (init_extent_tree) {
8741                         printf("Creating a new extent tree\n");
8742                         ret = reinit_extent_tree(trans, info);
8743                         if (ret)
8744                                 goto close_out;
8745                 }
8746
8747                 if (init_csum_tree) {
8748                         fprintf(stderr, "Reinit crc root\n");
8749                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
8750                         if (ret) {
8751                                 fprintf(stderr, "crc root initialization failed\n");
8752                                 ret = -EIO;
8753                                 goto close_out;
8754                         }
8755
8756                         ret = fill_csum_tree(trans, info->csum_root);
8757                         if (ret) {
8758                                 fprintf(stderr, "crc refilling failed\n");
8759                                 return -EIO;
8760                         }
8761                 }
8762                 /*
8763                  * Ok now we commit and run the normal fsck, which will add
8764                  * extent entries for all of the items it finds.
8765                  */
8766                 ret = btrfs_commit_transaction(trans, info->extent_root);
8767                 if (ret)
8768                         goto close_out;
8769         }
8770         if (!extent_buffer_uptodate(info->extent_root->node)) {
8771                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
8772                 ret = -EIO;
8773                 goto close_out;
8774         }
8775         if (!extent_buffer_uptodate(info->csum_root->node)) {
8776                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
8777                 ret = -EIO;
8778                 goto close_out;
8779         }
8780
8781         fprintf(stderr, "checking extents\n");
8782         ret = check_chunks_and_extents(root);
8783         if (ret)
8784                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
8785
8786         ret = repair_root_items(info);
8787         if (ret < 0)
8788                 goto close_out;
8789         if (repair) {
8790                 fprintf(stderr, "Fixed %d roots.\n", ret);
8791                 ret = 0;
8792         } else if (ret > 0) {
8793                 fprintf(stderr,
8794                        "Found %d roots with an outdated root item.\n",
8795                        ret);
8796                 fprintf(stderr,
8797                         "Please run a filesystem check with the option --repair to fix them.\n");
8798                 ret = 1;
8799                 goto close_out;
8800         }
8801
8802         fprintf(stderr, "checking free space cache\n");
8803         ret = check_space_cache(root);
8804         if (ret)
8805                 goto out;
8806
8807         /*
8808          * We used to have to have these hole extents in between our real
8809          * extents so if we don't have this flag set we need to make sure there
8810          * are no gaps in the file extents for inodes, otherwise we can just
8811          * ignore it when this happens.
8812          */
8813         no_holes = btrfs_fs_incompat(root->fs_info,
8814                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
8815         fprintf(stderr, "checking fs roots\n");
8816         ret = check_fs_roots(root, &root_cache);
8817         if (ret)
8818                 goto out;
8819
8820         fprintf(stderr, "checking csums\n");
8821         ret = check_csums(root);
8822         if (ret)
8823                 goto out;
8824
8825         fprintf(stderr, "checking root refs\n");
8826         ret = check_root_refs(root, &root_cache);
8827         if (ret)
8828                 goto out;
8829
8830         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
8831                 struct extent_buffer *eb;
8832
8833                 eb = list_first_entry(&root->fs_info->recow_ebs,
8834                                       struct extent_buffer, recow);
8835                 list_del_init(&eb->recow);
8836                 ret = recow_extent_buffer(root, eb);
8837                 if (ret)
8838                         break;
8839         }
8840
8841         while (!list_empty(&delete_items)) {
8842                 struct bad_item *bad;
8843
8844                 bad = list_first_entry(&delete_items, struct bad_item, list);
8845                 list_del_init(&bad->list);
8846                 if (repair)
8847                         ret = delete_bad_item(root, bad);
8848                 free(bad);
8849         }
8850
8851         if (info->quota_enabled) {
8852                 int err;
8853                 fprintf(stderr, "checking quota groups\n");
8854                 err = qgroup_verify_all(info);
8855                 if (err)
8856                         goto out;
8857         }
8858
8859         if (!list_empty(&root->fs_info->recow_ebs)) {
8860                 fprintf(stderr, "Transid errors in file system\n");
8861                 ret = 1;
8862         }
8863 out:
8864         print_qgroup_report(0);
8865         if (found_old_backref) { /*
8866                  * there was a disk format change when mixed
8867                  * backref was in testing tree. The old format
8868                  * existed about one week.
8869                  */
8870                 printf("\n * Found old mixed backref format. "
8871                        "The old format is not supported! *"
8872                        "\n * Please mount the FS in readonly mode, "
8873                        "backup data and re-format the FS. *\n\n");
8874                 ret = 1;
8875         }
8876         printf("found %llu bytes used err is %d\n",
8877                (unsigned long long)bytes_used, ret);
8878         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
8879         printf("total tree bytes: %llu\n",
8880                (unsigned long long)total_btree_bytes);
8881         printf("total fs tree bytes: %llu\n",
8882                (unsigned long long)total_fs_tree_bytes);
8883         printf("total extent tree bytes: %llu\n",
8884                (unsigned long long)total_extent_tree_bytes);
8885         printf("btree space waste bytes: %llu\n",
8886                (unsigned long long)btree_space_waste);
8887         printf("file data blocks allocated: %llu\n referenced %llu\n",
8888                 (unsigned long long)data_bytes_allocated,
8889                 (unsigned long long)data_bytes_referenced);
8890         printf("%s\n", PACKAGE_STRING);
8891
8892         free_root_recs_tree(&root_cache);
8893 close_out:
8894         close_ctree(root);
8895 err_out:
8896         return ret;
8897 }