btrfs-progs: check: introduce function to check file extent
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phases whose label print_status_check() can display.
 * TASK_NOTHING is a sentinel with no label and must remain the final
 * enumerator.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* sentinel: keep last */
};
52
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp;
56
57         struct task_info *info;
58 };
59
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_DEFAULT is an alias for the original mode, not a fourth mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL,
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct list_head list;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 {
118         return container_of(back, struct data_backref, node);
119 }
120
121 /*
122  * Much like data_backref, just removed the undetermined members
123  * and change it to use list_head.
124  * During extent scan, it is stored in root->orphan_data_extent.
125  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126  */
127 struct orphan_data_extent {
128         struct list_head list;
129         u64 root;
130         u64 objectid;
131         u64 offset;
132         u64 disk_bytenr;
133         u64 disk_len;
134 };
135
136 struct tree_backref {
137         struct extent_backref node;
138         union {
139                 u64 parent;
140                 u64 root;
141         };
142 };
143
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 {
146         return container_of(back, struct tree_backref, node);
147 }
148
/*
 * Value used to explicitly initialize the two-bit field
 * extent_record::flag_block_full_backref.
 */
enum {
	FLAG_UNSET = 2,
};
151
152 struct extent_record {
153         struct list_head backrefs;
154         struct list_head dups;
155         struct list_head list;
156         struct cache_extent cache;
157         struct btrfs_disk_key parent_key;
158         u64 start;
159         u64 max_size;
160         u64 nr;
161         u64 refs;
162         u64 extent_item_refs;
163         u64 generation;
164         u64 parent_generation;
165         u64 info_objectid;
166         u32 num_duplicates;
167         u8 info_level;
168         unsigned int flag_block_full_backref:2;
169         unsigned int found_rec:1;
170         unsigned int content_checked:1;
171         unsigned int owner_ref_checked:1;
172         unsigned int is_root:1;
173         unsigned int metadata:1;
174         unsigned int bad_full_backref:1;
175         unsigned int crossing_stripes:1;
176         unsigned int wrong_chunk_type:1;
177 };
178
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 {
181         return container_of(entry, struct extent_record, list);
182 }
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         u8 filetype;
190         u8 ref_type;
191         int errors;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 {
200         return list_entry(entry, struct inode_backref, list);
201 }
202
203 struct root_item_record {
204         struct list_head list;
205         u64 objectid;
206         u64 bytenr;
207         u64 last_snapshot;
208         u8 level;
209         u8 drop_level;
210         int level_size;
211         struct btrfs_key drop_key;
212 };
213
/* Error bits kept in the `errors` field of backrefs; see print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
227
228 struct file_extent_hole {
229         struct rb_node node;
230         u64 start;
231         u64 len;
232 };
233
234 struct inode_record {
235         struct list_head backrefs;
236         unsigned int checked:1;
237         unsigned int merging:1;
238         unsigned int found_inode_item:1;
239         unsigned int found_dir_item:1;
240         unsigned int found_file_extent:1;
241         unsigned int found_csum_item:1;
242         unsigned int some_csum_missing:1;
243         unsigned int nodatasum:1;
244         int errors;
245
246         u64 ino;
247         u32 nlink;
248         u32 imode;
249         u64 isize;
250         u64 nbytes;
251
252         u32 found_link;
253         u64 found_size;
254         u64 extent_start;
255         u64 extent_end;
256         struct rb_root holes;
257         struct list_head orphan_extents;
258
259         u32 refs;
260 };
261
/* Error bits kept in inode_record::errors; see print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
277
278 struct root_backref {
279         struct list_head list;
280         unsigned int found_dir_item:1;
281         unsigned int found_dir_index:1;
282         unsigned int found_back_ref:1;
283         unsigned int found_forward_ref:1;
284         unsigned int reachable:1;
285         int errors;
286         u64 ref_root;
287         u64 dir;
288         u64 index;
289         u16 namelen;
290         char name[0];
291 };
292
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 {
295         return list_entry(entry, struct root_backref, list);
296 }
297
298 struct root_record {
299         struct list_head backrefs;
300         struct cache_extent cache;
301         unsigned int found_root_item:1;
302         u64 objectid;
303         u32 found_ref;
304 };
305
306 struct ptr_node {
307         struct cache_extent cache;
308         void *data;
309 };
310
311 struct shared_node {
312         struct cache_extent cache;
313         struct cache_tree root_cache;
314         struct cache_tree inode_cache;
315         struct inode_record *current;
316         u32 refs;
317 };
318
319 struct block_info {
320         u64 start;
321         u32 size;
322 };
323
324 struct walk_control {
325         struct cache_tree shared;
326         struct shared_node *nodes[BTRFS_MAX_LEVEL];
327         int active_node;
328         int root_level;
329 };
330
331 struct bad_item {
332         struct btrfs_key key;
333         u64 root_id;
334         struct list_head list;
335 };
336
337 struct extent_entry {
338         u64 bytenr;
339         u64 bytes;
340         int count;
341         int broken;
342         struct list_head list;
343 };
344
345 struct root_item_info {
346         /* level of the root */
347         u8 level;
348         /* number of nodes at this level, must be 1 for a root */
349         int node_count;
350         u64 bytenr;
351         u64 gen;
352         struct cache_extent cache_extent;
353 };
354
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet; they are only used internally
 * for error classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * bit 4 with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable once OR-ed into a result; it now uses the first free
 * bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
371
372 static void *print_status_check(void *p)
373 {
374         struct task_ctx *priv = p;
375         const char work_indicator[] = { '.', 'o', 'O', 'o' };
376         uint32_t count = 0;
377         static char *task_position_string[] = {
378                 "checking extents",
379                 "checking free space cache",
380                 "checking fs roots",
381         };
382
383         task_period_start(priv->info, 1000 /* 1s */);
384
385         if (priv->tp == TASK_NOTHING)
386                 return NULL;
387
388         while (1) {
389                 printf("%s [%c]\r", task_position_string[priv->tp],
390                                 work_indicator[count % 4]);
391                 count++;
392                 fflush(stdout);
393                 task_period_wait(priv->info);
394         }
395         return NULL;
396 }
397
/*
 * Terminate the progress line: write a newline to stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);
	return 0;
}
405
406 static enum btrfs_check_mode parse_check_mode(const char *str)
407 {
408         if (strcmp(str, "lowmem") == 0)
409                 return CHECK_MODE_LOWMEM;
410         if (strcmp(str, "orig") == 0)
411                 return CHECK_MODE_ORIGINAL;
412         if (strcmp(str, "original") == 0)
413                 return CHECK_MODE_ORIGINAL;
414
415         return CHECK_MODE_UNKNOWN;
416 }
417
418 /* Compatible function to allow reuse of old codes */
419 static u64 first_extent_gap(struct rb_root *holes)
420 {
421         struct file_extent_hole *hole;
422
423         if (RB_EMPTY_ROOT(holes))
424                 return (u64)-1;
425
426         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
427         return hole->start;
428 }
429
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 {
432         struct file_extent_hole *hole1;
433         struct file_extent_hole *hole2;
434
435         hole1 = rb_entry(node1, struct file_extent_hole, node);
436         hole2 = rb_entry(node2, struct file_extent_hole, node);
437
438         if (hole1->start > hole2->start)
439                 return -1;
440         if (hole1->start < hole2->start)
441                 return 1;
442         /* Now hole1->start == hole2->start */
443         if (hole1->len >= hole2->len)
444                 /*
445                  * Hole 1 will be merge center
446                  * Same hole will be merged later
447                  */
448                 return -1;
449         /* Hole 2 will be merge center */
450         return 1;
451 }
452
453 /*
454  * Add a hole to the record
455  *
456  * This will do hole merge for copy_file_extent_holes(),
457  * which will ensure there won't be continuous holes.
458  */
459 static int add_file_extent_hole(struct rb_root *holes,
460                                 u64 start, u64 len)
461 {
462         struct file_extent_hole *hole;
463         struct file_extent_hole *prev = NULL;
464         struct file_extent_hole *next = NULL;
465
466         hole = malloc(sizeof(*hole));
467         if (!hole)
468                 return -ENOMEM;
469         hole->start = start;
470         hole->len = len;
471         /* Since compare will not return 0, no -EEXIST will happen */
472         rb_insert(holes, &hole->node, compare_hole);
473
474         /* simple merge with previous hole */
475         if (rb_prev(&hole->node))
476                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477                                 node);
478         if (prev && prev->start + prev->len >= hole->start) {
479                 hole->len = hole->start + hole->len - prev->start;
480                 hole->start = prev->start;
481                 rb_erase(&prev->node, holes);
482                 free(prev);
483                 prev = NULL;
484         }
485
486         /* iterate merge with next holes */
487         while (1) {
488                 if (!rb_next(&hole->node))
489                         break;
490                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491                                         node);
492                 if (hole->start + hole->len >= next->start) {
493                         if (hole->start + hole->len <= next->start + next->len)
494                                 hole->len = next->start + next->len -
495                                             hole->start;
496                         rb_erase(&next->node, holes);
497                         free(next);
498                         next = NULL;
499                 } else
500                         break;
501         }
502         return 0;
503 }
504
505 static int compare_hole_range(struct rb_node *node, void *data)
506 {
507         struct file_extent_hole *hole;
508         u64 start;
509
510         hole = (struct file_extent_hole *)data;
511         start = hole->start;
512
513         hole = rb_entry(node, struct file_extent_hole, node);
514         if (start < hole->start)
515                 return -1;
516         if (start >= hole->start && start < hole->start + hole->len)
517                 return 0;
518         return 1;
519 }
520
521 /*
522  * Delete a hole in the record
523  *
524  * This will do the hole split and is much restrict than add.
525  */
526 static int del_file_extent_hole(struct rb_root *holes,
527                                 u64 start, u64 len)
528 {
529         struct file_extent_hole *hole;
530         struct file_extent_hole tmp;
531         u64 prev_start = 0;
532         u64 prev_len = 0;
533         u64 next_start = 0;
534         u64 next_len = 0;
535         struct rb_node *node;
536         int have_prev = 0;
537         int have_next = 0;
538         int ret = 0;
539
540         tmp.start = start;
541         tmp.len = len;
542         node = rb_search(holes, &tmp, compare_hole_range, NULL);
543         if (!node)
544                 return -EEXIST;
545         hole = rb_entry(node, struct file_extent_hole, node);
546         if (start + len > hole->start + hole->len)
547                 return -EEXIST;
548
549         /*
550          * Now there will be no overlap, delete the hole and re-add the
551          * split(s) if they exists.
552          */
553         if (start > hole->start) {
554                 prev_start = hole->start;
555                 prev_len = start - hole->start;
556                 have_prev = 1;
557         }
558         if (hole->start + hole->len > start + len) {
559                 next_start = start + len;
560                 next_len = hole->start + hole->len - start - len;
561                 have_next = 1;
562         }
563         rb_erase(node, holes);
564         free(hole);
565         if (have_prev) {
566                 ret = add_file_extent_hole(holes, prev_start, prev_len);
567                 if (ret < 0)
568                         return ret;
569         }
570         if (have_next) {
571                 ret = add_file_extent_hole(holes, next_start, next_len);
572                 if (ret < 0)
573                         return ret;
574         }
575         return 0;
576 }
577
578 static int copy_file_extent_holes(struct rb_root *dst,
579                                   struct rb_root *src)
580 {
581         struct file_extent_hole *hole;
582         struct rb_node *node;
583         int ret = 0;
584
585         node = rb_first(src);
586         while (node) {
587                 hole = rb_entry(node, struct file_extent_hole, node);
588                 ret = add_file_extent_hole(dst, hole->start, hole->len);
589                 if (ret)
590                         break;
591                 node = rb_next(node);
592         }
593         return ret;
594 }
595
596 static void free_file_extent_holes(struct rb_root *holes)
597 {
598         struct rb_node *node;
599         struct file_extent_hole *hole;
600
601         node = rb_first(holes);
602         while (node) {
603                 hole = rb_entry(node, struct file_extent_hole, node);
604                 rb_erase(node, holes);
605                 free(hole);
606                 node = rb_first(holes);
607         }
608 }
609
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613                                  struct btrfs_root *root)
614 {
615         if (root->last_trans != trans->transid) {
616                 root->track_dirty = 1;
617                 root->last_trans = trans->transid;
618                 root->commit_root = root->node;
619                 extent_buffer_get(root->node);
620         }
621 }
622
623 static u8 imode_to_type(u32 imode)
624 {
625 #define S_SHIFT 12
626         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
628                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
629                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
630                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
631                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
632                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
633                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
634         };
635
636         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
637 #undef S_SHIFT
638 }
639
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 {
642         struct device_record *rec1;
643         struct device_record *rec2;
644
645         rec1 = rb_entry(node1, struct device_record, node);
646         rec2 = rb_entry(node2, struct device_record, node);
647         if (rec1->devid > rec2->devid)
648                 return -1;
649         else if (rec1->devid < rec2->devid)
650                 return 1;
651         else
652                 return 0;
653 }
654
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 {
657         struct inode_record *rec;
658         struct inode_backref *backref;
659         struct inode_backref *orig;
660         struct inode_backref *tmp;
661         struct orphan_data_extent *src_orphan;
662         struct orphan_data_extent *dst_orphan;
663         struct rb_node *rb;
664         size_t size;
665         int ret;
666
667         rec = malloc(sizeof(*rec));
668         if (!rec)
669                 return ERR_PTR(-ENOMEM);
670         memcpy(rec, orig_rec, sizeof(*rec));
671         rec->refs = 1;
672         INIT_LIST_HEAD(&rec->backrefs);
673         INIT_LIST_HEAD(&rec->orphan_extents);
674         rec->holes = RB_ROOT;
675
676         list_for_each_entry(orig, &orig_rec->backrefs, list) {
677                 size = sizeof(*orig) + orig->namelen + 1;
678                 backref = malloc(size);
679                 if (!backref) {
680                         ret = -ENOMEM;
681                         goto cleanup;
682                 }
683                 memcpy(backref, orig, size);
684                 list_add_tail(&backref->list, &rec->backrefs);
685         }
686         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687                 dst_orphan = malloc(sizeof(*dst_orphan));
688                 if (!dst_orphan) {
689                         ret = -ENOMEM;
690                         goto cleanup;
691                 }
692                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694         }
695         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
696         if (ret < 0)
697                 goto cleanup_rb;
698
699         return rec;
700
701 cleanup_rb:
702         rb = rb_first(&rec->holes);
703         while (rb) {
704                 struct file_extent_hole *hole;
705
706                 hole = rb_entry(rb, struct file_extent_hole, node);
707                 rb = rb_next(rb);
708                 free(hole);
709         }
710
711 cleanup:
712         if (!list_empty(&rec->backrefs))
713                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714                         list_del(&orig->list);
715                         free(orig);
716                 }
717
718         if (!list_empty(&rec->orphan_extents))
719                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720                         list_del(&orig->list);
721                         free(orig);
722                 }
723
724         free(rec);
725
726         return ERR_PTR(ret);
727 }
728
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
730                                       u64 objectid)
731 {
732         struct orphan_data_extent *orphan;
733
734         if (list_empty(orphan_extents))
735                 return;
736         printf("The following data extent is lost in tree %llu:\n",
737                objectid);
738         list_for_each_entry(orphan, orphan_extents, list) {
739                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
741                        orphan->disk_len);
742         }
743 }
744
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 {
747         u64 root_objectid = root->root_key.objectid;
748         int errors = rec->errors;
749
750         if (!errors)
751                 return;
752         /* reloc root errors, we print its corresponding fs root objectid*/
753         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754                 root_objectid = root->root_key.offset;
755                 fprintf(stderr, "reloc");
756         }
757         fprintf(stderr, "root %llu inode %llu errors %x",
758                 (unsigned long long) root_objectid,
759                 (unsigned long long) rec->ino, rec->errors);
760
761         if (errors & I_ERR_NO_INODE_ITEM)
762                 fprintf(stderr, ", no inode item");
763         if (errors & I_ERR_NO_ORPHAN_ITEM)
764                 fprintf(stderr, ", no orphan item");
765         if (errors & I_ERR_DUP_INODE_ITEM)
766                 fprintf(stderr, ", dup inode item");
767         if (errors & I_ERR_DUP_DIR_INDEX)
768                 fprintf(stderr, ", dup dir index");
769         if (errors & I_ERR_ODD_DIR_ITEM)
770                 fprintf(stderr, ", odd dir item");
771         if (errors & I_ERR_ODD_FILE_EXTENT)
772                 fprintf(stderr, ", odd file extent");
773         if (errors & I_ERR_BAD_FILE_EXTENT)
774                 fprintf(stderr, ", bad file extent");
775         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776                 fprintf(stderr, ", file extent overlap");
777         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778                 fprintf(stderr, ", file extent discount");
779         if (errors & I_ERR_DIR_ISIZE_WRONG)
780                 fprintf(stderr, ", dir isize wrong");
781         if (errors & I_ERR_FILE_NBYTES_WRONG)
782                 fprintf(stderr, ", nbytes wrong");
783         if (errors & I_ERR_ODD_CSUM_ITEM)
784                 fprintf(stderr, ", odd csum item");
785         if (errors & I_ERR_SOME_CSUM_MISSING)
786                 fprintf(stderr, ", some csum missing");
787         if (errors & I_ERR_LINK_COUNT_WRONG)
788                 fprintf(stderr, ", link count wrong");
789         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790                 fprintf(stderr, ", orphan file extent");
791         fprintf(stderr, "\n");
792         /* Print the orphan extents if needed */
793         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795
796         /* Print the holes if needed */
797         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798                 struct file_extent_hole *hole;
799                 struct rb_node *node;
800                 int found = 0;
801
802                 node = rb_first(&rec->holes);
803                 fprintf(stderr, "Found file extent holes:\n");
804                 while (node) {
805                         found = 1;
806                         hole = rb_entry(node, struct file_extent_hole, node);
807                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
808                                 hole->start, hole->len);
809                         node = rb_next(node);
810                 }
811                 if (!found)
812                         fprintf(stderr, "\tstart: 0, len: %llu\n",
813                                 round_up(rec->isize, root->sectorsize));
814         }
815 }
816
817 static void print_ref_error(int errors)
818 {
819         if (errors & REF_ERR_NO_DIR_ITEM)
820                 fprintf(stderr, ", no dir item");
821         if (errors & REF_ERR_NO_DIR_INDEX)
822                 fprintf(stderr, ", no dir index");
823         if (errors & REF_ERR_NO_INODE_REF)
824                 fprintf(stderr, ", no inode ref");
825         if (errors & REF_ERR_DUP_DIR_ITEM)
826                 fprintf(stderr, ", dup dir item");
827         if (errors & REF_ERR_DUP_DIR_INDEX)
828                 fprintf(stderr, ", dup dir index");
829         if (errors & REF_ERR_DUP_INODE_REF)
830                 fprintf(stderr, ", dup inode ref");
831         if (errors & REF_ERR_INDEX_UNMATCH)
832                 fprintf(stderr, ", index mismatch");
833         if (errors & REF_ERR_FILETYPE_UNMATCH)
834                 fprintf(stderr, ", filetype mismatch");
835         if (errors & REF_ERR_NAME_TOO_LONG)
836                 fprintf(stderr, ", name too long");
837         if (errors & REF_ERR_NO_ROOT_REF)
838                 fprintf(stderr, ", no root ref");
839         if (errors & REF_ERR_NO_ROOT_BACKREF)
840                 fprintf(stderr, ", no root backref");
841         if (errors & REF_ERR_DUP_ROOT_REF)
842                 fprintf(stderr, ", dup root ref");
843         if (errors & REF_ERR_DUP_ROOT_BACKREF)
844                 fprintf(stderr, ", dup root backref");
845         fprintf(stderr, "\n");
846 }
847
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
849                                           u64 ino, int mod)
850 {
851         struct ptr_node *node;
852         struct cache_extent *cache;
853         struct inode_record *rec = NULL;
854         int ret;
855
856         cache = lookup_cache_extent(inode_cache, ino, 1);
857         if (cache) {
858                 node = container_of(cache, struct ptr_node, cache);
859                 rec = node->data;
860                 if (mod && rec->refs > 1) {
861                         node->data = clone_inode_rec(rec);
862                         if (IS_ERR(node->data))
863                                 return node->data;
864                         rec->refs--;
865                         rec = node->data;
866                 }
867         } else if (mod) {
868                 rec = calloc(1, sizeof(*rec));
869                 if (!rec)
870                         return ERR_PTR(-ENOMEM);
871                 rec->ino = ino;
872                 rec->extent_start = (u64)-1;
873                 rec->refs = 1;
874                 INIT_LIST_HEAD(&rec->backrefs);
875                 INIT_LIST_HEAD(&rec->orphan_extents);
876                 rec->holes = RB_ROOT;
877
878                 node = malloc(sizeof(*node));
879                 if (!node) {
880                         free(rec);
881                         return ERR_PTR(-ENOMEM);
882                 }
883                 node->cache.start = ino;
884                 node->cache.size = 1;
885                 node->data = rec;
886
887                 if (ino == BTRFS_FREE_INO_OBJECTID)
888                         rec->found_link = 1;
889
890                 ret = insert_cache_extent(inode_cache, &node->cache);
891                 if (ret)
892                         return ERR_PTR(-EEXIST);
893         }
894         return rec;
895 }
896
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 {
899         struct orphan_data_extent *orphan;
900
901         while (!list_empty(orphan_extents)) {
902                 orphan = list_entry(orphan_extents->next,
903                                     struct orphan_data_extent, list);
904                 list_del(&orphan->list);
905                 free(orphan);
906         }
907 }
908
909 static void free_inode_rec(struct inode_record *rec)
910 {
911         struct inode_backref *backref;
912
913         if (--rec->refs > 0)
914                 return;
915
916         while (!list_empty(&rec->backrefs)) {
917                 backref = to_inode_backref(rec->backrefs.next);
918                 list_del(&backref->list);
919                 free(backref);
920         }
921         free_orphan_data_extents(&rec->orphan_extents);
922         free_file_extent_holes(&rec->holes);
923         free(rec);
924 }
925
926 static int can_free_inode_rec(struct inode_record *rec)
927 {
928         if (!rec->errors && rec->checked && rec->found_inode_item &&
929             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
930                 return 1;
931         return 0;
932 }
933
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935                                  struct inode_record *rec)
936 {
937         struct cache_extent *cache;
938         struct inode_backref *tmp, *backref;
939         struct ptr_node *node;
940         u8 filetype;
941
942         if (!rec->found_inode_item)
943                 return;
944
945         filetype = imode_to_type(rec->imode);
946         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947                 if (backref->found_dir_item && backref->found_dir_index) {
948                         if (backref->filetype != filetype)
949                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950                         if (!backref->errors && backref->found_inode_ref &&
951                             rec->nlink == rec->found_link) {
952                                 list_del(&backref->list);
953                                 free(backref);
954                         }
955                 }
956         }
957
958         if (!rec->checked || rec->merging)
959                 return;
960
961         if (S_ISDIR(rec->imode)) {
962                 if (rec->found_size != rec->isize)
963                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964                 if (rec->found_file_extent)
965                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
966         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967                 if (rec->found_dir_item)
968                         rec->errors |= I_ERR_ODD_DIR_ITEM;
969                 if (rec->found_size != rec->nbytes)
970                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971                 if (rec->nlink > 0 && !no_holes &&
972                     (rec->extent_end < rec->isize ||
973                      first_extent_gap(&rec->holes) < rec->isize))
974                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
975         }
976
977         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978                 if (rec->found_csum_item && rec->nodatasum)
979                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
980                 if (rec->some_csum_missing && !rec->nodatasum)
981                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
982         }
983
984         BUG_ON(rec->refs != 1);
985         if (can_free_inode_rec(rec)) {
986                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987                 node = container_of(cache, struct ptr_node, cache);
988                 BUG_ON(node->data != rec);
989                 remove_cache_extent(inode_cache, &node->cache);
990                 free(node);
991                 free_inode_rec(rec);
992         }
993 }
994
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 {
997         struct btrfs_path path;
998         struct btrfs_key key;
999         int ret;
1000
1001         key.objectid = BTRFS_ORPHAN_OBJECTID;
1002         key.type = BTRFS_ORPHAN_ITEM_KEY;
1003         key.offset = ino;
1004
1005         btrfs_init_path(&path);
1006         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007         btrfs_release_path(&path);
1008         if (ret > 0)
1009                 ret = -ENOENT;
1010         return ret;
1011 }
1012
1013 static int process_inode_item(struct extent_buffer *eb,
1014                               int slot, struct btrfs_key *key,
1015                               struct shared_node *active_node)
1016 {
1017         struct inode_record *rec;
1018         struct btrfs_inode_item *item;
1019
1020         rec = active_node->current;
1021         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022         if (rec->found_inode_item) {
1023                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1024                 return 1;
1025         }
1026         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027         rec->nlink = btrfs_inode_nlink(eb, item);
1028         rec->isize = btrfs_inode_size(eb, item);
1029         rec->nbytes = btrfs_inode_nbytes(eb, item);
1030         rec->imode = btrfs_inode_mode(eb, item);
1031         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032                 rec->nodatasum = 1;
1033         rec->found_inode_item = 1;
1034         if (rec->nlink == 0)
1035                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036         maybe_free_inode_rec(&active_node->inode_cache, rec);
1037         return 0;
1038 }
1039
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041                                                 const char *name,
1042                                                 int namelen, u64 dir)
1043 {
1044         struct inode_backref *backref;
1045
1046         list_for_each_entry(backref, &rec->backrefs, list) {
1047                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048                         break;
1049                 if (backref->dir != dir || backref->namelen != namelen)
1050                         continue;
1051                 if (memcmp(name, backref->name, namelen))
1052                         continue;
1053                 return backref;
1054         }
1055
1056         backref = malloc(sizeof(*backref) + namelen + 1);
1057         if (!backref)
1058                 return NULL;
1059         memset(backref, 0, sizeof(*backref));
1060         backref->dir = dir;
1061         backref->namelen = namelen;
1062         memcpy(backref->name, name, namelen);
1063         backref->name[namelen] = '\0';
1064         list_add_tail(&backref->list, &rec->backrefs);
1065         return backref;
1066 }
1067
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069                              u64 ino, u64 dir, u64 index,
1070                              const char *name, int namelen,
1071                              u8 filetype, u8 itemtype, int errors)
1072 {
1073         struct inode_record *rec;
1074         struct inode_backref *backref;
1075
1076         rec = get_inode_rec(inode_cache, ino, 1);
1077         BUG_ON(IS_ERR(rec));
1078         backref = get_inode_backref(rec, name, namelen, dir);
1079         BUG_ON(!backref);
1080         if (errors)
1081                 backref->errors |= errors;
1082         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083                 if (backref->found_dir_index)
1084                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085                 if (backref->found_inode_ref && backref->index != index)
1086                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1087                 if (backref->found_dir_item && backref->filetype != filetype)
1088                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089
1090                 backref->index = index;
1091                 backref->filetype = filetype;
1092                 backref->found_dir_index = 1;
1093         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094                 rec->found_link++;
1095                 if (backref->found_dir_item)
1096                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097                 if (backref->found_dir_index && backref->filetype != filetype)
1098                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099
1100                 backref->filetype = filetype;
1101                 backref->found_dir_item = 1;
1102         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104                 if (backref->found_inode_ref)
1105                         backref->errors |= REF_ERR_DUP_INODE_REF;
1106                 if (backref->found_dir_index && backref->index != index)
1107                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1108                 else
1109                         backref->index = index;
1110
1111                 backref->ref_type = itemtype;
1112                 backref->found_inode_ref = 1;
1113         } else {
1114                 BUG_ON(1);
1115         }
1116
1117         maybe_free_inode_rec(inode_cache, rec);
1118         return 0;
1119 }
1120
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122                             struct cache_tree *dst_cache)
1123 {
1124         struct inode_backref *backref;
1125         u32 dir_count = 0;
1126         int ret = 0;
1127
1128         dst->merging = 1;
1129         list_for_each_entry(backref, &src->backrefs, list) {
1130                 if (backref->found_dir_index) {
1131                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1132                                         backref->index, backref->name,
1133                                         backref->namelen, backref->filetype,
1134                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1135                 }
1136                 if (backref->found_dir_item) {
1137                         dir_count++;
1138                         add_inode_backref(dst_cache, dst->ino,
1139                                         backref->dir, 0, backref->name,
1140                                         backref->namelen, backref->filetype,
1141                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1142                 }
1143                 if (backref->found_inode_ref) {
1144                         add_inode_backref(dst_cache, dst->ino,
1145                                         backref->dir, backref->index,
1146                                         backref->name, backref->namelen, 0,
1147                                         backref->ref_type, backref->errors);
1148                 }
1149         }
1150
1151         if (src->found_dir_item)
1152                 dst->found_dir_item = 1;
1153         if (src->found_file_extent)
1154                 dst->found_file_extent = 1;
1155         if (src->found_csum_item)
1156                 dst->found_csum_item = 1;
1157         if (src->some_csum_missing)
1158                 dst->some_csum_missing = 1;
1159         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1161                 if (ret < 0)
1162                         return ret;
1163         }
1164
1165         BUG_ON(src->found_link < dir_count);
1166         dst->found_link += src->found_link - dir_count;
1167         dst->found_size += src->found_size;
1168         if (src->extent_start != (u64)-1) {
1169                 if (dst->extent_start == (u64)-1) {
1170                         dst->extent_start = src->extent_start;
1171                         dst->extent_end = src->extent_end;
1172                 } else {
1173                         if (dst->extent_end > src->extent_start)
1174                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175                         else if (dst->extent_end < src->extent_start) {
1176                                 ret = add_file_extent_hole(&dst->holes,
1177                                         dst->extent_end,
1178                                         src->extent_start - dst->extent_end);
1179                         }
1180                         if (dst->extent_end < src->extent_end)
1181                                 dst->extent_end = src->extent_end;
1182                 }
1183         }
1184
1185         dst->errors |= src->errors;
1186         if (src->found_inode_item) {
1187                 if (!dst->found_inode_item) {
1188                         dst->nlink = src->nlink;
1189                         dst->isize = src->isize;
1190                         dst->nbytes = src->nbytes;
1191                         dst->imode = src->imode;
1192                         dst->nodatasum = src->nodatasum;
1193                         dst->found_inode_item = 1;
1194                 } else {
1195                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1196                 }
1197         }
1198         dst->merging = 0;
1199
1200         return 0;
1201 }
1202
1203 static int splice_shared_node(struct shared_node *src_node,
1204                               struct shared_node *dst_node)
1205 {
1206         struct cache_extent *cache;
1207         struct ptr_node *node, *ins;
1208         struct cache_tree *src, *dst;
1209         struct inode_record *rec, *conflict;
1210         u64 current_ino = 0;
1211         int splice = 0;
1212         int ret;
1213
1214         if (--src_node->refs == 0)
1215                 splice = 1;
1216         if (src_node->current)
1217                 current_ino = src_node->current->ino;
1218
1219         src = &src_node->root_cache;
1220         dst = &dst_node->root_cache;
1221 again:
1222         cache = search_cache_extent(src, 0);
1223         while (cache) {
1224                 node = container_of(cache, struct ptr_node, cache);
1225                 rec = node->data;
1226                 cache = next_cache_extent(cache);
1227
1228                 if (splice) {
1229                         remove_cache_extent(src, &node->cache);
1230                         ins = node;
1231                 } else {
1232                         ins = malloc(sizeof(*ins));
1233                         BUG_ON(!ins);
1234                         ins->cache.start = node->cache.start;
1235                         ins->cache.size = node->cache.size;
1236                         ins->data = rec;
1237                         rec->refs++;
1238                 }
1239                 ret = insert_cache_extent(dst, &ins->cache);
1240                 if (ret == -EEXIST) {
1241                         conflict = get_inode_rec(dst, rec->ino, 1);
1242                         BUG_ON(IS_ERR(conflict));
1243                         merge_inode_recs(rec, conflict, dst);
1244                         if (rec->checked) {
1245                                 conflict->checked = 1;
1246                                 if (dst_node->current == conflict)
1247                                         dst_node->current = NULL;
1248                         }
1249                         maybe_free_inode_rec(dst, conflict);
1250                         free_inode_rec(rec);
1251                         free(ins);
1252                 } else {
1253                         BUG_ON(ret);
1254                 }
1255         }
1256
1257         if (src == &src_node->root_cache) {
1258                 src = &src_node->inode_cache;
1259                 dst = &dst_node->inode_cache;
1260                 goto again;
1261         }
1262
1263         if (current_ino > 0 && (!dst_node->current ||
1264             current_ino > dst_node->current->ino)) {
1265                 if (dst_node->current) {
1266                         dst_node->current->checked = 1;
1267                         maybe_free_inode_rec(dst, dst_node->current);
1268                 }
1269                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270                 BUG_ON(IS_ERR(dst_node->current));
1271         }
1272         return 0;
1273 }
1274
1275 static void free_inode_ptr(struct cache_extent *cache)
1276 {
1277         struct ptr_node *node;
1278         struct inode_record *rec;
1279
1280         node = container_of(cache, struct ptr_node, cache);
1281         rec = node->data;
1282         free_inode_rec(rec);
1283         free(node);
1284 }
1285
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1289                                             u64 bytenr)
1290 {
1291         struct cache_extent *cache;
1292         struct shared_node *node;
1293
1294         cache = lookup_cache_extent(shared, bytenr, 1);
1295         if (cache) {
1296                 node = container_of(cache, struct shared_node, cache);
1297                 return node;
1298         }
1299         return NULL;
1300 }
1301
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1303 {
1304         int ret;
1305         struct shared_node *node;
1306
1307         node = calloc(1, sizeof(*node));
1308         if (!node)
1309                 return -ENOMEM;
1310         node->cache.start = bytenr;
1311         node->cache.size = 1;
1312         cache_tree_init(&node->root_cache);
1313         cache_tree_init(&node->inode_cache);
1314         node->refs = refs;
1315
1316         ret = insert_cache_extent(shared, &node->cache);
1317
1318         return ret;
1319 }
1320
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322                              struct walk_control *wc, int level)
1323 {
1324         struct shared_node *node;
1325         struct shared_node *dest;
1326         int ret;
1327
1328         if (level == wc->active_node)
1329                 return 0;
1330
1331         BUG_ON(wc->active_node <= level);
1332         node = find_shared_node(&wc->shared, bytenr);
1333         if (!node) {
1334                 ret = add_shared_node(&wc->shared, bytenr, refs);
1335                 BUG_ON(ret);
1336                 node = find_shared_node(&wc->shared, bytenr);
1337                 wc->nodes[level] = node;
1338                 wc->active_node = level;
1339                 return 0;
1340         }
1341
1342         if (wc->root_level == wc->active_node &&
1343             btrfs_root_refs(&root->root_item) == 0) {
1344                 if (--node->refs == 0) {
1345                         free_inode_recs_tree(&node->root_cache);
1346                         free_inode_recs_tree(&node->inode_cache);
1347                         remove_cache_extent(&wc->shared, &node->cache);
1348                         free(node);
1349                 }
1350                 return 1;
1351         }
1352
1353         dest = wc->nodes[wc->active_node];
1354         splice_shared_node(node, dest);
1355         if (node->refs == 0) {
1356                 remove_cache_extent(&wc->shared, &node->cache);
1357                 free(node);
1358         }
1359         return 1;
1360 }
1361
1362 static int leave_shared_node(struct btrfs_root *root,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int i;
1368
1369         if (level == wc->root_level)
1370                 return 0;
1371
1372         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1373                 if (wc->nodes[i])
1374                         break;
1375         }
1376         BUG_ON(i >= BTRFS_MAX_LEVEL);
1377
1378         node = wc->nodes[wc->active_node];
1379         wc->nodes[wc->active_node] = NULL;
1380         wc->active_node = i;
1381
1382         dest = wc->nodes[wc->active_node];
1383         if (wc->active_node < wc->root_level ||
1384             btrfs_root_refs(&root->root_item) > 0) {
1385                 BUG_ON(node->refs <= 1);
1386                 splice_shared_node(node, dest);
1387         } else {
1388                 BUG_ON(node->refs < 2);
1389                 node->refs--;
1390         }
1391         return 0;
1392 }
1393
1394 /*
1395  * Returns:
1396  * < 0 - on error
1397  * 1   - if the root with id child_root_id is a child of root parent_root_id
1398  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1399  *       has other root(s) as parent(s)
1400  * 2   - if the root child_root_id doesn't have any parent roots
1401  */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1403                          u64 child_root_id)
1404 {
1405         struct btrfs_path path;
1406         struct btrfs_key key;
1407         struct extent_buffer *leaf;
1408         int has_parent = 0;
1409         int ret;
1410
1411         btrfs_init_path(&path);
1412
1413         key.objectid = parent_root_id;
1414         key.type = BTRFS_ROOT_REF_KEY;
1415         key.offset = child_root_id;
1416         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1417                                 0, 0);
1418         if (ret < 0)
1419                 return ret;
1420         btrfs_release_path(&path);
1421         if (!ret)
1422                 return 1;
1423
1424         key.objectid = child_root_id;
1425         key.type = BTRFS_ROOT_BACKREF_KEY;
1426         key.offset = 0;
1427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1428                                 0, 0);
1429         if (ret < 0)
1430                 goto out;
1431
1432         while (1) {
1433                 leaf = path.nodes[0];
1434                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1436                         if (ret)
1437                                 break;
1438                         leaf = path.nodes[0];
1439                 }
1440
1441                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1442                 if (key.objectid != child_root_id ||
1443                     key.type != BTRFS_ROOT_BACKREF_KEY)
1444                         break;
1445
1446                 has_parent = 1;
1447
1448                 if (key.offset == parent_root_id) {
1449                         btrfs_release_path(&path);
1450                         return 1;
1451                 }
1452
1453                 path.slots[0]++;
1454         }
1455 out:
1456         btrfs_release_path(&path);
1457         if (ret < 0)
1458                 return ret;
1459         return has_parent ? 0 : 2;
1460 }
1461
1462 static int process_dir_item(struct btrfs_root *root,
1463                             struct extent_buffer *eb,
1464                             int slot, struct btrfs_key *key,
1465                             struct shared_node *active_node)
1466 {
1467         u32 total;
1468         u32 cur = 0;
1469         u32 len;
1470         u32 name_len;
1471         u32 data_len;
1472         int error;
1473         int nritems = 0;
1474         u8 filetype;
1475         struct btrfs_dir_item *di;
1476         struct inode_record *rec;
1477         struct cache_tree *root_cache;
1478         struct cache_tree *inode_cache;
1479         struct btrfs_key location;
1480         char namebuf[BTRFS_NAME_LEN];
1481
1482         root_cache = &active_node->root_cache;
1483         inode_cache = &active_node->inode_cache;
1484         rec = active_node->current;
1485         rec->found_dir_item = 1;
1486
1487         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 nritems++;
1491                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492                 name_len = btrfs_dir_name_len(eb, di);
1493                 data_len = btrfs_dir_data_len(eb, di);
1494                 filetype = btrfs_dir_type(eb, di);
1495
1496                 rec->found_size += name_len;
1497                 if (name_len <= BTRFS_NAME_LEN) {
1498                         len = name_len;
1499                         error = 0;
1500                 } else {
1501                         len = BTRFS_NAME_LEN;
1502                         error = REF_ERR_NAME_TOO_LONG;
1503                 }
1504                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505
1506                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507                         add_inode_backref(inode_cache, location.objectid,
1508                                           key->objectid, key->offset, namebuf,
1509                                           len, filetype, key->type, error);
1510                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511                         add_inode_backref(root_cache, location.objectid,
1512                                           key->objectid, key->offset,
1513                                           namebuf, len, filetype,
1514                                           key->type, error);
1515                 } else {
1516                         fprintf(stderr, "invalid location in dir item %u\n",
1517                                 location.type);
1518                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519                                           key->objectid, key->offset, namebuf,
1520                                           len, filetype, key->type, error);
1521                 }
1522
1523                 len = sizeof(*di) + name_len + data_len;
1524                 di = (struct btrfs_dir_item *)((char *)di + len);
1525                 cur += len;
1526         }
1527         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1529
1530         return 0;
1531 }
1532
1533 static int process_inode_ref(struct extent_buffer *eb,
1534                              int slot, struct btrfs_key *key,
1535                              struct shared_node *active_node)
1536 {
1537         u32 total;
1538         u32 cur = 0;
1539         u32 len;
1540         u32 name_len;
1541         u64 index;
1542         int error;
1543         struct cache_tree *inode_cache;
1544         struct btrfs_inode_ref *ref;
1545         char namebuf[BTRFS_NAME_LEN];
1546
1547         inode_cache = &active_node->inode_cache;
1548
1549         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550         total = btrfs_item_size_nr(eb, slot);
1551         while (cur < total) {
1552                 name_len = btrfs_inode_ref_name_len(eb, ref);
1553                 index = btrfs_inode_ref_index(eb, ref);
1554                 if (name_len <= BTRFS_NAME_LEN) {
1555                         len = name_len;
1556                         error = 0;
1557                 } else {
1558                         len = BTRFS_NAME_LEN;
1559                         error = REF_ERR_NAME_TOO_LONG;
1560                 }
1561                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562                 add_inode_backref(inode_cache, key->objectid, key->offset,
1563                                   index, namebuf, len, 0, key->type, error);
1564
1565                 len = sizeof(*ref) + name_len;
1566                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1567                 cur += len;
1568         }
1569         return 0;
1570 }
1571
1572 static int process_inode_extref(struct extent_buffer *eb,
1573                                 int slot, struct btrfs_key *key,
1574                                 struct shared_node *active_node)
1575 {
1576         u32 total;
1577         u32 cur = 0;
1578         u32 len;
1579         u32 name_len;
1580         u64 index;
1581         u64 parent;
1582         int error;
1583         struct cache_tree *inode_cache;
1584         struct btrfs_inode_extref *extref;
1585         char namebuf[BTRFS_NAME_LEN];
1586
1587         inode_cache = &active_node->inode_cache;
1588
1589         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590         total = btrfs_item_size_nr(eb, slot);
1591         while (cur < total) {
1592                 name_len = btrfs_inode_extref_name_len(eb, extref);
1593                 index = btrfs_inode_extref_index(eb, extref);
1594                 parent = btrfs_inode_extref_parent(eb, extref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf,
1603                                    (unsigned long)(extref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, parent,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*extref) + name_len;
1608                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612
1613 }
1614
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616                             u64 len, u64 *found)
1617 {
1618         struct btrfs_key key;
1619         struct btrfs_path path;
1620         struct extent_buffer *leaf;
1621         int ret;
1622         size_t size;
1623         *found = 0;
1624         u64 csum_end;
1625         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626
1627         btrfs_init_path(&path);
1628
1629         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630         key.offset = start;
1631         key.type = BTRFS_EXTENT_CSUM_KEY;
1632
1633         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1634                                 &key, &path, 0, 0);
1635         if (ret < 0)
1636                 goto out;
1637         if (ret > 0 && path.slots[0] > 0) {
1638                 leaf = path.nodes[0];
1639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641                     key.type == BTRFS_EXTENT_CSUM_KEY)
1642                         path.slots[0]--;
1643         }
1644
1645         while (len > 0) {
1646                 leaf = path.nodes[0];
1647                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1649                         if (ret > 0)
1650                                 break;
1651                         else if (ret < 0)
1652                                 goto out;
1653                         leaf = path.nodes[0];
1654                 }
1655
1656                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658                     key.type != BTRFS_EXTENT_CSUM_KEY)
1659                         break;
1660
1661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1662                 if (key.offset >= start + len)
1663                         break;
1664
1665                 if (key.offset > start)
1666                         start = key.offset;
1667
1668                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670                 if (csum_end > start) {
1671                         size = min(csum_end - start, len);
1672                         len -= size;
1673                         start += size;
1674                         *found += size;
1675                 }
1676
1677                 path.slots[0]++;
1678         }
1679 out:
1680         btrfs_release_path(&path);
1681         if (ret < 0)
1682                 return ret;
1683         return 0;
1684 }
1685
1686 static int process_file_extent(struct btrfs_root *root,
1687                                 struct extent_buffer *eb,
1688                                 int slot, struct btrfs_key *key,
1689                                 struct shared_node *active_node)
1690 {
1691         struct inode_record *rec;
1692         struct btrfs_file_extent_item *fi;
1693         u64 num_bytes = 0;
1694         u64 disk_bytenr = 0;
1695         u64 extent_offset = 0;
1696         u64 mask = root->sectorsize - 1;
1697         int extent_type;
1698         int ret;
1699
1700         rec = active_node->current;
1701         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702         rec->found_file_extent = 1;
1703
1704         if (rec->extent_start == (u64)-1) {
1705                 rec->extent_start = key->offset;
1706                 rec->extent_end = key->offset;
1707         }
1708
1709         if (rec->extent_end > key->offset)
1710                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711         else if (rec->extent_end < key->offset) {
1712                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713                                            key->offset - rec->extent_end);
1714                 if (ret < 0)
1715                         return ret;
1716         }
1717
1718         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719         extent_type = btrfs_file_extent_type(eb, fi);
1720
1721         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723                 if (num_bytes == 0)
1724                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725                 rec->found_size += num_bytes;
1726                 num_bytes = (num_bytes + mask) & ~mask;
1727         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731                 extent_offset = btrfs_file_extent_offset(eb, fi);
1732                 if (num_bytes == 0 || (num_bytes & mask))
1733                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734                 if (num_bytes + extent_offset >
1735                     btrfs_file_extent_ram_bytes(eb, fi))
1736                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738                     (btrfs_file_extent_compression(eb, fi) ||
1739                      btrfs_file_extent_encryption(eb, fi) ||
1740                      btrfs_file_extent_other_encoding(eb, fi)))
1741                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742                 if (disk_bytenr > 0)
1743                         rec->found_size += num_bytes;
1744         } else {
1745                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746         }
1747         rec->extent_end = key->offset + num_bytes;
1748
1749         /*
1750          * The data reloc tree will copy full extents into its inode and then
1751          * copy the corresponding csums.  Because the extent it copied could be
1752          * a preallocated extent that hasn't been written to yet there may be no
1753          * csums to copy, ergo we won't have csums for our file extent.  This is
1754          * ok so just don't bother checking csums if the inode belongs to the
1755          * data reloc tree.
1756          */
1757         if (disk_bytenr > 0 &&
1758             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759                 u64 found;
1760                 if (btrfs_file_extent_compression(eb, fi))
1761                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762                 else
1763                         disk_bytenr += extent_offset;
1764
1765                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1766                 if (ret < 0)
1767                         return ret;
1768                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769                         if (found > 0)
1770                                 rec->found_csum_item = 1;
1771                         if (found < num_bytes)
1772                                 rec->some_csum_missing = 1;
1773                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774                         if (found > 0)
1775                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782                             struct walk_control *wc)
1783 {
1784         struct btrfs_key key;
1785         u32 nritems;
1786         int i;
1787         int ret = 0;
1788         struct cache_tree *inode_cache;
1789         struct shared_node *active_node;
1790
1791         if (wc->root_level == wc->active_node &&
1792             btrfs_root_refs(&root->root_item) == 0)
1793                 return 0;
1794
1795         active_node = wc->nodes[wc->active_node];
1796         inode_cache = &active_node->inode_cache;
1797         nritems = btrfs_header_nritems(eb);
1798         for (i = 0; i < nritems; i++) {
1799                 btrfs_item_key_to_cpu(eb, &key, i);
1800
1801                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802                         continue;
1803                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1804                         continue;
1805
1806                 if (active_node->current == NULL ||
1807                     active_node->current->ino < key.objectid) {
1808                         if (active_node->current) {
1809                                 active_node->current->checked = 1;
1810                                 maybe_free_inode_rec(inode_cache,
1811                                                      active_node->current);
1812                         }
1813                         active_node->current = get_inode_rec(inode_cache,
1814                                                              key.objectid, 1);
1815                         BUG_ON(IS_ERR(active_node->current));
1816                 }
1817                 switch (key.type) {
1818                 case BTRFS_DIR_ITEM_KEY:
1819                 case BTRFS_DIR_INDEX_KEY:
1820                         ret = process_dir_item(root, eb, i, &key, active_node);
1821                         break;
1822                 case BTRFS_INODE_REF_KEY:
1823                         ret = process_inode_ref(eb, i, &key, active_node);
1824                         break;
1825                 case BTRFS_INODE_EXTREF_KEY:
1826                         ret = process_inode_extref(eb, i, &key, active_node);
1827                         break;
1828                 case BTRFS_INODE_ITEM_KEY:
1829                         ret = process_inode_item(eb, i, &key, active_node);
1830                         break;
1831                 case BTRFS_EXTENT_DATA_KEY:
1832                         ret = process_file_extent(root, eb, i, &key,
1833                                                   active_node);
1834                         break;
1835                 default:
1836                         break;
1837                 };
1838         }
1839         return ret;
1840 }
1841
1842 static void reada_walk_down(struct btrfs_root *root,
1843                             struct extent_buffer *node, int slot)
1844 {
1845         u64 bytenr;
1846         u64 ptr_gen;
1847         u32 nritems;
1848         u32 blocksize;
1849         int i;
1850         int level;
1851
1852         level = btrfs_header_level(node);
1853         if (level != 1)
1854                 return;
1855
1856         nritems = btrfs_header_nritems(node);
1857         blocksize = root->nodesize;
1858         for (i = slot; i < nritems; i++) {
1859                 bytenr = btrfs_node_blockptr(node, i);
1860                 ptr_gen = btrfs_node_ptr_generation(node, i);
1861                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1862         }
1863 }
1864
1865 /*
1866  * Check the child node/leaf by the following condition:
1867  * 1. the first item key of the node/leaf should be the same with the one
1868  *    in parent.
1869  * 2. block in parent node should match the child node/leaf.
1870  * 3. generation of parent node and child's header should be consistent.
1871  *
1872  * Or the child node/leaf pointed by the key in parent is not valid.
1873  *
1874  * We hope to check leaf owner too, but since subvol may share leaves,
1875  * which makes leaf owner check not so strong, key check should be
1876  * sufficient enough for that case.
1877  */
1878 static int check_child_node(struct btrfs_root *root,
1879                             struct extent_buffer *parent, int slot,
1880                             struct extent_buffer *child)
1881 {
1882         struct btrfs_key parent_key;
1883         struct btrfs_key child_key;
1884         int ret = 0;
1885
1886         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887         if (btrfs_header_level(child) == 0)
1888                 btrfs_item_key_to_cpu(child, &child_key, 0);
1889         else
1890                 btrfs_node_key_to_cpu(child, &child_key, 0);
1891
1892         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1893                 ret = -EINVAL;
1894                 fprintf(stderr,
1895                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896                         parent_key.objectid, parent_key.type, parent_key.offset,
1897                         child_key.objectid, child_key.type, child_key.offset);
1898         }
1899         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900                 ret = -EINVAL;
1901                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902                         btrfs_node_blockptr(parent, slot),
1903                         btrfs_header_bytenr(child));
1904         }
1905         if (btrfs_node_ptr_generation(parent, slot) !=
1906             btrfs_header_generation(child)) {
1907                 ret = -EINVAL;
1908                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909                         btrfs_header_generation(child),
1910                         btrfs_node_ptr_generation(parent, slot));
1911         }
1912         return ret;
1913 }
1914
1915 struct node_refs {
1916         u64 bytenr[BTRFS_MAX_LEVEL];
1917         u64 refs[BTRFS_MAX_LEVEL];
1918 };
1919
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                           struct walk_control *wc, int *level,
1922                           struct node_refs *nrefs)
1923 {
1924         enum btrfs_tree_block_status status;
1925         u64 bytenr;
1926         u64 ptr_gen;
1927         struct extent_buffer *next;
1928         struct extent_buffer *cur;
1929         u32 blocksize;
1930         int ret, err = 0;
1931         u64 refs;
1932
1933         WARN_ON(*level < 0);
1934         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935
1936         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937                 refs = nrefs->refs[*level];
1938                 ret = 0;
1939         } else {
1940                 ret = btrfs_lookup_extent_info(NULL, root,
1941                                        path->nodes[*level]->start,
1942                                        *level, 1, &refs, NULL);
1943                 if (ret < 0) {
1944                         err = ret;
1945                         goto out;
1946                 }
1947                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948                 nrefs->refs[*level] = refs;
1949         }
1950
1951         if (refs > 1) {
1952                 ret = enter_shared_node(root, path->nodes[*level]->start,
1953                                         refs, wc, *level);
1954                 if (ret > 0) {
1955                         err = ret;
1956                         goto out;
1957                 }
1958         }
1959
1960         while (*level >= 0) {
1961                 WARN_ON(*level < 0);
1962                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963                 cur = path->nodes[*level];
1964
1965                 if (btrfs_header_level(cur) != *level)
1966                         WARN_ON(1);
1967
1968                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1969                         break;
1970                 if (*level == 0) {
1971                         ret = process_one_leaf(root, cur, wc);
1972                         if (ret < 0)
1973                                 err = ret;
1974                         break;
1975                 }
1976                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978                 blocksize = root->nodesize;
1979
1980                 if (bytenr == nrefs->bytenr[*level - 1]) {
1981                         refs = nrefs->refs[*level - 1];
1982                 } else {
1983                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984                                         *level - 1, 1, &refs, NULL);
1985                         if (ret < 0) {
1986                                 refs = 0;
1987                         } else {
1988                                 nrefs->bytenr[*level - 1] = bytenr;
1989                                 nrefs->refs[*level - 1] = refs;
1990                         }
1991                 }
1992
1993                 if (refs > 1) {
1994                         ret = enter_shared_node(root, bytenr, refs,
1995                                                 wc, *level - 1);
1996                         if (ret > 0) {
1997                                 path->slots[*level]++;
1998                                 continue;
1999                         }
2000                 }
2001
2002                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004                         free_extent_buffer(next);
2005                         reada_walk_down(root, cur, path->slots[*level]);
2006                         next = read_tree_block(root, bytenr, blocksize,
2007                                                ptr_gen);
2008                         if (!extent_buffer_uptodate(next)) {
2009                                 struct btrfs_key node_key;
2010
2011                                 btrfs_node_key_to_cpu(path->nodes[*level],
2012                                                       &node_key,
2013                                                       path->slots[*level]);
2014                                 btrfs_add_corrupt_extent_record(root->fs_info,
2015                                                 &node_key,
2016                                                 path->nodes[*level]->start,
2017                                                 root->nodesize, *level);
2018                                 err = -EIO;
2019                                 goto out;
2020                         }
2021                 }
2022
2023                 ret = check_child_node(root, cur, path->slots[*level], next);
2024                 if (ret) {
2025                         err = ret;
2026                         goto out;
2027                 }
2028
2029                 if (btrfs_is_leaf(next))
2030                         status = btrfs_check_leaf(root, NULL, next);
2031                 else
2032                         status = btrfs_check_node(root, NULL, next);
2033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034                         free_extent_buffer(next);
2035                         err = -EIO;
2036                         goto out;
2037                 }
2038
2039                 *level = *level - 1;
2040                 free_extent_buffer(path->nodes[*level]);
2041                 path->nodes[*level] = next;
2042                 path->slots[*level] = 0;
2043         }
2044 out:
2045         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2046         return err;
2047 }
2048
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050                         struct walk_control *wc, int *level)
2051 {
2052         int i;
2053         struct extent_buffer *leaf;
2054
2055         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056                 leaf = path->nodes[i];
2057                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2058                         path->slots[i]++;
2059                         *level = i;
2060                         return 0;
2061                 } else {
2062                         free_extent_buffer(path->nodes[*level]);
2063                         path->nodes[*level] = NULL;
2064                         BUG_ON(*level > wc->active_node);
2065                         if (*level == wc->active_node)
2066                                 leave_shared_node(root, wc, *level);
2067                         *level = i + 1;
2068                 }
2069         }
2070         return 1;
2071 }
2072
2073 static int check_root_dir(struct inode_record *rec)
2074 {
2075         struct inode_backref *backref;
2076         int ret = -1;
2077
2078         if (!rec->found_inode_item || rec->errors)
2079                 goto out;
2080         if (rec->nlink != 1 || rec->found_link != 0)
2081                 goto out;
2082         if (list_empty(&rec->backrefs))
2083                 goto out;
2084         backref = to_inode_backref(rec->backrefs.next);
2085         if (!backref->found_inode_ref)
2086                 goto out;
2087         if (backref->index != 0 || backref->namelen != 2 ||
2088             memcmp(backref->name, "..", 2))
2089                 goto out;
2090         if (backref->found_dir_index || backref->found_dir_item)
2091                 goto out;
2092         ret = 0;
2093 out:
2094         return ret;
2095 }
2096
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098                               struct btrfs_root *root, struct btrfs_path *path,
2099                               struct inode_record *rec)
2100 {
2101         struct btrfs_inode_item *ei;
2102         struct btrfs_key key;
2103         int ret;
2104
2105         key.objectid = rec->ino;
2106         key.type = BTRFS_INODE_ITEM_KEY;
2107         key.offset = (u64)-1;
2108
2109         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2110         if (ret < 0)
2111                 goto out;
2112         if (ret) {
2113                 if (!path->slots[0]) {
2114                         ret = -ENOENT;
2115                         goto out;
2116                 }
2117                 path->slots[0]--;
2118                 ret = 0;
2119         }
2120         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121         if (key.objectid != rec->ino) {
2122                 ret = -ENOENT;
2123                 goto out;
2124         }
2125
2126         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127                             struct btrfs_inode_item);
2128         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129         btrfs_mark_buffer_dirty(path->nodes[0]);
2130         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132                root->root_key.objectid);
2133 out:
2134         btrfs_release_path(path);
2135         return ret;
2136 }
2137
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139                                     struct btrfs_root *root,
2140                                     struct btrfs_path *path,
2141                                     struct inode_record *rec)
2142 {
2143         int ret;
2144
2145         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146         btrfs_release_path(path);
2147         if (!ret)
2148                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2149         return ret;
2150 }
2151
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153                                struct btrfs_root *root,
2154                                struct btrfs_path *path,
2155                                struct inode_record *rec)
2156 {
2157         struct btrfs_inode_item *ei;
2158         struct btrfs_key key;
2159         int ret = 0;
2160
2161         key.objectid = rec->ino;
2162         key.type = BTRFS_INODE_ITEM_KEY;
2163         key.offset = 0;
2164
2165         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2166         if (ret) {
2167                 if (ret > 0)
2168                         ret = -ENOENT;
2169                 goto out;
2170         }
2171
2172         /* Since ret == 0, no need to check anything */
2173         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174                             struct btrfs_inode_item);
2175         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176         btrfs_mark_buffer_dirty(path->nodes[0]);
2177         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178         printf("reset nbytes for ino %llu root %llu\n",
2179                rec->ino, root->root_key.objectid);
2180 out:
2181         btrfs_release_path(path);
2182         return ret;
2183 }
2184
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186                                  struct cache_tree *inode_cache,
2187                                  struct inode_record *rec,
2188                                  struct inode_backref *backref)
2189 {
2190         struct btrfs_path path;
2191         struct btrfs_trans_handle *trans;
2192         struct btrfs_dir_item *dir_item;
2193         struct extent_buffer *leaf;
2194         struct btrfs_key key;
2195         struct btrfs_disk_key disk_key;
2196         struct inode_record *dir_rec;
2197         unsigned long name_ptr;
2198         u32 data_size = sizeof(*dir_item) + backref->namelen;
2199         int ret;
2200
2201         trans = btrfs_start_transaction(root, 1);
2202         if (IS_ERR(trans))
2203                 return PTR_ERR(trans);
2204
2205         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206                 (unsigned long long)rec->ino);
2207
2208         btrfs_init_path(&path);
2209         key.objectid = backref->dir;
2210         key.type = BTRFS_DIR_INDEX_KEY;
2211         key.offset = backref->index;
2212         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2213         BUG_ON(ret);
2214
2215         leaf = path.nodes[0];
2216         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217
2218         disk_key.objectid = cpu_to_le64(rec->ino);
2219         disk_key.type = BTRFS_INODE_ITEM_KEY;
2220         disk_key.offset = 0;
2221
2222         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224         btrfs_set_dir_data_len(leaf, dir_item, 0);
2225         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226         name_ptr = (unsigned long)(dir_item + 1);
2227         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228         btrfs_mark_buffer_dirty(leaf);
2229         btrfs_release_path(&path);
2230         btrfs_commit_transaction(trans, root);
2231
2232         backref->found_dir_index = 1;
2233         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234         BUG_ON(IS_ERR(dir_rec));
2235         if (!dir_rec)
2236                 return 0;
2237         dir_rec->found_size += backref->namelen;
2238         if (dir_rec->found_size == dir_rec->isize &&
2239             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241         if (dir_rec->found_size != dir_rec->isize)
2242                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2243
2244         return 0;
2245 }
2246
2247 static int delete_dir_index(struct btrfs_root *root,
2248                             struct cache_tree *inode_cache,
2249                             struct inode_record *rec,
2250                             struct inode_backref *backref)
2251 {
2252         struct btrfs_trans_handle *trans;
2253         struct btrfs_dir_item *di;
2254         struct btrfs_path path;
2255         int ret = 0;
2256
2257         trans = btrfs_start_transaction(root, 1);
2258         if (IS_ERR(trans))
2259                 return PTR_ERR(trans);
2260
2261         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262                 (unsigned long long)backref->dir,
2263                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264                 (unsigned long long)root->objectid);
2265
2266         btrfs_init_path(&path);
2267         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268                                     backref->name, backref->namelen,
2269                                     backref->index, -1);
2270         if (IS_ERR(di)) {
2271                 ret = PTR_ERR(di);
2272                 btrfs_release_path(&path);
2273                 btrfs_commit_transaction(trans, root);
2274                 if (ret == -ENOENT)
2275                         return 0;
2276                 return ret;
2277         }
2278
2279         if (!di)
2280                 ret = btrfs_del_item(trans, root, &path);
2281         else
2282                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283         BUG_ON(ret);
2284         btrfs_release_path(&path);
2285         btrfs_commit_transaction(trans, root);
2286         return ret;
2287 }
2288
2289 static int create_inode_item(struct btrfs_root *root,
2290                              struct inode_record *rec,
2291                              struct inode_backref *backref, int root_dir)
2292 {
2293         struct btrfs_trans_handle *trans;
2294         struct btrfs_inode_item inode_item;
2295         time_t now = time(NULL);
2296         int ret;
2297
2298         trans = btrfs_start_transaction(root, 1);
2299         if (IS_ERR(trans)) {
2300                 ret = PTR_ERR(trans);
2301                 return ret;
2302         }
2303
2304         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305                 "be incomplete, please check permissions and content after "
2306                 "the fsck completes.\n", (unsigned long long)root->objectid,
2307                 (unsigned long long)rec->ino);
2308
2309         memset(&inode_item, 0, sizeof(inode_item));
2310         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311         if (root_dir)
2312                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313         else
2314                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316         if (rec->found_dir_item) {
2317                 if (rec->found_file_extent)
2318                         fprintf(stderr, "root %llu inode %llu has both a dir "
2319                                 "item and extents, unsure if it is a dir or a "
2320                                 "regular file so setting it as a directory\n",
2321                                 (unsigned long long)root->objectid,
2322                                 (unsigned long long)rec->ino);
2323                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325         } else if (!rec->found_dir_item) {
2326                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328         }
2329         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337
2338         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339         BUG_ON(ret);
2340         btrfs_commit_transaction(trans, root);
2341         return 0;
2342 }
2343
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345                                  struct inode_record *rec,
2346                                  struct cache_tree *inode_cache,
2347                                  int delete)
2348 {
2349         struct inode_backref *tmp, *backref;
2350         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2351         int ret = 0;
2352         int repaired = 0;
2353
2354         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355                 if (!delete && rec->ino == root_dirid) {
2356                         if (!rec->found_inode_item) {
2357                                 ret = create_inode_item(root, rec, backref, 1);
2358                                 if (ret)
2359                                         break;
2360                                 repaired++;
2361                         }
2362                 }
2363
2364                 /* Index 0 for root dir's are special, don't mess with it */
2365                 if (rec->ino == root_dirid && backref->index == 0)
2366                         continue;
2367
2368                 if (delete &&
2369                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2370                      (backref->found_dir_index && backref->found_inode_ref &&
2371                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372                         ret = delete_dir_index(root, inode_cache, rec, backref);
2373                         if (ret)
2374                                 break;
2375                         repaired++;
2376                         list_del(&backref->list);
2377                         free(backref);
2378                 }
2379
2380                 if (!delete && !backref->found_dir_index &&
2381                     backref->found_dir_item && backref->found_inode_ref) {
2382                         ret = add_missing_dir_index(root, inode_cache, rec,
2383                                                     backref);
2384                         if (ret)
2385                                 break;
2386                         repaired++;
2387                         if (backref->found_dir_item &&
2388                             backref->found_dir_index &&
2389                             backref->found_dir_index) {
2390                                 if (!backref->errors &&
2391                                     backref->found_inode_ref) {
2392                                         list_del(&backref->list);
2393                                         free(backref);
2394                                 }
2395                         }
2396                 }
2397
2398                 if (!delete && (!backref->found_dir_index &&
2399                                 !backref->found_dir_item &&
2400                                 backref->found_inode_ref)) {
2401                         struct btrfs_trans_handle *trans;
2402                         struct btrfs_key location;
2403
2404                         ret = check_dir_conflict(root, backref->name,
2405                                                  backref->namelen,
2406                                                  backref->dir,
2407                                                  backref->index);
2408                         if (ret) {
2409                                 /*
2410                                  * let nlink fixing routine to handle it,
2411                                  * which can do it better.
2412                                  */
2413                                 ret = 0;
2414                                 break;
2415                         }
2416                         location.objectid = rec->ino;
2417                         location.type = BTRFS_INODE_ITEM_KEY;
2418                         location.offset = 0;
2419
2420                         trans = btrfs_start_transaction(root, 1);
2421                         if (IS_ERR(trans)) {
2422                                 ret = PTR_ERR(trans);
2423                                 break;
2424                         }
2425                         fprintf(stderr, "adding missing dir index/item pair "
2426                                 "for inode %llu\n",
2427                                 (unsigned long long)rec->ino);
2428                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2429                                                     backref->namelen,
2430                                                     backref->dir, &location,
2431                                                     imode_to_type(rec->imode),
2432                                                     backref->index);
2433                         BUG_ON(ret);
2434                         btrfs_commit_transaction(trans, root);
2435                         repaired++;
2436                 }
2437
2438                 if (!delete && (backref->found_inode_ref &&
2439                                 backref->found_dir_index &&
2440                                 backref->found_dir_item &&
2441                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442                                 !rec->found_inode_item)) {
2443                         ret = create_inode_item(root, rec, backref, 0);
2444                         if (ret)
2445                                 break;
2446                         repaired++;
2447                 }
2448
2449         }
2450         return ret ? ret : repaired;
2451 }
2452
2453 /*
2454  * To determine the file type for nlink/inode_item repair
2455  *
2456  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457  * Return -ENOENT if file type is not found.
2458  */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2460 {
2461         struct inode_backref *backref;
2462
2463         /* For inode item recovered case */
2464         if (rec->found_inode_item) {
2465                 *type = imode_to_type(rec->imode);
2466                 return 0;
2467         }
2468
2469         list_for_each_entry(backref, &rec->backrefs, list) {
2470                 if (backref->found_dir_index || backref->found_dir_item) {
2471                         *type = backref->filetype;
2472                         return 0;
2473                 }
2474         }
2475         return -ENOENT;
2476 }
2477
2478 /*
2479  * To determine the file name for nlink repair
2480  *
2481  * Return 0 if file name is found, set name and namelen.
2482  * Return -ENOENT if file name is not found.
2483  */
2484 static int find_file_name(struct inode_record *rec,
2485                           char *name, int *namelen)
2486 {
2487         struct inode_backref *backref;
2488
2489         list_for_each_entry(backref, &rec->backrefs, list) {
2490                 if (backref->found_dir_index || backref->found_dir_item ||
2491                     backref->found_inode_ref) {
2492                         memcpy(name, backref->name, backref->namelen);
2493                         *namelen = backref->namelen;
2494                         return 0;
2495                 }
2496         }
2497         return -ENOENT;
2498 }
2499
2500 /* Reset the nlink of the inode to the correct one */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502                        struct btrfs_root *root,
2503                        struct btrfs_path *path,
2504                        struct inode_record *rec)
2505 {
2506         struct inode_backref *backref;
2507         struct inode_backref *tmp;
2508         struct btrfs_key key;
2509         struct btrfs_inode_item *inode_item;
2510         int ret = 0;
2511
2512         /* We don't believe this either, reset it and iterate backref */
2513         rec->found_link = 0;
2514
2515         /* Remove all backref including the valid ones */
2516         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518                                    backref->index, backref->name,
2519                                    backref->namelen, 0);
2520                 if (ret < 0)
2521                         goto out;
2522
2523                 /* remove invalid backref, so it won't be added back */
2524                 if (!(backref->found_dir_index &&
2525                       backref->found_dir_item &&
2526                       backref->found_inode_ref)) {
2527                         list_del(&backref->list);
2528                         free(backref);
2529                 } else {
2530                         rec->found_link++;
2531                 }
2532         }
2533
2534         /* Set nlink to 0 */
2535         key.objectid = rec->ino;
2536         key.type = BTRFS_INODE_ITEM_KEY;
2537         key.offset = 0;
2538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2539         if (ret < 0)
2540                 goto out;
2541         if (ret > 0) {
2542                 ret = -ENOENT;
2543                 goto out;
2544         }
2545         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546                                     struct btrfs_inode_item);
2547         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548         btrfs_mark_buffer_dirty(path->nodes[0]);
2549         btrfs_release_path(path);
2550
2551         /*
2552          * Add back valid inode_ref/dir_item/dir_index,
2553          * add_link() will handle the nlink inc, so new nlink must be correct
2554          */
2555         list_for_each_entry(backref, &rec->backrefs, list) {
2556                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557                                      backref->name, backref->namelen,
2558                                      backref->filetype, &backref->index, 1);
2559                 if (ret < 0)
2560                         goto out;
2561         }
2562 out:
2563         btrfs_release_path(path);
2564         return ret;
2565 }
2566
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568                                struct btrfs_root *root,
2569                                struct btrfs_path *path,
2570                                struct inode_record *rec)
2571 {
2572         char *dir_name = "lost+found";
2573         char namebuf[BTRFS_NAME_LEN] = {0};
2574         u64 lost_found_ino;
2575         u32 mode = 0700;
2576         u8 type = 0;
2577         int namelen = 0;
2578         int name_recovered = 0;
2579         int type_recovered = 0;
2580         int ret = 0;
2581
2582         /*
2583          * Get file name and type first before these invalid inode ref
2584          * are deleted by remove_all_invalid_backref()
2585          */
2586         name_recovered = !find_file_name(rec, namebuf, &namelen);
2587         type_recovered = !find_file_type(rec, &type);
2588
2589         if (!name_recovered) {
2590                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591                        rec->ino, rec->ino);
2592                 namelen = count_digits(rec->ino);
2593                 sprintf(namebuf, "%llu", rec->ino);
2594                 name_recovered = 1;
2595         }
2596         if (!type_recovered) {
2597                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598                        rec->ino);
2599                 type = BTRFS_FT_REG_FILE;
2600                 type_recovered = 1;
2601         }
2602
2603         ret = reset_nlink(trans, root, path, rec);
2604         if (ret < 0) {
2605                 fprintf(stderr,
2606                         "Failed to reset nlink for inode %llu: %s\n",
2607                         rec->ino, strerror(-ret));
2608                 goto out;
2609         }
2610
2611         if (rec->found_link == 0) {
2612                 lost_found_ino = root->highest_inode;
2613                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2614                         ret = -EOVERFLOW;
2615                         goto out;
2616                 }
2617                 lost_found_ino++;
2618                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2620                                   mode);
2621                 if (ret < 0) {
2622                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623                                 dir_name, strerror(-ret));
2624                         goto out;
2625                 }
2626                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627                                      namebuf, namelen, type, NULL, 1);
2628                 /*
2629                  * Add ".INO" suffix several times to handle case where
2630                  * "FILENAME.INO" is already taken by another file.
2631                  */
2632                 while (ret == -EEXIST) {
2633                         /*
2634                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635                          */
2636                         if (namelen + count_digits(rec->ino) + 1 >
2637                             BTRFS_NAME_LEN) {
2638                                 ret = -EFBIG;
2639                                 goto out;
2640                         }
2641                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642                                  ".%llu", rec->ino);
2643                         namelen += count_digits(rec->ino) + 1;
2644                         ret = btrfs_add_link(trans, root, rec->ino,
2645                                              lost_found_ino, namebuf,
2646                                              namelen, type, NULL, 1);
2647                 }
2648                 if (ret < 0) {
2649                         fprintf(stderr,
2650                                 "Failed to link the inode %llu to %s dir: %s\n",
2651                                 rec->ino, dir_name, strerror(-ret));
2652                         goto out;
2653                 }
2654                 /*
2655                  * Just increase the found_link, don't actually add the
2656                  * backref. This will make things easier and this inode
2657                  * record will be freed after the repair is done.
2658                  * So fsck will not report problem about this inode.
2659                  */
2660                 rec->found_link++;
2661                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662                        namelen, namebuf, dir_name);
2663         }
2664         printf("Fixed the nlink of inode %llu\n", rec->ino);
2665 out:
2666         /*
2667          * Clear the flag anyway, or we will loop forever for the same inode
2668          * as it will not be removed from the bad inode list and the dead loop
2669          * happens.
2670          */
2671         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672         btrfs_release_path(path);
2673         return ret;
2674 }
2675
2676 /*
2677  * Check if there is any normal(reg or prealloc) file extent for given
2678  * ino.
2679  * This is used to determine the file type when neither its dir_index/item or
2680  * inode_item exists.
2681  *
2682  * This will *NOT* report error, if any error happens, just consider it does
2683  * not have any normal file extent.
2684  */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 {
2687         struct btrfs_path path;
2688         struct btrfs_key key;
2689         struct btrfs_key found_key;
2690         struct btrfs_file_extent_item *fi;
2691         u8 type;
2692         int ret = 0;
2693
2694         btrfs_init_path(&path);
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705                 ret = btrfs_next_leaf(root, &path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713                                       path.slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path.nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_release_path(&path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         /*
2930          * For nlink repair, it may create a dir and add link, so
2931          * 2 for parent(256)'s dir_index and dir_item
2932          * 2 for lost+found dir's inode_item and inode_ref
2933          * 1 for the new inode_ref of the file
2934          * 2 for lost+found dir's dir_index and dir_item for the file
2935          */
2936         trans = btrfs_start_transaction(root, 7);
2937         if (IS_ERR(trans))
2938                 return PTR_ERR(trans);
2939
2940         btrfs_init_path(&path);
2941         if (rec->errors & I_ERR_NO_INODE_ITEM)
2942                 ret = repair_inode_no_item(trans, root, &path, rec);
2943         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948                 ret = repair_inode_isize(trans, root, &path, rec);
2949         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952                 ret = repair_inode_nlinks(trans, root, &path, rec);
2953         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954                 ret = repair_inode_nbytes(trans, root, &path, rec);
2955         btrfs_commit_transaction(trans, root);
2956         btrfs_release_path(&path);
2957         return ret;
2958 }
2959
2960 static int check_inode_recs(struct btrfs_root *root,
2961                             struct cache_tree *inode_cache)
2962 {
2963         struct cache_extent *cache;
2964         struct ptr_node *node;
2965         struct inode_record *rec;
2966         struct inode_backref *backref;
2967         int stage = 0;
2968         int ret = 0;
2969         int err = 0;
2970         u64 error = 0;
2971         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2972
2973         if (btrfs_root_refs(&root->root_item) == 0) {
2974                 if (!cache_tree_empty(inode_cache))
2975                         fprintf(stderr, "warning line %d\n", __LINE__);
2976                 return 0;
2977         }
2978
2979         /*
2980          * We need to record the highest inode number for later 'lost+found'
2981          * dir creation.
2982          * We must select an ino not used/referred by any existing inode, or
2983          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984          * this may cause 'lost+found' dir has wrong nlinks.
2985          */
2986         cache = last_cache_extent(inode_cache);
2987         if (cache) {
2988                 node = container_of(cache, struct ptr_node, cache);
2989                 rec = node->data;
2990                 if (rec->ino > root->highest_inode)
2991                         root->highest_inode = rec->ino;
2992         }
2993
2994         /*
2995          * We need to repair backrefs first because we could change some of the
2996          * errors in the inode recs.
2997          *
2998          * We also need to go through and delete invalid backrefs first and then
2999          * add the correct ones second.  We do this because we may get EEXIST
3000          * when adding back the correct index because we hadn't yet deleted the
3001          * invalid index.
3002          *
3003          * For example, if we were missing a dir index then the directories
3004          * isize would be wrong, so if we fixed the isize to what we thought it
3005          * would be and then fixed the backref we'd still have a invalid fs, so
3006          * we need to add back the dir index and then check to see if the isize
3007          * is still wrong.
3008          */
3009         while (stage < 3) {
3010                 stage++;
3011                 if (stage == 3 && !err)
3012                         break;
3013
3014                 cache = search_cache_extent(inode_cache, 0);
3015                 while (repair && cache) {
3016                         node = container_of(cache, struct ptr_node, cache);
3017                         rec = node->data;
3018                         cache = next_cache_extent(cache);
3019
3020                         /* Need to free everything up and rescan */
3021                         if (stage == 3) {
3022                                 remove_cache_extent(inode_cache, &node->cache);
3023                                 free(node);
3024                                 free_inode_rec(rec);
3025                                 continue;
3026                         }
3027
3028                         if (list_empty(&rec->backrefs))
3029                                 continue;
3030
3031                         ret = repair_inode_backrefs(root, rec, inode_cache,
3032                                                     stage == 1);
3033                         if (ret < 0) {
3034                                 err = ret;
3035                                 stage = 2;
3036                                 break;
3037                         } if (ret > 0) {
3038                                 err = -EAGAIN;
3039                         }
3040                 }
3041         }
3042         if (err)
3043                 return err;
3044
3045         rec = get_inode_rec(inode_cache, root_dirid, 0);
3046         BUG_ON(IS_ERR(rec));
3047         if (rec) {
3048                 ret = check_root_dir(rec);
3049                 if (ret) {
3050                         fprintf(stderr, "root %llu root dir %llu error\n",
3051                                 (unsigned long long)root->root_key.objectid,
3052                                 (unsigned long long)root_dirid);
3053                         print_inode_error(root, rec);
3054                         error++;
3055                 }
3056         } else {
3057                 if (repair) {
3058                         struct btrfs_trans_handle *trans;
3059
3060                         trans = btrfs_start_transaction(root, 1);
3061                         if (IS_ERR(trans)) {
3062                                 err = PTR_ERR(trans);
3063                                 return err;
3064                         }
3065
3066                         fprintf(stderr,
3067                                 "root %llu missing its root dir, recreating\n",
3068                                 (unsigned long long)root->objectid);
3069
3070                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3071                         BUG_ON(ret);
3072
3073                         btrfs_commit_transaction(trans, root);
3074                         return -EAGAIN;
3075                 }
3076
3077                 fprintf(stderr, "root %llu root dir %llu not found\n",
3078                         (unsigned long long)root->root_key.objectid,
3079                         (unsigned long long)root_dirid);
3080         }
3081
3082         while (1) {
3083                 cache = search_cache_extent(inode_cache, 0);
3084                 if (!cache)
3085                         break;
3086                 node = container_of(cache, struct ptr_node, cache);
3087                 rec = node->data;
3088                 remove_cache_extent(inode_cache, &node->cache);
3089                 free(node);
3090                 if (rec->ino == root_dirid ||
3091                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092                         free_inode_rec(rec);
3093                         continue;
3094                 }
3095
3096                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097                         ret = check_orphan_item(root, rec->ino);
3098                         if (ret == 0)
3099                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100                         if (can_free_inode_rec(rec)) {
3101                                 free_inode_rec(rec);
3102                                 continue;
3103                         }
3104                 }
3105
3106                 if (!rec->found_inode_item)
3107                         rec->errors |= I_ERR_NO_INODE_ITEM;
3108                 if (rec->found_link != rec->nlink)
3109                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110                 if (repair) {
3111                         ret = try_repair_inode(root, rec);
3112                         if (ret == 0 && can_free_inode_rec(rec)) {
3113                                 free_inode_rec(rec);
3114                                 continue;
3115                         }
3116                         ret = 0;
3117                 }
3118
3119                 if (!(repair && ret == 0))
3120                         error++;
3121                 print_inode_error(root, rec);
3122                 list_for_each_entry(backref, &rec->backrefs, list) {
3123                         if (!backref->found_dir_item)
3124                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125                         if (!backref->found_dir_index)
3126                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127                         if (!backref->found_inode_ref)
3128                                 backref->errors |= REF_ERR_NO_INODE_REF;
3129                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130                                 " namelen %u name %s filetype %d errors %x",
3131                                 (unsigned long long)backref->dir,
3132                                 (unsigned long long)backref->index,
3133                                 backref->namelen, backref->name,
3134                                 backref->filetype, backref->errors);
3135                         print_ref_error(backref->errors);
3136                 }
3137                 free_inode_rec(rec);
3138         }
3139         return (error > 0) ? -1 : 0;
3140 }
3141
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3143                                         u64 objectid)
3144 {
3145         struct cache_extent *cache;
3146         struct root_record *rec = NULL;
3147         int ret;
3148
3149         cache = lookup_cache_extent(root_cache, objectid, 1);
3150         if (cache) {
3151                 rec = container_of(cache, struct root_record, cache);
3152         } else {
3153                 rec = calloc(1, sizeof(*rec));
3154                 if (!rec)
3155                         return ERR_PTR(-ENOMEM);
3156                 rec->objectid = objectid;
3157                 INIT_LIST_HEAD(&rec->backrefs);
3158                 rec->cache.start = objectid;
3159                 rec->cache.size = 1;
3160
3161                 ret = insert_cache_extent(root_cache, &rec->cache);
3162                 if (ret)
3163                         return ERR_PTR(-EEXIST);
3164         }
3165         return rec;
3166 }
3167
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169                                              u64 ref_root, u64 dir, u64 index,
3170                                              const char *name, int namelen)
3171 {
3172         struct root_backref *backref;
3173
3174         list_for_each_entry(backref, &rec->backrefs, list) {
3175                 if (backref->ref_root != ref_root || backref->dir != dir ||
3176                     backref->namelen != namelen)
3177                         continue;
3178                 if (memcmp(name, backref->name, namelen))
3179                         continue;
3180                 return backref;
3181         }
3182
3183         backref = calloc(1, sizeof(*backref) + namelen + 1);
3184         if (!backref)
3185                 return NULL;
3186         backref->ref_root = ref_root;
3187         backref->dir = dir;
3188         backref->index = index;
3189         backref->namelen = namelen;
3190         memcpy(backref->name, name, namelen);
3191         backref->name[namelen] = '\0';
3192         list_add_tail(&backref->list, &rec->backrefs);
3193         return backref;
3194 }
3195
3196 static void free_root_record(struct cache_extent *cache)
3197 {
3198         struct root_record *rec;
3199         struct root_backref *backref;
3200
3201         rec = container_of(cache, struct root_record, cache);
3202         while (!list_empty(&rec->backrefs)) {
3203                 backref = to_root_backref(rec->backrefs.next);
3204                 list_del(&backref->list);
3205                 free(backref);
3206         }
3207
3208         free(rec);
3209 }
3210
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3212
3213 static int add_root_backref(struct cache_tree *root_cache,
3214                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3215                             const char *name, int namelen,
3216                             int item_type, int errors)
3217 {
3218         struct root_record *rec;
3219         struct root_backref *backref;
3220
3221         rec = get_root_rec(root_cache, root_id);
3222         BUG_ON(IS_ERR(rec));
3223         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3224         BUG_ON(!backref);
3225
3226         backref->errors |= errors;
3227
3228         if (item_type != BTRFS_DIR_ITEM_KEY) {
3229                 if (backref->found_dir_index || backref->found_back_ref ||
3230                     backref->found_forward_ref) {
3231                         if (backref->index != index)
3232                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233                 } else {
3234                         backref->index = index;
3235                 }
3236         }
3237
3238         if (item_type == BTRFS_DIR_ITEM_KEY) {
3239                 if (backref->found_forward_ref)
3240                         rec->found_ref++;
3241                 backref->found_dir_item = 1;
3242         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243                 backref->found_dir_index = 1;
3244         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245                 if (backref->found_forward_ref)
3246                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3247                 else if (backref->found_dir_item)
3248                         rec->found_ref++;
3249                 backref->found_forward_ref = 1;
3250         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251                 if (backref->found_back_ref)
3252                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253                 backref->found_back_ref = 1;
3254         } else {
3255                 BUG_ON(1);
3256         }
3257
3258         if (backref->found_forward_ref && backref->found_dir_item)
3259                 backref->reachable = 1;
3260         return 0;
3261 }
3262
3263 static int merge_root_recs(struct btrfs_root *root,
3264                            struct cache_tree *src_cache,
3265                            struct cache_tree *dst_cache)
3266 {
3267         struct cache_extent *cache;
3268         struct ptr_node *node;
3269         struct inode_record *rec;
3270         struct inode_backref *backref;
3271         int ret = 0;
3272
3273         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274                 free_inode_recs_tree(src_cache);
3275                 return 0;
3276         }
3277
3278         while (1) {
3279                 cache = search_cache_extent(src_cache, 0);
3280                 if (!cache)
3281                         break;
3282                 node = container_of(cache, struct ptr_node, cache);
3283                 rec = node->data;
3284                 remove_cache_extent(src_cache, &node->cache);
3285                 free(node);
3286
3287                 ret = is_child_root(root, root->objectid, rec->ino);
3288                 if (ret < 0)
3289                         break;
3290                 else if (ret == 0)
3291                         goto skip;
3292
3293                 list_for_each_entry(backref, &rec->backrefs, list) {
3294                         BUG_ON(backref->found_inode_ref);
3295                         if (backref->found_dir_item)
3296                                 add_root_backref(dst_cache, rec->ino,
3297                                         root->root_key.objectid, backref->dir,
3298                                         backref->index, backref->name,
3299                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3300                                         backref->errors);
3301                         if (backref->found_dir_index)
3302                                 add_root_backref(dst_cache, rec->ino,
3303                                         root->root_key.objectid, backref->dir,
3304                                         backref->index, backref->name,
3305                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3306                                         backref->errors);
3307                 }
3308 skip:
3309                 free_inode_rec(rec);
3310         }
3311         if (ret < 0)
3312                 return ret;
3313         return 0;
3314 }
3315
3316 static int check_root_refs(struct btrfs_root *root,
3317                            struct cache_tree *root_cache)
3318 {
3319         struct root_record *rec;
3320         struct root_record *ref_root;
3321         struct root_backref *backref;
3322         struct cache_extent *cache;
3323         int loop = 1;
3324         int ret;
3325         int error;
3326         int errors = 0;
3327
3328         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329         BUG_ON(IS_ERR(rec));
3330         rec->found_ref = 1;
3331
3332         /* fixme: this can not detect circular references */
3333         while (loop) {
3334                 loop = 0;
3335                 cache = search_cache_extent(root_cache, 0);
3336                 while (1) {
3337                         if (!cache)
3338                                 break;
3339                         rec = container_of(cache, struct root_record, cache);
3340                         cache = next_cache_extent(cache);
3341
3342                         if (rec->found_ref == 0)
3343                                 continue;
3344
3345                         list_for_each_entry(backref, &rec->backrefs, list) {
3346                                 if (!backref->reachable)
3347                                         continue;
3348
3349                                 ref_root = get_root_rec(root_cache,
3350                                                         backref->ref_root);
3351                                 BUG_ON(IS_ERR(ref_root));
3352                                 if (ref_root->found_ref > 0)
3353                                         continue;
3354
3355                                 backref->reachable = 0;
3356                                 rec->found_ref--;
3357                                 if (rec->found_ref == 0)
3358                                         loop = 1;
3359                         }
3360                 }
3361         }
3362
3363         cache = search_cache_extent(root_cache, 0);
3364         while (1) {
3365                 if (!cache)
3366                         break;
3367                 rec = container_of(cache, struct root_record, cache);
3368                 cache = next_cache_extent(cache);
3369
3370                 if (rec->found_ref == 0 &&
3371                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373                         ret = check_orphan_item(root->fs_info->tree_root,
3374                                                 rec->objectid);
3375                         if (ret == 0)
3376                                 continue;
3377
3378                         /*
3379                          * If we don't have a root item then we likely just have
3380                          * a dir item in a snapshot for this root but no actual
3381                          * ref key or anything so it's meaningless.
3382                          */
3383                         if (!rec->found_root_item)
3384                                 continue;
3385                         errors++;
3386                         fprintf(stderr, "fs tree %llu not referenced\n",
3387                                 (unsigned long long)rec->objectid);
3388                 }
3389
3390                 error = 0;
3391                 if (rec->found_ref > 0 && !rec->found_root_item)
3392                         error = 1;
3393                 list_for_each_entry(backref, &rec->backrefs, list) {
3394                         if (!backref->found_dir_item)
3395                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396                         if (!backref->found_dir_index)
3397                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398                         if (!backref->found_back_ref)
3399                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400                         if (!backref->found_forward_ref)
3401                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3402                         if (backref->reachable && backref->errors)
3403                                 error = 1;
3404                 }
3405                 if (!error)
3406                         continue;
3407
3408                 errors++;
3409                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410                         (unsigned long long)rec->objectid, rec->found_ref,
3411                          rec->found_root_item ? "" : "not found");
3412
3413                 list_for_each_entry(backref, &rec->backrefs, list) {
3414                         if (!backref->reachable)
3415                                 continue;
3416                         if (!backref->errors && rec->found_root_item)
3417                                 continue;
3418                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419                                 " index %llu namelen %u name %s errors %x\n",
3420                                 (unsigned long long)backref->ref_root,
3421                                 (unsigned long long)backref->dir,
3422                                 (unsigned long long)backref->index,
3423                                 backref->namelen, backref->name,
3424                                 backref->errors);
3425                         print_ref_error(backref->errors);
3426                 }
3427         }
3428         return errors > 0 ? 1 : 0;
3429 }
3430
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432                             struct btrfs_key *key,
3433                             struct cache_tree *root_cache)
3434 {
3435         u64 dirid;
3436         u64 index;
3437         u32 len;
3438         u32 name_len;
3439         struct btrfs_root_ref *ref;
3440         char namebuf[BTRFS_NAME_LEN];
3441         int error;
3442
3443         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444
3445         dirid = btrfs_root_ref_dirid(eb, ref);
3446         index = btrfs_root_ref_sequence(eb, ref);
3447         name_len = btrfs_root_ref_name_len(eb, ref);
3448
3449         if (name_len <= BTRFS_NAME_LEN) {
3450                 len = name_len;
3451                 error = 0;
3452         } else {
3453                 len = BTRFS_NAME_LEN;
3454                 error = REF_ERR_NAME_TOO_LONG;
3455         }
3456         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457
3458         if (key->type == BTRFS_ROOT_REF_KEY) {
3459                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460                                  index, namebuf, len, key->type, error);
3461         } else {
3462                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463                                  index, namebuf, len, key->type, error);
3464         }
3465         return 0;
3466 }
3467
3468 static void free_corrupt_block(struct cache_extent *cache)
3469 {
3470         struct btrfs_corrupt_block *corrupt;
3471
3472         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3473         free(corrupt);
3474 }
3475
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3477
3478 /*
3479  * Repair the btree of the given root.
3480  *
3481  * The fix is to remove the node key in corrupt_blocks cache_tree.
3482  * and rebalance the tree.
3483  * After the fix, the btree should be writeable.
3484  */
3485 static int repair_btree(struct btrfs_root *root,
3486                         struct cache_tree *corrupt_blocks)
3487 {
3488         struct btrfs_trans_handle *trans;
3489         struct btrfs_path path;
3490         struct btrfs_corrupt_block *corrupt;
3491         struct cache_extent *cache;
3492         struct btrfs_key key;
3493         u64 offset;
3494         int level;
3495         int ret = 0;
3496
3497         if (cache_tree_empty(corrupt_blocks))
3498                 return 0;
3499
3500         trans = btrfs_start_transaction(root, 1);
3501         if (IS_ERR(trans)) {
3502                 ret = PTR_ERR(trans);
3503                 fprintf(stderr, "Error starting transaction: %s\n",
3504                         strerror(-ret));
3505                 return ret;
3506         }
3507         btrfs_init_path(&path);
3508         cache = first_cache_extent(corrupt_blocks);
3509         while (cache) {
3510                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511                                        cache);
3512                 level = corrupt->level;
3513                 path.lowest_level = level;
3514                 key.objectid = corrupt->key.objectid;
3515                 key.type = corrupt->key.type;
3516                 key.offset = corrupt->key.offset;
3517
3518                 /*
3519                  * Here we don't want to do any tree balance, since it may
3520                  * cause a balance with corrupted brother leaf/node,
3521                  * so ins_len set to 0 here.
3522                  * Balance will be done after all corrupt node/leaf is deleted.
3523                  */
3524                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3525                 if (ret < 0)
3526                         goto out;
3527                 offset = btrfs_node_blockptr(path.nodes[level],
3528                                              path.slots[level]);
3529
3530                 /* Remove the ptr */
3531                 ret = btrfs_del_ptr(trans, root, &path, level,
3532                                     path.slots[level]);
3533                 if (ret < 0)
3534                         goto out;
3535                 /*
3536                  * Remove the corresponding extent
3537                  * return value is not concerned.
3538                  */
3539                 btrfs_release_path(&path);
3540                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541                                         0, root->root_key.objectid,
3542                                         level - 1, 0);
3543                 cache = next_cache_extent(cache);
3544         }
3545
3546         /* Balance the btree using btrfs_search_slot() */
3547         cache = first_cache_extent(corrupt_blocks);
3548         while (cache) {
3549                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550                                        cache);
3551                 memcpy(&key, &corrupt->key, sizeof(key));
3552                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3553                 if (ret < 0)
3554                         goto out;
3555                 /* return will always >0 since it won't find the item */
3556                 ret = 0;
3557                 btrfs_release_path(&path);
3558                 cache = next_cache_extent(cache);
3559         }
3560 out:
3561         btrfs_commit_transaction(trans, root);
3562         btrfs_release_path(&path);
3563         return ret;
3564 }
3565
3566 static int check_fs_root(struct btrfs_root *root,
3567                          struct cache_tree *root_cache,
3568                          struct walk_control *wc)
3569 {
3570         int ret = 0;
3571         int err = 0;
3572         int wret;
3573         int level;
3574         struct btrfs_path path;
3575         struct shared_node root_node;
3576         struct root_record *rec;
3577         struct btrfs_root_item *root_item = &root->root_item;
3578         struct cache_tree corrupt_blocks;
3579         struct orphan_data_extent *orphan;
3580         struct orphan_data_extent *tmp;
3581         enum btrfs_tree_block_status status;
3582         struct node_refs nrefs;
3583
3584         /*
3585          * Reuse the corrupt_block cache tree to record corrupted tree block
3586          *
3587          * Unlike the usage in extent tree check, here we do it in a per
3588          * fs/subvol tree base.
3589          */
3590         cache_tree_init(&corrupt_blocks);
3591         root->fs_info->corrupt_blocks = &corrupt_blocks;
3592
3593         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594                 rec = get_root_rec(root_cache, root->root_key.objectid);
3595                 BUG_ON(IS_ERR(rec));
3596                 if (btrfs_root_refs(root_item) > 0)
3597                         rec->found_root_item = 1;
3598         }
3599
3600         btrfs_init_path(&path);
3601         memset(&root_node, 0, sizeof(root_node));
3602         cache_tree_init(&root_node.root_cache);
3603         cache_tree_init(&root_node.inode_cache);
3604         memset(&nrefs, 0, sizeof(nrefs));
3605
3606         /* Move the orphan extent record to corresponding inode_record */
3607         list_for_each_entry_safe(orphan, tmp,
3608                                  &root->orphan_data_extents, list) {
3609                 struct inode_record *inode;
3610
3611                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612                                       1);
3613                 BUG_ON(IS_ERR(inode));
3614                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615                 list_move(&orphan->list, &inode->orphan_extents);
3616         }
3617
3618         level = btrfs_header_level(root->node);
3619         memset(wc->nodes, 0, sizeof(wc->nodes));
3620         wc->nodes[level] = &root_node;
3621         wc->active_node = level;
3622         wc->root_level = level;
3623
3624         /* We may not have checked the root block, lets do that now */
3625         if (btrfs_is_leaf(root->node))
3626                 status = btrfs_check_leaf(root, NULL, root->node);
3627         else
3628                 status = btrfs_check_node(root, NULL, root->node);
3629         if (status != BTRFS_TREE_BLOCK_CLEAN)
3630                 return -EIO;
3631
3632         if (btrfs_root_refs(root_item) > 0 ||
3633             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634                 path.nodes[level] = root->node;
3635                 extent_buffer_get(root->node);
3636                 path.slots[level] = 0;
3637         } else {
3638                 struct btrfs_key key;
3639                 struct btrfs_disk_key found_key;
3640
3641                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642                 level = root_item->drop_level;
3643                 path.lowest_level = level;
3644                 if (level > btrfs_header_level(root->node) ||
3645                     level >= BTRFS_MAX_LEVEL) {
3646                         error("ignoring invalid drop level: %u", level);
3647                         goto skip_walking;
3648                 }
3649                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3650                 if (wret < 0)
3651                         goto skip_walking;
3652                 btrfs_node_key(path.nodes[level], &found_key,
3653                                 path.slots[level]);
3654                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655                                         sizeof(found_key)));
3656         }
3657
3658         while (1) {
3659                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3660                 if (wret < 0)
3661                         ret = wret;
3662                 if (wret != 0)
3663                         break;
3664
3665                 wret = walk_up_tree(root, &path, wc, &level);
3666                 if (wret < 0)
3667                         ret = wret;
3668                 if (wret != 0)
3669                         break;
3670         }
3671 skip_walking:
3672         btrfs_release_path(&path);
3673
3674         if (!cache_tree_empty(&corrupt_blocks)) {
3675                 struct cache_extent *cache;
3676                 struct btrfs_corrupt_block *corrupt;
3677
3678                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679                        root->root_key.objectid);
3680                 cache = first_cache_extent(&corrupt_blocks);
3681                 while (cache) {
3682                         corrupt = container_of(cache,
3683                                                struct btrfs_corrupt_block,
3684                                                cache);
3685                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686                                cache->start, corrupt->level,
3687                                corrupt->key.objectid, corrupt->key.type,
3688                                corrupt->key.offset);
3689                         cache = next_cache_extent(cache);
3690                 }
3691                 if (repair) {
3692                         printf("Try to repair the btree for root %llu\n",
3693                                root->root_key.objectid);
3694                         ret = repair_btree(root, &corrupt_blocks);
3695                         if (ret < 0)
3696                                 fprintf(stderr, "Failed to repair btree: %s\n",
3697                                         strerror(-ret));
3698                         if (!ret)
3699                                 printf("Btree for root %llu is fixed\n",
3700                                        root->root_key.objectid);
3701                 }
3702         }
3703
3704         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3705         if (err < 0)
3706                 ret = err;
3707
3708         if (root_node.current) {
3709                 root_node.current->checked = 1;
3710                 maybe_free_inode_rec(&root_node.inode_cache,
3711                                 root_node.current);
3712         }
3713
3714         err = check_inode_recs(root, &root_node.inode_cache);
3715         if (!ret)
3716                 ret = err;
3717
3718         free_corrupt_blocks_tree(&corrupt_blocks);
3719         root->fs_info->corrupt_blocks = NULL;
3720         free_orphan_data_extents(&root->orphan_data_extents);
3721         return ret;
3722 }
3723
3724 static int fs_root_objectid(u64 objectid)
3725 {
3726         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728                 return 1;
3729         return is_fstree(objectid);
3730 }
3731
3732 static int check_fs_roots(struct btrfs_root *root,
3733                           struct cache_tree *root_cache)
3734 {
3735         struct btrfs_path path;
3736         struct btrfs_key key;
3737         struct walk_control wc;
3738         struct extent_buffer *leaf, *tree_node;
3739         struct btrfs_root *tmp_root;
3740         struct btrfs_root *tree_root = root->fs_info->tree_root;
3741         int ret;
3742         int err = 0;
3743
3744         if (ctx.progress_enabled) {
3745                 ctx.tp = TASK_FS_ROOTS;
3746                 task_start(ctx.info);
3747         }
3748
3749         /*
3750          * Just in case we made any changes to the extent tree that weren't
3751          * reflected into the free space cache yet.
3752          */
3753         if (repair)
3754                 reset_cached_block_groups(root->fs_info);
3755         memset(&wc, 0, sizeof(wc));
3756         cache_tree_init(&wc.shared);
3757         btrfs_init_path(&path);
3758
3759 again:
3760         key.offset = 0;
3761         key.objectid = 0;
3762         key.type = BTRFS_ROOT_ITEM_KEY;
3763         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3764         if (ret < 0) {
3765                 err = 1;
3766                 goto out;
3767         }
3768         tree_node = tree_root->node;
3769         while (1) {
3770                 if (tree_node != tree_root->node) {
3771                         free_root_recs_tree(root_cache);
3772                         btrfs_release_path(&path);
3773                         goto again;
3774                 }
3775                 leaf = path.nodes[0];
3776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777                         ret = btrfs_next_leaf(tree_root, &path);
3778                         if (ret) {
3779                                 if (ret < 0)
3780                                         err = 1;
3781                                 break;
3782                         }
3783                         leaf = path.nodes[0];
3784                 }
3785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787                     fs_root_objectid(key.objectid)) {
3788                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789                                 tmp_root = btrfs_read_fs_root_no_cache(
3790                                                 root->fs_info, &key);
3791                         } else {
3792                                 key.offset = (u64)-1;
3793                                 tmp_root = btrfs_read_fs_root(
3794                                                 root->fs_info, &key);
3795                         }
3796                         if (IS_ERR(tmp_root)) {
3797                                 err = 1;
3798                                 goto next;
3799                         }
3800                         ret = check_fs_root(tmp_root, root_cache, &wc);
3801                         if (ret == -EAGAIN) {
3802                                 free_root_recs_tree(root_cache);
3803                                 btrfs_release_path(&path);
3804                                 goto again;
3805                         }
3806                         if (ret)
3807                                 err = 1;
3808                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809                                 btrfs_free_fs_root(tmp_root);
3810                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3812                         process_root_ref(leaf, path.slots[0], &key,
3813                                          root_cache);
3814                 }
3815 next:
3816                 path.slots[0]++;
3817         }
3818 out:
3819         btrfs_release_path(&path);
3820         if (err)
3821                 free_extent_cache_tree(&wc.shared);
3822         if (!cache_tree_empty(&wc.shared))
3823                 fprintf(stderr, "warning line %d\n", __LINE__);
3824
3825         task_stop(ctx.info);
3826
3827         return err;
3828 }
3829
/*
 * Error bits reported by the item checks below (e.g. find_dir_item()).
 * Every flag occupies its own bit, so several can be OR-ed into one value.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* ROOT_DIR is bad */
#define DIR_ITEM_MISSING	(1 << 2)	/* no DIR_ITEM was found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found, but it does not match */
#define INODE_REF_MISSING	(1 << 4)	/* no INODE_REF/INODE_EXTREF was found */
#define INODE_ITEM_MISSING	(1 << 5)	/* no INODE_ITEM was found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found, but it does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* FILE_EXTENT is bad */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* no CSUM_ITEM was found */
3839
3840 /*
3841  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3842  * INODE_REF/INODE_EXTREF match.
3843  *
3844  * @root:       the root of the fs/file tree
3845  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
3846  * @key:        the key of the DIR_ITEM/DIR_INDEX
3847  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
3848  *              distinguish root_dir between normal dir/file
3849  * @name:       the name in the INODE_REF/INODE_EXTREF
3850  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
3851  * @mode:       the st_mode of INODE_ITEM
3852  *
3853  * Return 0 if no error occurred.
3854  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3855  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3856  * dir/file.
3857  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3858  * not match for normal dir/file.
3859  */
3860 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3861                          struct btrfs_key *key, u64 index, char *name,
3862                          u32 namelen, u32 mode)
3863 {
3864         struct btrfs_path path;
3865         struct extent_buffer *node;
3866         struct btrfs_dir_item *di;
3867         struct btrfs_key location;
3868         char namebuf[BTRFS_NAME_LEN] = {0};
3869         u32 total;
3870         u32 cur = 0;
3871         u32 len;
3872         u32 name_len;
3873         u32 data_len;
3874         u8 filetype;
3875         int slot;
3876         int ret;
3877
3878         btrfs_init_path(&path);
3879         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3880         if (ret < 0) {
3881                 ret = DIR_ITEM_MISSING;
3882                 goto out;
3883         }
3884
3885         /* Process root dir and goto out*/
3886         if (index == 0) {
3887                 if (ret == 0) {
3888                         ret = ROOT_DIR_ERROR;
3889                         error(
3890                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3891                                 root->objectid,
3892                                 ref_key->type == BTRFS_INODE_REF_KEY ?
3893                                         "REF" : "EXTREF",
3894                                 ref_key->objectid, ref_key->offset,
3895                                 key->type == BTRFS_DIR_ITEM_KEY ?
3896                                         "DIR_ITEM" : "DIR_INDEX");
3897                 } else {
3898                         ret = 0;
3899                 }
3900
3901                 goto out;
3902         }
3903
3904         /* Process normal file/dir */
3905         if (ret > 0) {
3906                 ret = DIR_ITEM_MISSING;
3907                 error(
3908                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3909                         root->objectid,
3910                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3911                         ref_key->objectid, ref_key->offset,
3912                         key->type == BTRFS_DIR_ITEM_KEY ?
3913                                 "DIR_ITEM" : "DIR_INDEX",
3914                         key->objectid, key->offset, namelen, name,
3915                         imode_to_type(mode));
3916                 goto out;
3917         }
3918
3919         /* Check whether inode_id/filetype/name match */
3920         node = path.nodes[0];
3921         slot = path.slots[0];
3922         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3923         total = btrfs_item_size_nr(node, slot);
3924         while (cur < total) {
3925                 ret = DIR_ITEM_MISMATCH;
3926                 name_len = btrfs_dir_name_len(node, di);
3927                 data_len = btrfs_dir_data_len(node, di);
3928
3929                 btrfs_dir_item_key_to_cpu(node, di, &location);
3930                 if (location.objectid != ref_key->objectid ||
3931                     location.type !=  BTRFS_INODE_ITEM_KEY ||
3932                     location.offset != 0)
3933                         goto next;
3934
3935                 filetype = btrfs_dir_type(node, di);
3936                 if (imode_to_type(mode) != filetype)
3937                         goto next;
3938
3939                 if (name_len <= BTRFS_NAME_LEN) {
3940                         len = name_len;
3941                 } else {
3942                         len = BTRFS_NAME_LEN;
3943                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3944                         root->objectid,
3945                         key->type == BTRFS_DIR_ITEM_KEY ?
3946                         "DIR_ITEM" : "DIR_INDEX",
3947                         key->objectid, key->offset, name_len);
3948                 }
3949                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3950                 if (len != namelen || strncmp(namebuf, name, len))
3951                         goto next;
3952
3953                 ret = 0;
3954                 goto out;
3955 next:
3956                 len = sizeof(*di) + name_len + data_len;
3957                 di = (struct btrfs_dir_item *)((char *)di + len);
3958                 cur += len;
3959         }
3960         if (ret == DIR_ITEM_MISMATCH)
3961                 error(
3962                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3963                         root->objectid,
3964                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3965                         ref_key->objectid, ref_key->offset,
3966                         key->type == BTRFS_DIR_ITEM_KEY ?
3967                                 "DIR_ITEM" : "DIR_INDEX",
3968                         key->objectid, key->offset, namelen, name,
3969                         imode_to_type(mode));
3970 out:
3971         btrfs_release_path(&path);
3972         return ret;
3973 }
3974
3975 /*
3976  * Traverse the given INODE_REF and call find_dir_item() to find related
3977  * DIR_ITEM/DIR_INDEX.
3978  *
3979  * @root:       the root of the fs/file tree
3980  * @ref_key:    the key of the INODE_REF
3981  * @refs:       the count of INODE_REF
3982  * @mode:       the st_mode of INODE_ITEM
3983  *
3984  * Return 0 if no error occurred.
3985  */
3986 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3987                            struct extent_buffer *node, int slot, u64 *refs,
3988                            int mode)
3989 {
3990         struct btrfs_key key;
3991         struct btrfs_inode_ref *ref;
3992         char namebuf[BTRFS_NAME_LEN] = {0};
3993         u32 total;
3994         u32 cur = 0;
3995         u32 len;
3996         u32 name_len;
3997         u64 index;
3998         int ret, err = 0;
3999
4000         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4001         total = btrfs_item_size_nr(node, slot);
4002
4003 next:
4004         /* Update inode ref count */
4005         (*refs)++;
4006
4007         index = btrfs_inode_ref_index(node, ref);
4008         name_len = btrfs_inode_ref_name_len(node, ref);
4009         if (name_len <= BTRFS_NAME_LEN) {
4010                 len = name_len;
4011         } else {
4012                 len = BTRFS_NAME_LEN;
4013                 warning("root %llu INODE_REF[%llu %llu] name too long",
4014                         root->objectid, ref_key->objectid, ref_key->offset);
4015         }
4016
4017         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4018
4019         /* Check root dir ref name */
4020         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4021                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4022                       root->objectid, ref_key->objectid, ref_key->offset,
4023                       namebuf);
4024                 err |= ROOT_DIR_ERROR;
4025         }
4026
4027         /* Find related DIR_INDEX */
4028         key.objectid = ref_key->offset;
4029         key.type = BTRFS_DIR_INDEX_KEY;
4030         key.offset = index;
4031         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4032         err |= ret;
4033
4034         /* Find related dir_item */
4035         key.objectid = ref_key->offset;
4036         key.type = BTRFS_DIR_ITEM_KEY;
4037         key.offset = btrfs_name_hash(namebuf, len);
4038         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4039         err |= ret;
4040
4041         len = sizeof(*ref) + name_len;
4042         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4043         cur += len;
4044         if (cur < total)
4045                 goto next;
4046
4047         return err;
4048 }
4049
4050 /*
4051  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4052  * DIR_ITEM/DIR_INDEX.
4053  *
4054  * @root:       the root of the fs/file tree
4055  * @ref_key:    the key of the INODE_EXTREF
4056  * @refs:       the count of INODE_EXTREF
4057  * @mode:       the st_mode of INODE_ITEM
4058  *
4059  * Return 0 if no error occurred.
4060  */
4061 static int check_inode_extref(struct btrfs_root *root,
4062                               struct btrfs_key *ref_key,
4063                               struct extent_buffer *node, int slot, u64 *refs,
4064                               int mode)
4065 {
4066         struct btrfs_key key;
4067         struct btrfs_inode_extref *extref;
4068         char namebuf[BTRFS_NAME_LEN] = {0};
4069         u32 total;
4070         u32 cur = 0;
4071         u32 len;
4072         u32 name_len;
4073         u64 index;
4074         u64 parent;
4075         int ret;
4076         int err = 0;
4077
4078         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4079         total = btrfs_item_size_nr(node, slot);
4080
4081 next:
4082         /* update inode ref count */
4083         (*refs)++;
4084         name_len = btrfs_inode_extref_name_len(node, extref);
4085         index = btrfs_inode_extref_index(node, extref);
4086         parent = btrfs_inode_extref_parent(node, extref);
4087         if (name_len <= BTRFS_NAME_LEN) {
4088                 len = name_len;
4089         } else {
4090                 len = BTRFS_NAME_LEN;
4091                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4092                         root->objectid, ref_key->objectid, ref_key->offset);
4093         }
4094         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4095
4096         /* Check root dir ref name */
4097         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4098                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4099                       root->objectid, ref_key->objectid, ref_key->offset,
4100                       namebuf);
4101                 err |= ROOT_DIR_ERROR;
4102         }
4103
4104         /* find related dir_index */
4105         key.objectid = parent;
4106         key.type = BTRFS_DIR_INDEX_KEY;
4107         key.offset = index;
4108         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4109         err |= ret;
4110
4111         /* find related dir_item */
4112         key.objectid = parent;
4113         key.type = BTRFS_DIR_ITEM_KEY;
4114         key.offset = btrfs_name_hash(namebuf, len);
4115         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4116         err |= ret;
4117
4118         len = sizeof(*extref) + name_len;
4119         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4120         cur += len;
4121
4122         if (cur < total)
4123                 goto next;
4124
4125         return err;
4126 }
4127
4128 /*
4129  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4130  * DIR_ITEM/DIR_INDEX match.
4131  *
4132  * @root:       the root of the fs/file tree
4133  * @key:        the key of the INODE_REF/INODE_EXTREF
4134  * @name:       the name in the INODE_REF/INODE_EXTREF
4135  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4136  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4137  * to (u64)-1
4138  * @ext_ref:    the EXTENDED_IREF feature
4139  *
4140  * Return 0 if no error occurred.
4141  * Return >0 for error bitmap
4142  */
4143 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4144                           char *name, int namelen, u64 index,
4145                           unsigned int ext_ref)
4146 {
4147         struct btrfs_path path;
4148         struct btrfs_inode_ref *ref;
4149         struct btrfs_inode_extref *extref;
4150         struct extent_buffer *node;
4151         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4152         u32 total;
4153         u32 cur = 0;
4154         u32 len;
4155         u32 ref_namelen;
4156         u64 ref_index;
4157         u64 parent;
4158         u64 dir_id;
4159         int slot;
4160         int ret;
4161
4162         btrfs_init_path(&path);
4163         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4164         if (ret) {
4165                 ret = INODE_REF_MISSING;
4166                 goto extref;
4167         }
4168
4169         node = path.nodes[0];
4170         slot = path.slots[0];
4171
4172         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4173         total = btrfs_item_size_nr(node, slot);
4174
4175         /* Iterate all entry of INODE_REF */
4176         while (cur < total) {
4177                 ret = INODE_REF_MISSING;
4178
4179                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4180                 ref_index = btrfs_inode_ref_index(node, ref);
4181                 if (index != (u64)-1 && index != ref_index)
4182                         goto next_ref;
4183
4184                 if (ref_namelen <= BTRFS_NAME_LEN) {
4185                         len = ref_namelen;
4186                 } else {
4187                         len = BTRFS_NAME_LEN;
4188                         warning("root %llu INODE %s[%llu %llu] name too long",
4189                                 root->objectid,
4190                                 key->type == BTRFS_INODE_REF_KEY ?
4191                                         "REF" : "EXTREF",
4192                                 key->objectid, key->offset);
4193                 }
4194                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4195                                    len);
4196
4197                 if (len != namelen || strncmp(ref_namebuf, name, len))
4198                         goto next_ref;
4199
4200                 ret = 0;
4201                 goto out;
4202 next_ref:
4203                 len = sizeof(*ref) + ref_namelen;
4204                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4205                 cur += len;
4206         }
4207
4208 extref:
4209         /* Skip if not support EXTENDED_IREF feature */
4210         if (!ext_ref)
4211                 goto out;
4212
4213         btrfs_release_path(&path);
4214         btrfs_init_path(&path);
4215
4216         dir_id = key->offset;
4217         key->type = BTRFS_INODE_EXTREF_KEY;
4218         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4219
4220         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4221         if (ret) {
4222                 ret = INODE_REF_MISSING;
4223                 goto out;
4224         }
4225
4226         node = path.nodes[0];
4227         slot = path.slots[0];
4228
4229         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4230         cur = 0;
4231         total = btrfs_item_size_nr(node, slot);
4232
4233         /* Iterate all entry of INODE_EXTREF */
4234         while (cur < total) {
4235                 ret = INODE_REF_MISSING;
4236
4237                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4238                 ref_index = btrfs_inode_extref_index(node, extref);
4239                 parent = btrfs_inode_extref_parent(node, extref);
4240                 if (index != (u64)-1 && index != ref_index)
4241                         goto next_extref;
4242
4243                 if (parent != dir_id)
4244                         goto next_extref;
4245
4246                 if (ref_namelen <= BTRFS_NAME_LEN) {
4247                         len = ref_namelen;
4248                 } else {
4249                         len = BTRFS_NAME_LEN;
4250                         warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4251                                 root->objectid,
4252                                 key->type == BTRFS_INODE_REF_KEY ?
4253                                         "REF" : "EXTREF",
4254                                 key->objectid, key->offset);
4255                 }
4256                 read_extent_buffer(node, ref_namebuf,
4257                                    (unsigned long)(extref + 1), len);
4258
4259                 if (len != namelen || strncmp(ref_namebuf, name, len))
4260                         goto next_extref;
4261
4262                 ret = 0;
4263                 goto out;
4264
4265 next_extref:
4266                 len = sizeof(*extref) + ref_namelen;
4267                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4268                 cur += len;
4269
4270         }
4271 out:
4272         btrfs_release_path(&path);
4273         return ret;
4274 }
4275
4276 /*
4277  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4278  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4279  *
4280  * @root:       the root of the fs/file tree
4281  * @key:        the key of the INODE_REF/INODE_EXTREF
4282  * @size:       the st_size of the INODE_ITEM
4283  * @ext_ref:    the EXTENDED_IREF feature
4284  *
4285  * Return 0 if no error occurred.
4286  */
4287 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4288                           struct extent_buffer *node, int slot, u64 *size,
4289                           unsigned int ext_ref)
4290 {
4291         struct btrfs_dir_item *di;
4292         struct btrfs_inode_item *ii;
4293         struct btrfs_path path;
4294         struct btrfs_key location;
4295         char namebuf[BTRFS_NAME_LEN] = {0};
4296         u32 total;
4297         u32 cur = 0;
4298         u32 len;
4299         u32 name_len;
4300         u32 data_len;
4301         u8 filetype;
4302         u32 mode;
4303         u64 index;
4304         int ret;
4305         int err = 0;
4306
4307         /*
4308          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4309          * ignore index check.
4310          */
4311         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4312
4313         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4314         total = btrfs_item_size_nr(node, slot);
4315
4316         while (cur < total) {
4317                 data_len = btrfs_dir_data_len(node, di);
4318                 if (data_len)
4319                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4320                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4321                               "DIR_ITEM" : "DIR_INDEX",
4322                               key->objectid, key->offset, data_len);
4323
4324                 name_len = btrfs_dir_name_len(node, di);
4325                 if (name_len <= BTRFS_NAME_LEN) {
4326                         len = name_len;
4327                 } else {
4328                         len = BTRFS_NAME_LEN;
4329                         warning("root %llu %s[%llu %llu] name too long",
4330                                 root->objectid,
4331                                 key->type == BTRFS_DIR_ITEM_KEY ?
4332                                 "DIR_ITEM" : "DIR_INDEX",
4333                                 key->objectid, key->offset);
4334                 }
4335                 (*size) += name_len;
4336
4337                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4338                 filetype = btrfs_dir_type(node, di);
4339
4340                 btrfs_init_path(&path);
4341                 btrfs_dir_item_key_to_cpu(node, di, &location);
4342
4343                 /* Ignore related ROOT_ITEM check */
4344                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4345                         goto next;
4346
4347                 /* Check relative INODE_ITEM(existence/filetype) */
4348                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4349                 if (ret) {
4350                         err |= INODE_ITEM_MISSING;
4351                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4352                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4353                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4354                               key->offset, location.objectid, name_len,
4355                               namebuf, filetype);
4356                         goto next;
4357                 }
4358
4359                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4360                                     struct btrfs_inode_item);
4361                 mode = btrfs_inode_mode(path.nodes[0], ii);
4362
4363                 if (imode_to_type(mode) != filetype) {
4364                         err |= INODE_ITEM_MISMATCH;
4365                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4366                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4367                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4368                               key->offset, name_len, namebuf, filetype);
4369                 }
4370
4371                 /* Check relative INODE_REF/INODE_EXTREF */
4372                 location.type = BTRFS_INODE_REF_KEY;
4373                 location.offset = key->objectid;
4374                 ret = find_inode_ref(root, &location, namebuf, len,
4375                                        index, ext_ref);
4376                 err |= ret;
4377                 if (ret & INODE_REF_MISSING)
4378                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4379                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4380                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4381                               key->offset, name_len, namebuf, filetype);
4382
4383 next:
4384                 btrfs_release_path(&path);
4385                 len = sizeof(*di) + name_len + data_len;
4386                 di = (struct btrfs_dir_item *)((char *)di + len);
4387                 cur += len;
4388
4389                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4390                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4391                               root->objectid, key->objectid, key->offset);
4392                         break;
4393                 }
4394         }
4395
4396         return err;
4397 }
4398
4399 /*
4400  * Check file extent datasum/hole, update the size of the file extents,
4401  * check and update the last offset of the file extent.
4402  *
4403  * @root:       the root of fs/file tree.
4404  * @fkey:       the key of the file extent.
4405  * @nodatasum:  INODE_NODATASUM feature.
4406  * @size:       the sum of all EXTENT_DATA items size for this inode.
4407  * @end:        the offset of the last extent.
4408  *
4409  * Return 0 if no error occurred.
4410  */
4411 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4412                              struct extent_buffer *node, int slot,
4413                              unsigned int nodatasum, u64 *size, u64 *end)
4414 {
4415         struct btrfs_file_extent_item *fi;
4416         u64 disk_bytenr;
4417         u64 disk_num_bytes;
4418         u64 extent_num_bytes;
4419         u64 found;
4420         unsigned int extent_type;
4421         unsigned int is_hole;
4422         int ret;
4423         int err = 0;
4424
4425         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4426
4427         extent_type = btrfs_file_extent_type(node, fi);
4428         /* Skip if file extent is inline */
4429         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4430                 struct btrfs_item *e = btrfs_item_nr(slot);
4431                 u32 item_inline_len;
4432
4433                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4434                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4435                 if (extent_num_bytes == 0 ||
4436                     extent_num_bytes != item_inline_len)
4437                         err |= FILE_EXTENT_ERROR;
4438                 *size += extent_num_bytes;
4439                 return err;
4440         }
4441
4442         /* Check extent type */
4443         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4444                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4445                 err |= FILE_EXTENT_ERROR;
4446                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4447                       root->objectid, fkey->objectid, fkey->offset);
4448                 return err;
4449         }
4450
4451         /* Check REG_EXTENT/PREALLOC_EXTENT */
4452         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4453         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4454         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4455         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4456
4457         /* Check EXTENT_DATA datasum */
4458         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4459         if (found > 0 && nodatasum) {
4460                 err |= ODD_CSUM_ITEM;
4461                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4462                       root->objectid, fkey->objectid, fkey->offset);
4463         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4464                    !is_hole &&
4465                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4466                 err |= CSUM_ITEM_MISSING;
4467                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4468                       root->objectid, fkey->objectid, fkey->offset);
4469         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4470                 err |= ODD_CSUM_ITEM;
4471                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4472                       root->objectid, fkey->objectid, fkey->offset);
4473         }
4474
4475         /* Check EXTENT_DATA hole */
4476         if (no_holes && is_hole) {
4477                 err |= FILE_EXTENT_ERROR;
4478                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4479                       root->objectid, fkey->objectid, fkey->offset);
4480         } else if (!no_holes && *end != fkey->offset) {
4481                 err |= FILE_EXTENT_ERROR;
4482                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4483                       root->objectid, fkey->objectid, fkey->offset);
4484         }
4485
4486         *end += extent_num_bytes;
4487         if (!is_hole)
4488                 *size += extent_num_bytes;
4489
4490         return err;
4491 }
4492
4493 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4494 {
4495         struct list_head *cur = rec->backrefs.next;
4496         struct extent_backref *back;
4497         struct tree_backref *tback;
4498         struct data_backref *dback;
4499         u64 found = 0;
4500         int err = 0;
4501
4502         while(cur != &rec->backrefs) {
4503                 back = to_extent_backref(cur);
4504                 cur = cur->next;
4505                 if (!back->found_extent_tree) {
4506                         err = 1;
4507                         if (!print_errs)
4508                                 goto out;
4509                         if (back->is_data) {
4510                                 dback = to_data_backref(back);
4511                                 fprintf(stderr, "Backref %llu %s %llu"
4512                                         " owner %llu offset %llu num_refs %lu"
4513                                         " not found in extent tree\n",
4514                                         (unsigned long long)rec->start,
4515                                         back->full_backref ?
4516                                         "parent" : "root",
4517                                         back->full_backref ?
4518                                         (unsigned long long)dback->parent:
4519                                         (unsigned long long)dback->root,
4520                                         (unsigned long long)dback->owner,
4521                                         (unsigned long long)dback->offset,
4522                                         (unsigned long)dback->num_refs);
4523                         } else {
4524                                 tback = to_tree_backref(back);
4525                                 fprintf(stderr, "Backref %llu parent %llu"
4526                                         " root %llu not found in extent tree\n",
4527                                         (unsigned long long)rec->start,
4528                                         (unsigned long long)tback->parent,
4529                                         (unsigned long long)tback->root);
4530                         }
4531                 }
4532                 if (!back->is_data && !back->found_ref) {
4533                         err = 1;
4534                         if (!print_errs)
4535                                 goto out;
4536                         tback = to_tree_backref(back);
4537                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4538                                 (unsigned long long)rec->start,
4539                                 back->full_backref ? "parent" : "root",
4540                                 back->full_backref ?
4541                                 (unsigned long long)tback->parent :
4542                                 (unsigned long long)tback->root, back);
4543                 }
4544                 if (back->is_data) {
4545                         dback = to_data_backref(back);
4546                         if (dback->found_ref != dback->num_refs) {
4547                                 err = 1;
4548                                 if (!print_errs)
4549                                         goto out;
4550                                 fprintf(stderr, "Incorrect local backref count"
4551                                         " on %llu %s %llu owner %llu"
4552                                         " offset %llu found %u wanted %u back %p\n",
4553                                         (unsigned long long)rec->start,
4554                                         back->full_backref ?
4555                                         "parent" : "root",
4556                                         back->full_backref ?
4557                                         (unsigned long long)dback->parent:
4558                                         (unsigned long long)dback->root,
4559                                         (unsigned long long)dback->owner,
4560                                         (unsigned long long)dback->offset,
4561                                         dback->found_ref, dback->num_refs, back);
4562                         }
4563                         if (dback->disk_bytenr != rec->start) {
4564                                 err = 1;
4565                                 if (!print_errs)
4566                                         goto out;
4567                                 fprintf(stderr, "Backref disk bytenr does not"
4568                                         " match extent record, bytenr=%llu, "
4569                                         "ref bytenr=%llu\n",
4570                                         (unsigned long long)rec->start,
4571                                         (unsigned long long)dback->disk_bytenr);
4572                         }
4573
4574                         if (dback->bytes != rec->nr) {
4575                                 err = 1;
4576                                 if (!print_errs)
4577                                         goto out;
4578                                 fprintf(stderr, "Backref bytes do not match "
4579                                         "extent backref, bytenr=%llu, ref "
4580                                         "bytes=%llu, backref bytes=%llu\n",
4581                                         (unsigned long long)rec->start,
4582                                         (unsigned long long)rec->nr,
4583                                         (unsigned long long)dback->bytes);
4584                         }
4585                 }
4586                 if (!back->is_data) {
4587                         found += 1;
4588                 } else {
4589                         dback = to_data_backref(back);
4590                         found += dback->found_ref;
4591                 }
4592         }
4593         if (found != rec->refs) {
4594                 err = 1;
4595                 if (!print_errs)
4596                         goto out;
4597                 fprintf(stderr, "Incorrect global backref count "
4598                         "on %llu found %llu wanted %llu\n",
4599                         (unsigned long long)rec->start,
4600                         (unsigned long long)found,
4601                         (unsigned long long)rec->refs);
4602         }
4603 out:
4604         return err;
4605 }
4606
4607 static int free_all_extent_backrefs(struct extent_record *rec)
4608 {
4609         struct extent_backref *back;
4610         struct list_head *cur;
4611         while (!list_empty(&rec->backrefs)) {
4612                 cur = rec->backrefs.next;
4613                 back = to_extent_backref(cur);
4614                 list_del(cur);
4615                 free(back);
4616         }
4617         return 0;
4618 }
4619
4620 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4621                                      struct cache_tree *extent_cache)
4622 {
4623         struct cache_extent *cache;
4624         struct extent_record *rec;
4625
4626         while (1) {
4627                 cache = first_cache_extent(extent_cache);
4628                 if (!cache)
4629                         break;
4630                 rec = container_of(cache, struct extent_record, cache);
4631                 remove_cache_extent(extent_cache, cache);
4632                 free_all_extent_backrefs(rec);
4633                 free(rec);
4634         }
4635 }
4636
4637 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4638                                  struct extent_record *rec)
4639 {
4640         if (rec->content_checked && rec->owner_ref_checked &&
4641             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4642             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4643             !rec->bad_full_backref && !rec->crossing_stripes &&
4644             !rec->wrong_chunk_type) {
4645                 remove_cache_extent(extent_cache, &rec->cache);
4646                 free_all_extent_backrefs(rec);
4647                 list_del_init(&rec->list);
4648                 free(rec);
4649         }
4650         return 0;
4651 }
4652
4653 static int check_owner_ref(struct btrfs_root *root,
4654                             struct extent_record *rec,
4655                             struct extent_buffer *buf)
4656 {
4657         struct extent_backref *node;
4658         struct tree_backref *back;
4659         struct btrfs_root *ref_root;
4660         struct btrfs_key key;
4661         struct btrfs_path path;
4662         struct extent_buffer *parent;
4663         int level;
4664         int found = 0;
4665         int ret;
4666
4667         list_for_each_entry(node, &rec->backrefs, list) {
4668                 if (node->is_data)
4669                         continue;
4670                 if (!node->found_ref)
4671                         continue;
4672                 if (node->full_backref)
4673                         continue;
4674                 back = to_tree_backref(node);
4675                 if (btrfs_header_owner(buf) == back->root)
4676                         return 0;
4677         }
4678         BUG_ON(rec->is_root);
4679
4680         /* try to find the block by search corresponding fs tree */
4681         key.objectid = btrfs_header_owner(buf);
4682         key.type = BTRFS_ROOT_ITEM_KEY;
4683         key.offset = (u64)-1;
4684
4685         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4686         if (IS_ERR(ref_root))
4687                 return 1;
4688
4689         level = btrfs_header_level(buf);
4690         if (level == 0)
4691                 btrfs_item_key_to_cpu(buf, &key, 0);
4692         else
4693                 btrfs_node_key_to_cpu(buf, &key, 0);
4694
4695         btrfs_init_path(&path);
4696         path.lowest_level = level + 1;
4697         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4698         if (ret < 0)
4699                 return 0;
4700
4701         parent = path.nodes[level + 1];
4702         if (parent && buf->start == btrfs_node_blockptr(parent,
4703                                                         path.slots[level + 1]))
4704                 found = 1;
4705
4706         btrfs_release_path(&path);
4707         return found ? 0 : 1;
4708 }
4709
4710 static int is_extent_tree_record(struct extent_record *rec)
4711 {
4712         struct list_head *cur = rec->backrefs.next;
4713         struct extent_backref *node;
4714         struct tree_backref *back;
4715         int is_extent = 0;
4716
4717         while(cur != &rec->backrefs) {
4718                 node = to_extent_backref(cur);
4719                 cur = cur->next;
4720                 if (node->is_data)
4721                         return 0;
4722                 back = to_tree_backref(node);
4723                 if (node->full_backref)
4724                         return 0;
4725                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4726                         is_extent = 1;
4727         }
4728         return is_extent;
4729 }
4730
4731
4732 static int record_bad_block_io(struct btrfs_fs_info *info,
4733                                struct cache_tree *extent_cache,
4734                                u64 start, u64 len)
4735 {
4736         struct extent_record *rec;
4737         struct cache_extent *cache;
4738         struct btrfs_key key;
4739
4740         cache = lookup_cache_extent(extent_cache, start, len);
4741         if (!cache)
4742                 return 0;
4743
4744         rec = container_of(cache, struct extent_record, cache);
4745         if (!is_extent_tree_record(rec))
4746                 return 0;
4747
4748         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4749         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4750 }
4751
4752 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4753                        struct extent_buffer *buf, int slot)
4754 {
4755         if (btrfs_header_level(buf)) {
4756                 struct btrfs_key_ptr ptr1, ptr2;
4757
4758                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4759                                    sizeof(struct btrfs_key_ptr));
4760                 read_extent_buffer(buf, &ptr2,
4761                                    btrfs_node_key_ptr_offset(slot + 1),
4762                                    sizeof(struct btrfs_key_ptr));
4763                 write_extent_buffer(buf, &ptr1,
4764                                     btrfs_node_key_ptr_offset(slot + 1),
4765                                     sizeof(struct btrfs_key_ptr));
4766                 write_extent_buffer(buf, &ptr2,
4767                                     btrfs_node_key_ptr_offset(slot),
4768                                     sizeof(struct btrfs_key_ptr));
4769                 if (slot == 0) {
4770                         struct btrfs_disk_key key;
4771                         btrfs_node_key(buf, &key, 0);
4772                         btrfs_fixup_low_keys(root, path, &key,
4773                                              btrfs_header_level(buf) + 1);
4774                 }
4775         } else {
4776                 struct btrfs_item *item1, *item2;
4777                 struct btrfs_key k1, k2;
4778                 char *item1_data, *item2_data;
4779                 u32 item1_offset, item2_offset, item1_size, item2_size;
4780
4781                 item1 = btrfs_item_nr(slot);
4782                 item2 = btrfs_item_nr(slot + 1);
4783                 btrfs_item_key_to_cpu(buf, &k1, slot);
4784                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4785                 item1_offset = btrfs_item_offset(buf, item1);
4786                 item2_offset = btrfs_item_offset(buf, item2);
4787                 item1_size = btrfs_item_size(buf, item1);
4788                 item2_size = btrfs_item_size(buf, item2);
4789
4790                 item1_data = malloc(item1_size);
4791                 if (!item1_data)
4792                         return -ENOMEM;
4793                 item2_data = malloc(item2_size);
4794                 if (!item2_data) {
4795                         free(item1_data);
4796                         return -ENOMEM;
4797                 }
4798
4799                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4800                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4801
4802                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4803                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4804                 free(item1_data);
4805                 free(item2_data);
4806
4807                 btrfs_set_item_offset(buf, item1, item2_offset);
4808                 btrfs_set_item_offset(buf, item2, item1_offset);
4809                 btrfs_set_item_size(buf, item1, item2_size);
4810                 btrfs_set_item_size(buf, item2, item1_size);
4811
4812                 path->slots[0] = slot;
4813                 btrfs_set_item_key_unsafe(root, path, &k2);
4814                 path->slots[0] = slot + 1;
4815                 btrfs_set_item_key_unsafe(root, path, &k1);
4816         }
4817         return 0;
4818 }
4819
4820 static int fix_key_order(struct btrfs_trans_handle *trans,
4821                          struct btrfs_root *root,
4822                          struct btrfs_path *path)
4823 {
4824         struct extent_buffer *buf;
4825         struct btrfs_key k1, k2;
4826         int i;
4827         int level = path->lowest_level;
4828         int ret = -EIO;
4829
4830         buf = path->nodes[level];
4831         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4832                 if (level) {
4833                         btrfs_node_key_to_cpu(buf, &k1, i);
4834                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4835                 } else {
4836                         btrfs_item_key_to_cpu(buf, &k1, i);
4837                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4838                 }
4839                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4840                         continue;
4841                 ret = swap_values(root, path, buf, i);
4842                 if (ret)
4843                         break;
4844                 btrfs_mark_buffer_dirty(buf);
4845                 i = 0;
4846         }
4847         return ret;
4848 }
4849
4850 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4851                              struct btrfs_root *root,
4852                              struct btrfs_path *path,
4853                              struct extent_buffer *buf, int slot)
4854 {
4855         struct btrfs_key key;
4856         int nritems = btrfs_header_nritems(buf);
4857
4858         btrfs_item_key_to_cpu(buf, &key, slot);
4859
4860         /* These are all the keys we can deal with missing. */
4861         if (key.type != BTRFS_DIR_INDEX_KEY &&
4862             key.type != BTRFS_EXTENT_ITEM_KEY &&
4863             key.type != BTRFS_METADATA_ITEM_KEY &&
4864             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4865             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4866                 return -1;
4867
4868         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4869                (unsigned long long)key.objectid, key.type,
4870                (unsigned long long)key.offset, slot, buf->start);
4871         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4872                               btrfs_item_nr_offset(slot + 1),
4873                               sizeof(struct btrfs_item) *
4874                               (nritems - slot - 1));
4875         btrfs_set_header_nritems(buf, nritems - 1);
4876         if (slot == 0) {
4877                 struct btrfs_disk_key disk_key;
4878
4879                 btrfs_item_key(buf, &disk_key, 0);
4880                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4881         }
4882         btrfs_mark_buffer_dirty(buf);
4883         return 0;
4884 }
4885
4886 static int fix_item_offset(struct btrfs_trans_handle *trans,
4887                            struct btrfs_root *root,
4888                            struct btrfs_path *path)
4889 {
4890         struct extent_buffer *buf;
4891         int i;
4892         int ret = 0;
4893
4894         /* We should only get this for leaves */
4895         BUG_ON(path->lowest_level);
4896         buf = path->nodes[0];
4897 again:
4898         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4899                 unsigned int shift = 0, offset;
4900
4901                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4902                     BTRFS_LEAF_DATA_SIZE(root)) {
4903                         if (btrfs_item_end_nr(buf, i) >
4904                             BTRFS_LEAF_DATA_SIZE(root)) {
4905                                 ret = delete_bogus_item(trans, root, path,
4906                                                         buf, i);
4907                                 if (!ret)
4908                                         goto again;
4909                                 fprintf(stderr, "item is off the end of the "
4910                                         "leaf, can't fix\n");
4911                                 ret = -EIO;
4912                                 break;
4913                         }
4914                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4915                                 btrfs_item_end_nr(buf, i);
4916                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4917                            btrfs_item_offset_nr(buf, i - 1)) {
4918                         if (btrfs_item_end_nr(buf, i) >
4919                             btrfs_item_offset_nr(buf, i - 1)) {
4920                                 ret = delete_bogus_item(trans, root, path,
4921                                                         buf, i);
4922                                 if (!ret)
4923                                         goto again;
4924                                 fprintf(stderr, "items overlap, can't fix\n");
4925                                 ret = -EIO;
4926                                 break;
4927                         }
4928                         shift = btrfs_item_offset_nr(buf, i - 1) -
4929                                 btrfs_item_end_nr(buf, i);
4930                 }
4931                 if (!shift)
4932                         continue;
4933
4934                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4935                        i, shift, (unsigned long long)buf->start);
4936                 offset = btrfs_item_offset_nr(buf, i);
4937                 memmove_extent_buffer(buf,
4938                                       btrfs_leaf_data(buf) + offset + shift,
4939                                       btrfs_leaf_data(buf) + offset,
4940                                       btrfs_item_size_nr(buf, i));
4941                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4942                                       offset + shift);
4943                 btrfs_mark_buffer_dirty(buf);
4944         }
4945
4946         /*
4947          * We may have moved things, in which case we want to exit so we don't
4948          * write those changes out.  Once we have proper abort functionality in
4949          * progs this can be changed to something nicer.
4950          */
4951         BUG_ON(ret);
4952         return ret;
4953 }
4954
4955 /*
4956  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4957  * then just return -EIO.
4958  */
4959 static int try_to_fix_bad_block(struct btrfs_root *root,
4960                                 struct extent_buffer *buf,
4961                                 enum btrfs_tree_block_status status)
4962 {
4963         struct btrfs_trans_handle *trans;
4964         struct ulist *roots;
4965         struct ulist_node *node;
4966         struct btrfs_root *search_root;
4967         struct btrfs_path path;
4968         struct ulist_iterator iter;
4969         struct btrfs_key root_key, key;
4970         int ret;
4971
4972         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4973             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4974                 return -EIO;
4975
4976         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4977         if (ret)
4978                 return -EIO;
4979
4980         btrfs_init_path(&path);
4981         ULIST_ITER_INIT(&iter);
4982         while ((node = ulist_next(roots, &iter))) {
4983                 root_key.objectid = node->val;
4984                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4985                 root_key.offset = (u64)-1;
4986
4987                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4988                 if (IS_ERR(root)) {
4989                         ret = -EIO;
4990                         break;
4991                 }
4992
4993
4994                 trans = btrfs_start_transaction(search_root, 0);
4995                 if (IS_ERR(trans)) {
4996                         ret = PTR_ERR(trans);
4997                         break;
4998                 }
4999
5000                 path.lowest_level = btrfs_header_level(buf);
5001                 path.skip_check_block = 1;
5002                 if (path.lowest_level)
5003                         btrfs_node_key_to_cpu(buf, &key, 0);
5004                 else
5005                         btrfs_item_key_to_cpu(buf, &key, 0);
5006                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5007                 if (ret) {
5008                         ret = -EIO;
5009                         btrfs_commit_transaction(trans, search_root);
5010                         break;
5011                 }
5012                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5013                         ret = fix_key_order(trans, search_root, &path);
5014                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5015                         ret = fix_item_offset(trans, search_root, &path);
5016                 if (ret) {
5017                         btrfs_commit_transaction(trans, search_root);
5018                         break;
5019                 }
5020                 btrfs_release_path(&path);
5021                 btrfs_commit_transaction(trans, search_root);
5022         }
5023         ulist_free(roots);
5024         btrfs_release_path(&path);
5025         return ret;
5026 }
5027
5028 static int check_block(struct btrfs_root *root,
5029                        struct cache_tree *extent_cache,
5030                        struct extent_buffer *buf, u64 flags)
5031 {
5032         struct extent_record *rec;
5033         struct cache_extent *cache;
5034         struct btrfs_key key;
5035         enum btrfs_tree_block_status status;
5036         int ret = 0;
5037         int level;
5038
5039         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5040         if (!cache)
5041                 return 1;
5042         rec = container_of(cache, struct extent_record, cache);
5043         rec->generation = btrfs_header_generation(buf);
5044
5045         level = btrfs_header_level(buf);
5046         if (btrfs_header_nritems(buf) > 0) {
5047
5048                 if (level == 0)
5049                         btrfs_item_key_to_cpu(buf, &key, 0);
5050                 else
5051                         btrfs_node_key_to_cpu(buf, &key, 0);
5052
5053                 rec->info_objectid = key.objectid;
5054         }
5055         rec->info_level = level;
5056
5057         if (btrfs_is_leaf(buf))
5058                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5059         else
5060                 status = btrfs_check_node(root, &rec->parent_key, buf);
5061
5062         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5063                 if (repair)
5064                         status = try_to_fix_bad_block(root, buf, status);
5065                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5066                         ret = -EIO;
5067                         fprintf(stderr, "bad block %llu\n",
5068                                 (unsigned long long)buf->start);
5069                 } else {
5070                         /*
5071                          * Signal to callers we need to start the scan over
5072                          * again since we'll have cowed blocks.
5073                          */
5074                         ret = -EAGAIN;
5075                 }
5076         } else {
5077                 rec->content_checked = 1;
5078                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5079                         rec->owner_ref_checked = 1;
5080                 else {
5081                         ret = check_owner_ref(root, rec, buf);
5082                         if (!ret)
5083                                 rec->owner_ref_checked = 1;
5084                 }
5085         }
5086         if (!ret)
5087                 maybe_free_extent_rec(extent_cache, rec);
5088         return ret;
5089 }
5090
5091 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5092                                                 u64 parent, u64 root)
5093 {
5094         struct list_head *cur = rec->backrefs.next;
5095         struct extent_backref *node;
5096         struct tree_backref *back;
5097
5098         while(cur != &rec->backrefs) {
5099                 node = to_extent_backref(cur);
5100                 cur = cur->next;
5101                 if (node->is_data)
5102                         continue;
5103                 back = to_tree_backref(node);
5104                 if (parent > 0) {
5105                         if (!node->full_backref)
5106                                 continue;
5107                         if (parent == back->parent)
5108                                 return back;
5109                 } else {
5110                         if (node->full_backref)
5111                                 continue;
5112                         if (back->root == root)
5113                                 return back;
5114                 }
5115         }
5116         return NULL;
5117 }
5118
5119 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5120                                                 u64 parent, u64 root)
5121 {
5122         struct tree_backref *ref = malloc(sizeof(*ref));
5123
5124         if (!ref)
5125                 return NULL;
5126         memset(&ref->node, 0, sizeof(ref->node));
5127         if (parent > 0) {
5128                 ref->parent = parent;
5129                 ref->node.full_backref = 1;
5130         } else {
5131                 ref->root = root;
5132                 ref->node.full_backref = 0;
5133         }
5134         list_add_tail(&ref->node.list, &rec->backrefs);
5135
5136         return ref;
5137 }
5138
5139 static struct data_backref *find_data_backref(struct extent_record *rec,
5140                                                 u64 parent, u64 root,
5141                                                 u64 owner, u64 offset,
5142                                                 int found_ref,
5143                                                 u64 disk_bytenr, u64 bytes)
5144 {
5145         struct list_head *cur = rec->backrefs.next;
5146         struct extent_backref *node;
5147         struct data_backref *back;
5148
5149         while(cur != &rec->backrefs) {
5150                 node = to_extent_backref(cur);
5151                 cur = cur->next;
5152                 if (!node->is_data)
5153                         continue;
5154                 back = to_data_backref(node);
5155                 if (parent > 0) {
5156                         if (!node->full_backref)
5157                                 continue;
5158                         if (parent == back->parent)
5159                                 return back;
5160                 } else {
5161                         if (node->full_backref)
5162                                 continue;
5163                         if (back->root == root && back->owner == owner &&
5164                             back->offset == offset) {
5165                                 if (found_ref && node->found_ref &&
5166                                     (back->bytes != bytes ||
5167                                     back->disk_bytenr != disk_bytenr))
5168                                         continue;
5169                                 return back;
5170                         }
5171                 }
5172         }
5173         return NULL;
5174 }
5175
5176 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5177                                                 u64 parent, u64 root,
5178                                                 u64 owner, u64 offset,
5179                                                 u64 max_size)
5180 {
5181         struct data_backref *ref = malloc(sizeof(*ref));
5182
5183         if (!ref)
5184                 return NULL;
5185         memset(&ref->node, 0, sizeof(ref->node));
5186         ref->node.is_data = 1;
5187
5188         if (parent > 0) {
5189                 ref->parent = parent;
5190                 ref->owner = 0;
5191                 ref->offset = 0;
5192                 ref->node.full_backref = 1;
5193         } else {
5194                 ref->root = root;
5195                 ref->owner = owner;
5196                 ref->offset = offset;
5197                 ref->node.full_backref = 0;
5198         }
5199         ref->bytes = max_size;
5200         ref->found_ref = 0;
5201         ref->num_refs = 0;
5202         list_add_tail(&ref->node.list, &rec->backrefs);
5203         if (max_size > rec->max_size)
5204                 rec->max_size = max_size;
5205         return ref;
5206 }
5207
5208 /* Check if the type of extent matches with its chunk */
5209 static void check_extent_type(struct extent_record *rec)
5210 {
5211         struct btrfs_block_group_cache *bg_cache;
5212
5213         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5214         if (!bg_cache)
5215                 return;
5216
5217         /* data extent, check chunk directly*/
5218         if (!rec->metadata) {
5219                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5220                         rec->wrong_chunk_type = 1;
5221                 return;
5222         }
5223
5224         /* metadata extent, check the obvious case first */
5225         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5226                                  BTRFS_BLOCK_GROUP_METADATA))) {
5227                 rec->wrong_chunk_type = 1;
5228                 return;
5229         }
5230
5231         /*
5232          * Check SYSTEM extent, as it's also marked as metadata, we can only
5233          * make sure it's a SYSTEM extent by its backref
5234          */
5235         if (!list_empty(&rec->backrefs)) {
5236                 struct extent_backref *node;
5237                 struct tree_backref *tback;
5238                 u64 bg_type;
5239
5240                 node = to_extent_backref(rec->backrefs.next);
5241                 if (node->is_data) {
5242                         /* tree block shouldn't have data backref */
5243                         rec->wrong_chunk_type = 1;
5244                         return;
5245                 }
5246                 tback = container_of(node, struct tree_backref, node);
5247
5248                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5249                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5250                 else
5251                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5252                 if (!(bg_cache->flags & bg_type))
5253                         rec->wrong_chunk_type = 1;
5254         }
5255 }
5256
5257 /*
5258  * Allocate a new extent record, fill default values from @tmpl and insert int
5259  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5260  * the cache, otherwise it fails.
5261  */
5262 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5263                 struct extent_record *tmpl)
5264 {
5265         struct extent_record *rec;
5266         int ret = 0;
5267
5268         rec = malloc(sizeof(*rec));
5269         if (!rec)
5270                 return -ENOMEM;
5271         rec->start = tmpl->start;
5272         rec->max_size = tmpl->max_size;
5273         rec->nr = max(tmpl->nr, tmpl->max_size);
5274         rec->found_rec = tmpl->found_rec;
5275         rec->content_checked = tmpl->content_checked;
5276         rec->owner_ref_checked = tmpl->owner_ref_checked;
5277         rec->num_duplicates = 0;
5278         rec->metadata = tmpl->metadata;
5279         rec->flag_block_full_backref = FLAG_UNSET;
5280         rec->bad_full_backref = 0;
5281         rec->crossing_stripes = 0;
5282         rec->wrong_chunk_type = 0;
5283         rec->is_root = tmpl->is_root;
5284         rec->refs = tmpl->refs;
5285         rec->extent_item_refs = tmpl->extent_item_refs;
5286         rec->parent_generation = tmpl->parent_generation;
5287         INIT_LIST_HEAD(&rec->backrefs);
5288         INIT_LIST_HEAD(&rec->dups);
5289         INIT_LIST_HEAD(&rec->list);
5290         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5291         rec->cache.start = tmpl->start;
5292         rec->cache.size = tmpl->nr;
5293         ret = insert_cache_extent(extent_cache, &rec->cache);
5294         if (ret) {
5295                 free(rec);
5296                 return ret;
5297         }
5298         bytes_used += rec->nr;
5299
5300         if (tmpl->metadata)
5301                 rec->crossing_stripes = check_crossing_stripes(global_info,
5302                                 rec->start, global_info->tree_root->nodesize);
5303         check_extent_type(rec);
5304         return ret;
5305 }
5306
5307 /*
5308  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5309  * some are hints:
5310  * - refs              - if found, increase refs
5311  * - is_root           - if found, set
5312  * - content_checked   - if found, set
5313  * - owner_ref_checked - if found, set
5314  *
5315  * If not found, create a new one, initialize and insert.
5316  */
5317 static int add_extent_rec(struct cache_tree *extent_cache,
5318                 struct extent_record *tmpl)
5319 {
5320         struct extent_record *rec;
5321         struct cache_extent *cache;
5322         int ret = 0;
5323         int dup = 0;
5324
5325         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5326         if (cache) {
5327                 rec = container_of(cache, struct extent_record, cache);
5328                 if (tmpl->refs)
5329                         rec->refs++;
5330                 if (rec->nr == 1)
5331                         rec->nr = max(tmpl->nr, tmpl->max_size);
5332
5333                 /*
5334                  * We need to make sure to reset nr to whatever the extent
5335                  * record says was the real size, this way we can compare it to
5336                  * the backrefs.
5337                  */
5338                 if (tmpl->found_rec) {
5339                         if (tmpl->start != rec->start || rec->found_rec) {
5340                                 struct extent_record *tmp;
5341
5342                                 dup = 1;
5343                                 if (list_empty(&rec->list))
5344                                         list_add_tail(&rec->list,
5345                                                       &duplicate_extents);
5346
5347                                 /*
5348                                  * We have to do this song and dance in case we
5349                                  * find an extent record that falls inside of
5350                                  * our current extent record but does not have
5351                                  * the same objectid.
5352                                  */
5353                                 tmp = malloc(sizeof(*tmp));
5354                                 if (!tmp)
5355                                         return -ENOMEM;
5356                                 tmp->start = tmpl->start;
5357                                 tmp->max_size = tmpl->max_size;
5358                                 tmp->nr = tmpl->nr;
5359                                 tmp->found_rec = 1;
5360                                 tmp->metadata = tmpl->metadata;
5361                                 tmp->extent_item_refs = tmpl->extent_item_refs;
5362                                 INIT_LIST_HEAD(&tmp->list);
5363                                 list_add_tail(&tmp->list, &rec->dups);
5364                                 rec->num_duplicates++;
5365                         } else {
5366                                 rec->nr = tmpl->nr;
5367                                 rec->found_rec = 1;
5368                         }
5369                 }
5370
5371                 if (tmpl->extent_item_refs && !dup) {
5372                         if (rec->extent_item_refs) {
5373                                 fprintf(stderr, "block %llu rec "
5374                                         "extent_item_refs %llu, passed %llu\n",
5375                                         (unsigned long long)tmpl->start,
5376                                         (unsigned long long)
5377                                                         rec->extent_item_refs,
5378                                         (unsigned long long)tmpl->extent_item_refs);
5379                         }
5380                         rec->extent_item_refs = tmpl->extent_item_refs;
5381                 }
5382                 if (tmpl->is_root)
5383                         rec->is_root = 1;
5384                 if (tmpl->content_checked)
5385                         rec->content_checked = 1;
5386                 if (tmpl->owner_ref_checked)
5387                         rec->owner_ref_checked = 1;
5388                 memcpy(&rec->parent_key, &tmpl->parent_key,
5389                                 sizeof(tmpl->parent_key));
5390                 if (tmpl->parent_generation)
5391                         rec->parent_generation = tmpl->parent_generation;
5392                 if (rec->max_size < tmpl->max_size)
5393                         rec->max_size = tmpl->max_size;
5394
5395                 /*
5396                  * A metadata extent can't cross stripe_len boundary, otherwise
5397                  * kernel scrub won't be able to handle it.
5398                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5399                  * it.
5400                  */
5401                 if (tmpl->metadata)
5402                         rec->crossing_stripes = check_crossing_stripes(
5403                                         global_info, rec->start,
5404                                         global_info->tree_root->nodesize);
5405                 check_extent_type(rec);
5406                 maybe_free_extent_rec(extent_cache, rec);
5407                 return ret;
5408         }
5409
5410         ret = add_extent_rec_nolookup(extent_cache, tmpl);
5411
5412         return ret;
5413 }
5414
5415 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5416                             u64 parent, u64 root, int found_ref)
5417 {
5418         struct extent_record *rec;
5419         struct tree_backref *back;
5420         struct cache_extent *cache;
5421         int ret;
5422
5423         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5424         if (!cache) {
5425                 struct extent_record tmpl;
5426
5427                 memset(&tmpl, 0, sizeof(tmpl));
5428                 tmpl.start = bytenr;
5429                 tmpl.nr = 1;
5430                 tmpl.metadata = 1;
5431
5432                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5433                 if (ret)
5434                         return ret;
5435
5436                 /* really a bug in cache_extent implement now */
5437                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5438                 if (!cache)
5439                         return -ENOENT;
5440         }
5441
5442         rec = container_of(cache, struct extent_record, cache);
5443         if (rec->start != bytenr) {
5444                 /*
5445                  * Several cause, from unaligned bytenr to over lapping extents
5446                  */
5447                 return -EEXIST;
5448         }
5449
5450         back = find_tree_backref(rec, parent, root);
5451         if (!back) {
5452                 back = alloc_tree_backref(rec, parent, root);
5453                 if (!back)
5454                         return -ENOMEM;
5455         }
5456
5457         if (found_ref) {
5458                 if (back->node.found_ref) {
5459                         fprintf(stderr, "Extent back ref already exists "
5460                                 "for %llu parent %llu root %llu \n",
5461                                 (unsigned long long)bytenr,
5462                                 (unsigned long long)parent,
5463                                 (unsigned long long)root);
5464                 }
5465                 back->node.found_ref = 1;
5466         } else {
5467                 if (back->node.found_extent_tree) {
5468                         fprintf(stderr, "Extent back ref already exists "
5469                                 "for %llu parent %llu root %llu \n",
5470                                 (unsigned long long)bytenr,
5471                                 (unsigned long long)parent,
5472                                 (unsigned long long)root);
5473                 }
5474                 back->node.found_extent_tree = 1;
5475         }
5476         check_extent_type(rec);
5477         maybe_free_extent_rec(extent_cache, rec);
5478         return 0;
5479 }
5480
5481 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5482                             u64 parent, u64 root, u64 owner, u64 offset,
5483                             u32 num_refs, int found_ref, u64 max_size)
5484 {
5485         struct extent_record *rec;
5486         struct data_backref *back;
5487         struct cache_extent *cache;
5488         int ret;
5489
5490         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5491         if (!cache) {
5492                 struct extent_record tmpl;
5493
5494                 memset(&tmpl, 0, sizeof(tmpl));
5495                 tmpl.start = bytenr;
5496                 tmpl.nr = 1;
5497                 tmpl.max_size = max_size;
5498
5499                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5500                 if (ret)
5501                         return ret;
5502
5503                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5504                 if (!cache)
5505                         abort();
5506         }
5507
5508         rec = container_of(cache, struct extent_record, cache);
5509         if (rec->max_size < max_size)
5510                 rec->max_size = max_size;
5511
5512         /*
5513          * If found_ref is set then max_size is the real size and must match the
5514          * existing refs.  So if we have already found a ref then we need to
5515          * make sure that this ref matches the existing one, otherwise we need
5516          * to add a new backref so we can notice that the backrefs don't match
5517          * and we need to figure out who is telling the truth.  This is to
5518          * account for that awful fsync bug I introduced where we'd end up with
5519          * a btrfs_file_extent_item that would have its length include multiple
5520          * prealloc extents or point inside of a prealloc extent.
5521          */
5522         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5523                                  bytenr, max_size);
5524         if (!back) {
5525                 back = alloc_data_backref(rec, parent, root, owner, offset,
5526                                           max_size);
5527                 BUG_ON(!back);
5528         }
5529
5530         if (found_ref) {
5531                 BUG_ON(num_refs != 1);
5532                 if (back->node.found_ref)
5533                         BUG_ON(back->bytes != max_size);
5534                 back->node.found_ref = 1;
5535                 back->found_ref += 1;
5536                 back->bytes = max_size;
5537                 back->disk_bytenr = bytenr;
5538                 rec->refs += 1;
5539                 rec->content_checked = 1;
5540                 rec->owner_ref_checked = 1;
5541         } else {
5542                 if (back->node.found_extent_tree) {
5543                         fprintf(stderr, "Extent back ref already exists "
5544                                 "for %llu parent %llu root %llu "
5545                                 "owner %llu offset %llu num_refs %lu\n",
5546                                 (unsigned long long)bytenr,
5547                                 (unsigned long long)parent,
5548                                 (unsigned long long)root,
5549                                 (unsigned long long)owner,
5550                                 (unsigned long long)offset,
5551                                 (unsigned long)num_refs);
5552                 }
5553                 back->num_refs = num_refs;
5554                 back->node.found_extent_tree = 1;
5555         }
5556         maybe_free_extent_rec(extent_cache, rec);
5557         return 0;
5558 }
5559
5560 static int add_pending(struct cache_tree *pending,
5561                        struct cache_tree *seen, u64 bytenr, u32 size)
5562 {
5563         int ret;
5564         ret = add_cache_extent(seen, bytenr, size);
5565         if (ret)
5566                 return ret;
5567         add_cache_extent(pending, bytenr, size);
5568         return 0;
5569 }
5570
5571 static int pick_next_pending(struct cache_tree *pending,
5572                         struct cache_tree *reada,
5573                         struct cache_tree *nodes,
5574                         u64 last, struct block_info *bits, int bits_nr,
5575                         int *reada_bits)
5576 {
5577         unsigned long node_start = last;
5578         struct cache_extent *cache;
5579         int ret;
5580
5581         cache = search_cache_extent(reada, 0);
5582         if (cache) {
5583                 bits[0].start = cache->start;
5584                 bits[0].size = cache->size;
5585                 *reada_bits = 1;
5586                 return 1;
5587         }
5588         *reada_bits = 0;
5589         if (node_start > 32768)
5590                 node_start -= 32768;
5591
5592         cache = search_cache_extent(nodes, node_start);
5593         if (!cache)
5594                 cache = search_cache_extent(nodes, 0);
5595
5596         if (!cache) {
5597                  cache = search_cache_extent(pending, 0);
5598                  if (!cache)
5599                          return 0;
5600                  ret = 0;
5601                  do {
5602                          bits[ret].start = cache->start;
5603                          bits[ret].size = cache->size;
5604                          cache = next_cache_extent(cache);
5605                          ret++;
5606                  } while (cache && ret < bits_nr);
5607                  return ret;
5608         }
5609
5610         ret = 0;
5611         do {
5612                 bits[ret].start = cache->start;
5613                 bits[ret].size = cache->size;
5614                 cache = next_cache_extent(cache);
5615                 ret++;
5616         } while (cache && ret < bits_nr);
5617
5618         if (bits_nr - ret > 8) {
5619                 u64 lookup = bits[0].start + bits[0].size;
5620                 struct cache_extent *next;
5621                 next = search_cache_extent(pending, lookup);
5622                 while(next) {
5623                         if (next->start - lookup > 32768)
5624                                 break;
5625                         bits[ret].start = next->start;
5626                         bits[ret].size = next->size;
5627                         lookup = next->start + next->size;
5628                         ret++;
5629                         if (ret == bits_nr)
5630                                 break;
5631                         next = next_cache_extent(next);
5632                         if (!next)
5633                                 break;
5634                 }
5635         }
5636         return ret;
5637 }
5638
5639 static void free_chunk_record(struct cache_extent *cache)
5640 {
5641         struct chunk_record *rec;
5642
5643         rec = container_of(cache, struct chunk_record, cache);
5644         list_del_init(&rec->list);
5645         list_del_init(&rec->dextents);
5646         free(rec);
5647 }
5648
5649 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5650 {
5651         cache_tree_free_extents(chunk_cache, free_chunk_record);
5652 }
5653
5654 static void free_device_record(struct rb_node *node)
5655 {
5656         struct device_record *rec;
5657
5658         rec = container_of(node, struct device_record, node);
5659         free(rec);
5660 }
5661
5662 FREE_RB_BASED_TREE(device_cache, free_device_record);
5663
5664 int insert_block_group_record(struct block_group_tree *tree,
5665                               struct block_group_record *bg_rec)
5666 {
5667         int ret;
5668
5669         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5670         if (ret)
5671                 return ret;
5672
5673         list_add_tail(&bg_rec->list, &tree->block_groups);
5674         return 0;
5675 }
5676
5677 static void free_block_group_record(struct cache_extent *cache)
5678 {
5679         struct block_group_record *rec;
5680
5681         rec = container_of(cache, struct block_group_record, cache);
5682         list_del_init(&rec->list);
5683         free(rec);
5684 }
5685
5686 void free_block_group_tree(struct block_group_tree *tree)
5687 {
5688         cache_tree_free_extents(&tree->tree, free_block_group_record);
5689 }
5690
5691 int insert_device_extent_record(struct device_extent_tree *tree,
5692                                 struct device_extent_record *de_rec)
5693 {
5694         int ret;
5695
5696         /*
5697          * Device extent is a bit different from the other extents, because
5698          * the extents which belong to the different devices may have the
5699          * same start and size, so we need use the special extent cache
5700          * search/insert functions.
5701          */
5702         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5703         if (ret)
5704                 return ret;
5705
5706         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5707         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5708         return 0;
5709 }
5710
5711 static void free_device_extent_record(struct cache_extent *cache)
5712 {
5713         struct device_extent_record *rec;
5714
5715         rec = container_of(cache, struct device_extent_record, cache);
5716         if (!list_empty(&rec->chunk_list))
5717                 list_del_init(&rec->chunk_list);
5718         if (!list_empty(&rec->device_list))
5719                 list_del_init(&rec->device_list);
5720         free(rec);
5721 }
5722
5723 void free_device_extent_tree(struct device_extent_tree *tree)
5724 {
5725         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5726 }
5727
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref on the extent
 * at key.objectid, with key.offset passed as the parent.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
5748
5749 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5750                                             struct btrfs_key *key,
5751                                             int slot)
5752 {
5753         struct btrfs_chunk *ptr;
5754         struct chunk_record *rec;
5755         int num_stripes, i;
5756
5757         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5758         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5759
5760         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5761         if (!rec) {
5762                 fprintf(stderr, "memory allocation failed\n");
5763                 exit(-1);
5764         }
5765
5766         INIT_LIST_HEAD(&rec->list);
5767         INIT_LIST_HEAD(&rec->dextents);
5768         rec->bg_rec = NULL;
5769
5770         rec->cache.start = key->offset;
5771         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5772
5773         rec->generation = btrfs_header_generation(leaf);
5774
5775         rec->objectid = key->objectid;
5776         rec->type = key->type;
5777         rec->offset = key->offset;
5778
5779         rec->length = rec->cache.size;
5780         rec->owner = btrfs_chunk_owner(leaf, ptr);
5781         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5782         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5783         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5784         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5785         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5786         rec->num_stripes = num_stripes;
5787         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5788
5789         for (i = 0; i < rec->num_stripes; ++i) {
5790                 rec->stripes[i].devid =
5791                         btrfs_stripe_devid_nr(leaf, ptr, i);
5792                 rec->stripes[i].offset =
5793                         btrfs_stripe_offset_nr(leaf, ptr, i);
5794                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5795                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5796                                 BTRFS_UUID_SIZE);
5797         }
5798
5799         return rec;
5800 }
5801
5802 static int process_chunk_item(struct cache_tree *chunk_cache,
5803                               struct btrfs_key *key, struct extent_buffer *eb,
5804                               int slot)
5805 {
5806         struct chunk_record *rec;
5807         struct btrfs_chunk *chunk;
5808         int ret = 0;
5809
5810         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5811         /*
5812          * Do extra check for this chunk item,
5813          *
5814          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5815          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5816          * and owner<->key_type check.
5817          */
5818         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5819                                       key->offset);
5820         if (ret < 0) {
5821                 error("chunk(%llu, %llu) is not valid, ignore it",
5822                       key->offset, btrfs_chunk_length(eb, chunk));
5823                 return 0;
5824         }
5825         rec = btrfs_new_chunk_record(eb, key, slot);
5826         ret = insert_cache_extent(chunk_cache, &rec->cache);
5827         if (ret) {
5828                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5829                         rec->offset, rec->length);
5830                 free(rec);
5831         }
5832
5833         return ret;
5834 }
5835
5836 static int process_device_item(struct rb_root *dev_cache,
5837                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5838 {
5839         struct btrfs_dev_item *ptr;
5840         struct device_record *rec;
5841         int ret = 0;
5842
5843         ptr = btrfs_item_ptr(eb,
5844                 slot, struct btrfs_dev_item);
5845
5846         rec = malloc(sizeof(*rec));
5847         if (!rec) {
5848                 fprintf(stderr, "memory allocation failed\n");
5849                 return -ENOMEM;
5850         }
5851
5852         rec->devid = key->offset;
5853         rec->generation = btrfs_header_generation(eb);
5854
5855         rec->objectid = key->objectid;
5856         rec->type = key->type;
5857         rec->offset = key->offset;
5858
5859         rec->devid = btrfs_device_id(eb, ptr);
5860         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5861         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5862
5863         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5864         if (ret) {
5865                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5866                 free(rec);
5867         }
5868
5869         return ret;
5870 }
5871
5872 struct block_group_record *
5873 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5874                              int slot)
5875 {
5876         struct btrfs_block_group_item *ptr;
5877         struct block_group_record *rec;
5878
5879         rec = calloc(1, sizeof(*rec));
5880         if (!rec) {
5881                 fprintf(stderr, "memory allocation failed\n");
5882                 exit(-1);
5883         }
5884
5885         rec->cache.start = key->objectid;
5886         rec->cache.size = key->offset;
5887
5888         rec->generation = btrfs_header_generation(leaf);
5889
5890         rec->objectid = key->objectid;
5891         rec->type = key->type;
5892         rec->offset = key->offset;
5893
5894         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5895         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5896
5897         INIT_LIST_HEAD(&rec->list);
5898
5899         return rec;
5900 }
5901
5902 static int process_block_group_item(struct block_group_tree *block_group_cache,
5903                                     struct btrfs_key *key,
5904                                     struct extent_buffer *eb, int slot)
5905 {
5906         struct block_group_record *rec;
5907         int ret = 0;
5908
5909         rec = btrfs_new_block_group_record(eb, key, slot);
5910         ret = insert_block_group_record(block_group_cache, rec);
5911         if (ret) {
5912                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5913                         rec->objectid, rec->offset);
5914                 free(rec);
5915         }
5916
5917         return ret;
5918 }
5919
5920 struct device_extent_record *
5921 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5922                                struct btrfs_key *key, int slot)
5923 {
5924         struct device_extent_record *rec;
5925         struct btrfs_dev_extent *ptr;
5926
5927         rec = calloc(1, sizeof(*rec));
5928         if (!rec) {
5929                 fprintf(stderr, "memory allocation failed\n");
5930                 exit(-1);
5931         }
5932
5933         rec->cache.objectid = key->objectid;
5934         rec->cache.start = key->offset;
5935
5936         rec->generation = btrfs_header_generation(leaf);
5937
5938         rec->objectid = key->objectid;
5939         rec->type = key->type;
5940         rec->offset = key->offset;
5941
5942         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5943         rec->chunk_objecteid =
5944                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5945         rec->chunk_offset =
5946                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5947         rec->length = btrfs_dev_extent_length(leaf, ptr);
5948         rec->cache.size = rec->length;
5949
5950         INIT_LIST_HEAD(&rec->chunk_list);
5951         INIT_LIST_HEAD(&rec->device_list);
5952
5953         return rec;
5954 }
5955
5956 static int
5957 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5958                            struct btrfs_key *key, struct extent_buffer *eb,
5959                            int slot)
5960 {
5961         struct device_extent_record *rec;
5962         int ret;
5963
5964         rec = btrfs_new_device_extent_record(eb, key, slot);
5965         ret = insert_device_extent_record(dev_extent_cache, rec);
5966         if (ret) {
5967                 fprintf(stderr,
5968                         "Device extent[%llu, %llu, %llu] existed.\n",
5969                         rec->objectid, rec->offset, rec->length);
5970                 free(rec);
5971         }
5972
5973         return ret;
5974 }
5975
5976 static int process_extent_item(struct btrfs_root *root,
5977                                struct cache_tree *extent_cache,
5978                                struct extent_buffer *eb, int slot)
5979 {
5980         struct btrfs_extent_item *ei;
5981         struct btrfs_extent_inline_ref *iref;
5982         struct btrfs_extent_data_ref *dref;
5983         struct btrfs_shared_data_ref *sref;
5984         struct btrfs_key key;
5985         struct extent_record tmpl;
5986         unsigned long end;
5987         unsigned long ptr;
5988         int ret;
5989         int type;
5990         u32 item_size = btrfs_item_size_nr(eb, slot);
5991         u64 refs = 0;
5992         u64 offset;
5993         u64 num_bytes;
5994         int metadata = 0;
5995
5996         btrfs_item_key_to_cpu(eb, &key, slot);
5997
5998         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5999                 metadata = 1;
6000                 num_bytes = root->nodesize;
6001         } else {
6002                 num_bytes = key.offset;
6003         }
6004
6005         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6006                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6007                       key.objectid, root->sectorsize);
6008                 return -EIO;
6009         }
6010         if (item_size < sizeof(*ei)) {
6011 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6012                 struct btrfs_extent_item_v0 *ei0;
6013                 BUG_ON(item_size != sizeof(*ei0));
6014                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6015                 refs = btrfs_extent_refs_v0(eb, ei0);
6016 #else
6017                 BUG();
6018 #endif
6019                 memset(&tmpl, 0, sizeof(tmpl));
6020                 tmpl.start = key.objectid;
6021                 tmpl.nr = num_bytes;
6022                 tmpl.extent_item_refs = refs;
6023                 tmpl.metadata = metadata;
6024                 tmpl.found_rec = 1;
6025                 tmpl.max_size = num_bytes;
6026
6027                 return add_extent_rec(extent_cache, &tmpl);
6028         }
6029
6030         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6031         refs = btrfs_extent_refs(eb, ei);
6032         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6033                 metadata = 1;
6034         else
6035                 metadata = 0;
6036         if (metadata && num_bytes != root->nodesize) {
6037                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6038                       num_bytes, root->nodesize);
6039                 return -EIO;
6040         }
6041         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6042                 error("ignore invalid data extent, length %llu is not aligned to %u",
6043                       num_bytes, root->sectorsize);
6044                 return -EIO;
6045         }
6046
6047         memset(&tmpl, 0, sizeof(tmpl));
6048         tmpl.start = key.objectid;
6049         tmpl.nr = num_bytes;
6050         tmpl.extent_item_refs = refs;
6051         tmpl.metadata = metadata;
6052         tmpl.found_rec = 1;
6053         tmpl.max_size = num_bytes;
6054         add_extent_rec(extent_cache, &tmpl);
6055
6056         ptr = (unsigned long)(ei + 1);
6057         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6058             key.type == BTRFS_EXTENT_ITEM_KEY)
6059                 ptr += sizeof(struct btrfs_tree_block_info);
6060
6061         end = (unsigned long)ei + item_size;
6062         while (ptr < end) {
6063                 iref = (struct btrfs_extent_inline_ref *)ptr;
6064                 type = btrfs_extent_inline_ref_type(eb, iref);
6065                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6066                 switch (type) {
6067                 case BTRFS_TREE_BLOCK_REF_KEY:
6068                         ret = add_tree_backref(extent_cache, key.objectid,
6069                                         0, offset, 0);
6070                         if (ret < 0)
6071                                 error("add_tree_backref failed: %s",
6072                                       strerror(-ret));
6073                         break;
6074                 case BTRFS_SHARED_BLOCK_REF_KEY:
6075                         ret = add_tree_backref(extent_cache, key.objectid,
6076                                         offset, 0, 0);
6077                         if (ret < 0)
6078                                 error("add_tree_backref failed: %s",
6079                                       strerror(-ret));
6080                         break;
6081                 case BTRFS_EXTENT_DATA_REF_KEY:
6082                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6083                         add_data_backref(extent_cache, key.objectid, 0,
6084                                         btrfs_extent_data_ref_root(eb, dref),
6085                                         btrfs_extent_data_ref_objectid(eb,
6086                                                                        dref),
6087                                         btrfs_extent_data_ref_offset(eb, dref),
6088                                         btrfs_extent_data_ref_count(eb, dref),
6089                                         0, num_bytes);
6090                         break;
6091                 case BTRFS_SHARED_DATA_REF_KEY:
6092                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6093                         add_data_backref(extent_cache, key.objectid, offset,
6094                                         0, 0, 0,
6095                                         btrfs_shared_data_ref_count(eb, sref),
6096                                         0, num_bytes);
6097                         break;
6098                 default:
6099                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6100                                 key.objectid, key.type, num_bytes);
6101                         goto out;
6102                 }
6103                 ptr += btrfs_extent_inline_ref_size(type);
6104         }
6105         WARN_ON(ptr > end);
6106 out:
6107         return 0;
6108 }
6109
6110 static int check_cache_range(struct btrfs_root *root,
6111                              struct btrfs_block_group_cache *cache,
6112                              u64 offset, u64 bytes)
6113 {
6114         struct btrfs_free_space *entry;
6115         u64 *logical;
6116         u64 bytenr;
6117         int stripe_len;
6118         int i, nr, ret;
6119
6120         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6121                 bytenr = btrfs_sb_offset(i);
6122                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6123                                        cache->key.objectid, bytenr, 0,
6124                                        &logical, &nr, &stripe_len);
6125                 if (ret)
6126                         return ret;
6127
6128                 while (nr--) {
6129                         if (logical[nr] + stripe_len <= offset)
6130                                 continue;
6131                         if (offset + bytes <= logical[nr])
6132                                 continue;
6133                         if (logical[nr] == offset) {
6134                                 if (stripe_len >= bytes) {
6135                                         free(logical);
6136                                         return 0;
6137                                 }
6138                                 bytes -= stripe_len;
6139                                 offset += stripe_len;
6140                         } else if (logical[nr] < offset) {
6141                                 if (logical[nr] + stripe_len >=
6142                                     offset + bytes) {
6143                                         free(logical);
6144                                         return 0;
6145                                 }
6146                                 bytes = (offset + bytes) -
6147                                         (logical[nr] + stripe_len);
6148                                 offset = logical[nr] + stripe_len;
6149                         } else {
6150                                 /*
6151                                  * Could be tricky, the super may land in the
6152                                  * middle of the area we're checking.  First
6153                                  * check the easiest case, it's at the end.
6154                                  */
6155                                 if (logical[nr] + stripe_len >=
6156                                     bytes + offset) {
6157                                         bytes = logical[nr] - offset;
6158                                         continue;
6159                                 }
6160
6161                                 /* Check the left side */
6162                                 ret = check_cache_range(root, cache,
6163                                                         offset,
6164                                                         logical[nr] - offset);
6165                                 if (ret) {
6166                                         free(logical);
6167                                         return ret;
6168                                 }
6169
6170                                 /* Now we continue with the right side */
6171                                 bytes = (offset + bytes) -
6172                                         (logical[nr] + stripe_len);
6173                                 offset = logical[nr] + stripe_len;
6174                         }
6175                 }
6176
6177                 free(logical);
6178         }
6179
6180         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6181         if (!entry) {
6182                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6183                         offset, offset+bytes);
6184                 return -EINVAL;
6185         }
6186
6187         if (entry->offset != offset) {
6188                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6189                         entry->offset);
6190                 return -EINVAL;
6191         }
6192
6193         if (entry->bytes != bytes) {
6194                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6195                         bytes, entry->bytes, offset);
6196                 return -EINVAL;
6197         }
6198
6199         unlink_free_space(cache->free_space_ctl, entry);
6200         free(entry);
6201         return 0;
6202 }
6203
6204 static int verify_space_cache(struct btrfs_root *root,
6205                               struct btrfs_block_group_cache *cache)
6206 {
6207         struct btrfs_path path;
6208         struct extent_buffer *leaf;
6209         struct btrfs_key key;
6210         u64 last;
6211         int ret = 0;
6212
6213         root = root->fs_info->extent_root;
6214
6215         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6216
6217         btrfs_init_path(&path);
6218         key.objectid = last;
6219         key.offset = 0;
6220         key.type = BTRFS_EXTENT_ITEM_KEY;
6221         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6222         if (ret < 0)
6223                 goto out;
6224         ret = 0;
6225         while (1) {
6226                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6227                         ret = btrfs_next_leaf(root, &path);
6228                         if (ret < 0)
6229                                 goto out;
6230                         if (ret > 0) {
6231                                 ret = 0;
6232                                 break;
6233                         }
6234                 }
6235                 leaf = path.nodes[0];
6236                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6237                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6238                         break;
6239                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6240                     key.type != BTRFS_METADATA_ITEM_KEY) {
6241                         path.slots[0]++;
6242                         continue;
6243                 }
6244
6245                 if (last == key.objectid) {
6246                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6247                                 last = key.objectid + key.offset;
6248                         else
6249                                 last = key.objectid + root->nodesize;
6250                         path.slots[0]++;
6251                         continue;
6252                 }
6253
6254                 ret = check_cache_range(root, cache, last,
6255                                         key.objectid - last);
6256                 if (ret)
6257                         break;
6258                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6259                         last = key.objectid + key.offset;
6260                 else
6261                         last = key.objectid + root->nodesize;
6262                 path.slots[0]++;
6263         }
6264
6265         if (last < cache->key.objectid + cache->key.offset)
6266                 ret = check_cache_range(root, cache, last,
6267                                         cache->key.objectid +
6268                                         cache->key.offset - last);
6269
6270 out:
6271         btrfs_release_path(&path);
6272
6273         if (!ret &&
6274             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6275                 fprintf(stderr, "There are still entries left in the space "
6276                         "cache\n");
6277                 ret = -EINVAL;
6278         }
6279
6280         return ret;
6281 }
6282
6283 static int check_space_cache(struct btrfs_root *root)
6284 {
6285         struct btrfs_block_group_cache *cache;
6286         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6287         int ret;
6288         int error = 0;
6289
6290         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6291             btrfs_super_generation(root->fs_info->super_copy) !=
6292             btrfs_super_cache_generation(root->fs_info->super_copy)) {
6293                 printf("cache and super generation don't match, space cache "
6294                        "will be invalidated\n");
6295                 return 0;
6296         }
6297
6298         if (ctx.progress_enabled) {
6299                 ctx.tp = TASK_FREE_SPACE;
6300                 task_start(ctx.info);
6301         }
6302
6303         while (1) {
6304                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6305                 if (!cache)
6306                         break;
6307
6308                 start = cache->key.objectid + cache->key.offset;
6309                 if (!cache->free_space_ctl) {
6310                         if (btrfs_init_free_space_ctl(cache,
6311                                                       root->sectorsize)) {
6312                                 ret = -ENOMEM;
6313                                 break;
6314                         }
6315                 } else {
6316                         btrfs_remove_free_space_cache(cache);
6317                 }
6318
6319                 if (btrfs_fs_compat_ro(root->fs_info,
6320                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6321                         ret = exclude_super_stripes(root, cache);
6322                         if (ret) {
6323                                 fprintf(stderr, "could not exclude super stripes: %s\n",
6324                                         strerror(-ret));
6325                                 error++;
6326                                 continue;
6327                         }
6328                         ret = load_free_space_tree(root->fs_info, cache);
6329                         free_excluded_extents(root, cache);
6330                         if (ret < 0) {
6331                                 fprintf(stderr, "could not load free space tree: %s\n",
6332                                         strerror(-ret));
6333                                 error++;
6334                                 continue;
6335                         }
6336                         error += ret;
6337                 } else {
6338                         ret = load_free_space_cache(root->fs_info, cache);
6339                         if (!ret)
6340                                 continue;
6341                 }
6342
6343                 ret = verify_space_cache(root, cache);
6344                 if (ret) {
6345                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
6346                                 cache->key.objectid);
6347                         error++;
6348                 }
6349         }
6350
6351         task_stop(ctx.info);
6352
6353         return error ? -EINVAL : 0;
6354 }
6355
6356 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6357                         u64 num_bytes, unsigned long leaf_offset,
6358                         struct extent_buffer *eb) {
6359
6360         u64 offset = 0;
6361         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6362         char *data;
6363         unsigned long csum_offset;
6364         u32 csum;
6365         u32 csum_expected;
6366         u64 read_len;
6367         u64 data_checked = 0;
6368         u64 tmp;
6369         int ret = 0;
6370         int mirror;
6371         int num_copies;
6372
6373         if (num_bytes % root->sectorsize)
6374                 return -EINVAL;
6375
6376         data = malloc(num_bytes);
6377         if (!data)
6378                 return -ENOMEM;
6379
6380         while (offset < num_bytes) {
6381                 mirror = 0;
6382 again:
6383                 read_len = num_bytes - offset;
6384                 /* read as much space once a time */
6385                 ret = read_extent_data(root, data + offset,
6386                                 bytenr + offset, &read_len, mirror);
6387                 if (ret)
6388                         goto out;
6389                 data_checked = 0;
6390                 /* verify every 4k data's checksum */
6391                 while (data_checked < read_len) {
6392                         csum = ~(u32)0;
6393                         tmp = offset + data_checked;
6394
6395                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
6396                                                csum, root->sectorsize);
6397                         btrfs_csum_final(csum, (u8 *)&csum);
6398
6399                         csum_offset = leaf_offset +
6400                                  tmp / root->sectorsize * csum_size;
6401                         read_extent_buffer(eb, (char *)&csum_expected,
6402                                            csum_offset, csum_size);
6403                         /* try another mirror */
6404                         if (csum != csum_expected) {
6405                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6406                                                 mirror, bytenr + tmp,
6407                                                 csum, csum_expected);
6408                                 num_copies = btrfs_num_copies(
6409                                                 &root->fs_info->mapping_tree,
6410                                                 bytenr, num_bytes);
6411                                 if (mirror < num_copies - 1) {
6412                                         mirror += 1;
6413                                         goto again;
6414                                 }
6415                         }
6416                         data_checked += root->sectorsize;
6417                 }
6418                 offset += read_len;
6419         }
6420 out:
6421         free(data);
6422         return ret;
6423 }
6424
6425 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6426                                u64 num_bytes)
6427 {
6428         struct btrfs_path path;
6429         struct extent_buffer *leaf;
6430         struct btrfs_key key;
6431         int ret;
6432
6433         btrfs_init_path(&path);
6434         key.objectid = bytenr;
6435         key.type = BTRFS_EXTENT_ITEM_KEY;
6436         key.offset = (u64)-1;
6437
6438 again:
6439         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6440                                 0, 0);
6441         if (ret < 0) {
6442                 fprintf(stderr, "Error looking up extent record %d\n", ret);
6443                 btrfs_release_path(&path);
6444                 return ret;
6445         } else if (ret) {
6446                 if (path.slots[0] > 0) {
6447                         path.slots[0]--;
6448                 } else {
6449                         ret = btrfs_prev_leaf(root, &path);
6450                         if (ret < 0) {
6451                                 goto out;
6452                         } else if (ret > 0) {
6453                                 ret = 0;
6454                                 goto out;
6455                         }
6456                 }
6457         }
6458
6459         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6460
6461         /*
6462          * Block group items come before extent items if they have the same
6463          * bytenr, so walk back one more just in case.  Dear future traveller,
6464          * first congrats on mastering time travel.  Now if it's not too much
6465          * trouble could you go back to 2006 and tell Chris to make the
6466          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6467          * EXTENT_ITEM_KEY please?
6468          */
6469         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6470                 if (path.slots[0] > 0) {
6471                         path.slots[0]--;
6472                 } else {
6473                         ret = btrfs_prev_leaf(root, &path);
6474                         if (ret < 0) {
6475                                 goto out;
6476                         } else if (ret > 0) {
6477                                 ret = 0;
6478                                 goto out;
6479                         }
6480                 }
6481                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6482         }
6483
6484         while (num_bytes) {
6485                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6486                         ret = btrfs_next_leaf(root, &path);
6487                         if (ret < 0) {
6488                                 fprintf(stderr, "Error going to next leaf "
6489                                         "%d\n", ret);
6490                                 btrfs_release_path(&path);
6491                                 return ret;
6492                         } else if (ret) {
6493                                 break;
6494                         }
6495                 }
6496                 leaf = path.nodes[0];
6497                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6498                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6499                         path.slots[0]++;
6500                         continue;
6501                 }
6502                 if (key.objectid + key.offset < bytenr) {
6503                         path.slots[0]++;
6504                         continue;
6505                 }
6506                 if (key.objectid > bytenr + num_bytes)
6507                         break;
6508
6509                 if (key.objectid == bytenr) {
6510                         if (key.offset >= num_bytes) {
6511                                 num_bytes = 0;
6512                                 break;
6513                         }
6514                         num_bytes -= key.offset;
6515                         bytenr += key.offset;
6516                 } else if (key.objectid < bytenr) {
6517                         if (key.objectid + key.offset >= bytenr + num_bytes) {
6518                                 num_bytes = 0;
6519                                 break;
6520                         }
6521                         num_bytes = (bytenr + num_bytes) -
6522                                 (key.objectid + key.offset);
6523                         bytenr = key.objectid + key.offset;
6524                 } else {
6525                         if (key.objectid + key.offset < bytenr + num_bytes) {
6526                                 u64 new_start = key.objectid + key.offset;
6527                                 u64 new_bytes = bytenr + num_bytes - new_start;
6528
6529                                 /*
6530                                  * Weird case, the extent is in the middle of
6531                                  * our range, we'll have to search one side
6532                                  * and then the other.  Not sure if this happens
6533                                  * in real life, but no harm in coding it up
6534                                  * anyway just in case.
6535                                  */
6536                                 btrfs_release_path(&path);
6537                                 ret = check_extent_exists(root, new_start,
6538                                                           new_bytes);
6539                                 if (ret) {
6540                                         fprintf(stderr, "Right section didn't "
6541                                                 "have a record\n");
6542                                         break;
6543                                 }
6544                                 num_bytes = key.objectid - bytenr;
6545                                 goto again;
6546                         }
6547                         num_bytes = key.objectid - bytenr;
6548                 }
6549                 path.slots[0]++;
6550         }
6551         ret = 0;
6552
6553 out:
6554         if (num_bytes && !ret) {
6555                 fprintf(stderr, "There are no extents for csum range "
6556                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6557                 ret = 1;
6558         }
6559
6560         btrfs_release_path(&path);
6561         return ret;
6562 }
6563
6564 static int check_csums(struct btrfs_root *root)
6565 {
6566         struct btrfs_path path;
6567         struct extent_buffer *leaf;
6568         struct btrfs_key key;
6569         u64 offset = 0, num_bytes = 0;
6570         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6571         int errors = 0;
6572         int ret;
6573         u64 data_len;
6574         unsigned long leaf_offset;
6575
6576         root = root->fs_info->csum_root;
6577         if (!extent_buffer_uptodate(root->node)) {
6578                 fprintf(stderr, "No valid csum tree found\n");
6579                 return -ENOENT;
6580         }
6581
6582         btrfs_init_path(&path);
6583         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6584         key.type = BTRFS_EXTENT_CSUM_KEY;
6585         key.offset = 0;
6586         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6587         if (ret < 0) {
6588                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6589                 btrfs_release_path(&path);
6590                 return ret;
6591         }
6592
6593         if (ret > 0 && path.slots[0])
6594                 path.slots[0]--;
6595         ret = 0;
6596
6597         while (1) {
6598                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6599                         ret = btrfs_next_leaf(root, &path);
6600                         if (ret < 0) {
6601                                 fprintf(stderr, "Error going to next leaf "
6602                                         "%d\n", ret);
6603                                 break;
6604                         }
6605                         if (ret)
6606                                 break;
6607                 }
6608                 leaf = path.nodes[0];
6609
6610                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6611                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6612                         path.slots[0]++;
6613                         continue;
6614                 }
6615
6616                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6617                               csum_size) * root->sectorsize;
6618                 if (!check_data_csum)
6619                         goto skip_csum_check;
6620                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6621                 ret = check_extent_csums(root, key.offset, data_len,
6622                                          leaf_offset, leaf);
6623                 if (ret)
6624                         break;
6625 skip_csum_check:
6626                 if (!num_bytes) {
6627                         offset = key.offset;
6628                 } else if (key.offset != offset + num_bytes) {
6629                         ret = check_extent_exists(root, offset, num_bytes);
6630                         if (ret) {
6631                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6632                                         "there is no extent record\n",
6633                                         offset, offset+num_bytes);
6634                                 errors++;
6635                         }
6636                         offset = key.offset;
6637                         num_bytes = 0;
6638                 }
6639                 num_bytes += data_len;
6640                 path.slots[0]++;
6641         }
6642
6643         btrfs_release_path(&path);
6644         return errors;
6645 }
6646
6647 static int is_dropped_key(struct btrfs_key *key,
6648                           struct btrfs_key *drop_key) {
6649         if (key->objectid < drop_key->objectid)
6650                 return 1;
6651         else if (key->objectid == drop_key->objectid) {
6652                 if (key->type < drop_key->type)
6653                         return 1;
6654                 else if (key->type == drop_key->type) {
6655                         if (key->offset < drop_key->offset)
6656                                 return 1;
6657                 }
6658         }
6659         return 0;
6660 }
6661
6662 /*
6663  * Here are the rules for FULL_BACKREF.
6664  *
6665  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6666  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6667  *      FULL_BACKREF set.
6668  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6669  *    if it happened after the relocation occurred since we'll have dropped the
6670  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6671  *    have no real way to know for sure.
6672  *
6673  * We process the blocks one root at a time, and we start from the lowest root
6674  * objectid and go to the highest.  So we can just lookup the owner backref for
6675  * the record and if we don't find it then we know it doesn't exist and we have
6676  * a FULL BACKREF.
6677  *
6678  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6679  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6680  * be set or not and then we can check later once we've gathered all the refs.
6681  */
6682 static int calc_extent_flag(struct btrfs_root *root,
6683                            struct cache_tree *extent_cache,
6684                            struct extent_buffer *buf,
6685                            struct root_item_record *ri,
6686                            u64 *flags)
6687 {
6688         struct extent_record *rec;
6689         struct cache_extent *cache;
6690         struct tree_backref *tback;
6691         u64 owner = 0;
6692
6693         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6694         /* we have added this extent before */
6695         if (!cache)
6696                 return -ENOENT;
6697
6698         rec = container_of(cache, struct extent_record, cache);
6699
6700         /*
6701          * Except file/reloc tree, we can not have
6702          * FULL BACKREF MODE
6703          */
6704         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6705                 goto normal;
6706         /*
6707          * root node
6708          */
6709         if (buf->start == ri->bytenr)
6710                 goto normal;
6711
6712         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6713                 goto full_backref;
6714
6715         owner = btrfs_header_owner(buf);
6716         if (owner == ri->objectid)
6717                 goto normal;
6718
6719         tback = find_tree_backref(rec, 0, owner);
6720         if (!tback)
6721                 goto full_backref;
6722 normal:
6723         *flags = 0;
6724         if (rec->flag_block_full_backref != FLAG_UNSET &&
6725             rec->flag_block_full_backref != 0)
6726                 rec->bad_full_backref = 1;
6727         return 0;
6728 full_backref:
6729         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6730         if (rec->flag_block_full_backref != FLAG_UNSET &&
6731             rec->flag_block_full_backref != 1)
6732                 rec->bad_full_backref = 1;
6733         return 0;
6734 }
6735
6736 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6737 {
6738         fprintf(stderr, "Invalid key type(");
6739         print_key_type(stderr, 0, key_type);
6740         fprintf(stderr, ") found in root(");
6741         print_objectid(stderr, rootid, 0);
6742         fprintf(stderr, ")\n");
6743 }
6744
6745 /*
6746  * Check if the key is valid with its extent buffer.
6747  *
6748  * This is a early check in case invalid key exists in a extent buffer
6749  * This is not comprehensive yet, but should prevent wrong key/item passed
6750  * further
6751  */
6752 static int check_type_with_root(u64 rootid, u8 key_type)
6753 {
6754         switch (key_type) {
6755         /* Only valid in chunk tree */
6756         case BTRFS_DEV_ITEM_KEY:
6757         case BTRFS_CHUNK_ITEM_KEY:
6758                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6759                         goto err;
6760                 break;
6761         /* valid in csum and log tree */
6762         case BTRFS_CSUM_TREE_OBJECTID:
6763                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6764                       is_fstree(rootid)))
6765                         goto err;
6766                 break;
6767         case BTRFS_EXTENT_ITEM_KEY:
6768         case BTRFS_METADATA_ITEM_KEY:
6769         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6770                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6771                         goto err;
6772                 break;
6773         case BTRFS_ROOT_ITEM_KEY:
6774                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6775                         goto err;
6776                 break;
6777         case BTRFS_DEV_EXTENT_KEY:
6778                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6779                         goto err;
6780                 break;
6781         }
6782         return 0;
6783 err:
6784         report_mismatch_key_root(key_type, rootid);
6785         return -EINVAL;
6786 }
6787
6788 static int run_next_block(struct btrfs_root *root,
6789                           struct block_info *bits,
6790                           int bits_nr,
6791                           u64 *last,
6792                           struct cache_tree *pending,
6793                           struct cache_tree *seen,
6794                           struct cache_tree *reada,
6795                           struct cache_tree *nodes,
6796                           struct cache_tree *extent_cache,
6797                           struct cache_tree *chunk_cache,
6798                           struct rb_root *dev_cache,
6799                           struct block_group_tree *block_group_cache,
6800                           struct device_extent_tree *dev_extent_cache,
6801                           struct root_item_record *ri)
6802 {
6803         struct extent_buffer *buf;
6804         struct extent_record *rec = NULL;
6805         u64 bytenr;
6806         u32 size;
6807         u64 parent;
6808         u64 owner;
6809         u64 flags;
6810         u64 ptr;
6811         u64 gen = 0;
6812         int ret = 0;
6813         int i;
6814         int nritems;
6815         struct btrfs_key key;
6816         struct cache_extent *cache;
6817         int reada_bits;
6818
6819         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6820                                     bits_nr, &reada_bits);
6821         if (nritems == 0)
6822                 return 1;
6823
6824         if (!reada_bits) {
6825                 for(i = 0; i < nritems; i++) {
6826                         ret = add_cache_extent(reada, bits[i].start,
6827                                                bits[i].size);
6828                         if (ret == -EEXIST)
6829                                 continue;
6830
6831                         /* fixme, get the parent transid */
6832                         readahead_tree_block(root, bits[i].start,
6833                                              bits[i].size, 0);
6834                 }
6835         }
6836         *last = bits[0].start;
6837         bytenr = bits[0].start;
6838         size = bits[0].size;
6839
6840         cache = lookup_cache_extent(pending, bytenr, size);
6841         if (cache) {
6842                 remove_cache_extent(pending, cache);
6843                 free(cache);
6844         }
6845         cache = lookup_cache_extent(reada, bytenr, size);
6846         if (cache) {
6847                 remove_cache_extent(reada, cache);
6848                 free(cache);
6849         }
6850         cache = lookup_cache_extent(nodes, bytenr, size);
6851         if (cache) {
6852                 remove_cache_extent(nodes, cache);
6853                 free(cache);
6854         }
6855         cache = lookup_cache_extent(extent_cache, bytenr, size);
6856         if (cache) {
6857                 rec = container_of(cache, struct extent_record, cache);
6858                 gen = rec->parent_generation;
6859         }
6860
6861         /* fixme, get the real parent transid */
6862         buf = read_tree_block(root, bytenr, size, gen);
6863         if (!extent_buffer_uptodate(buf)) {
6864                 record_bad_block_io(root->fs_info,
6865                                     extent_cache, bytenr, size);
6866                 goto out;
6867         }
6868
6869         nritems = btrfs_header_nritems(buf);
6870
6871         flags = 0;
6872         if (!init_extent_tree) {
6873                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6874                                        btrfs_header_level(buf), 1, NULL,
6875                                        &flags);
6876                 if (ret < 0) {
6877                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6878                         if (ret < 0) {
6879                                 fprintf(stderr, "Couldn't calc extent flags\n");
6880                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6881                         }
6882                 }
6883         } else {
6884                 flags = 0;
6885                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6886                 if (ret < 0) {
6887                         fprintf(stderr, "Couldn't calc extent flags\n");
6888                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6889                 }
6890         }
6891
6892         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6893                 if (ri != NULL &&
6894                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6895                     ri->objectid == btrfs_header_owner(buf)) {
6896                         /*
6897                          * Ok we got to this block from it's original owner and
6898                          * we have FULL_BACKREF set.  Relocation can leave
6899                          * converted blocks over so this is altogether possible,
6900                          * however it's not possible if the generation > the
6901                          * last snapshot, so check for this case.
6902                          */
6903                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6904                             btrfs_header_generation(buf) > ri->last_snapshot) {
6905                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6906                                 rec->bad_full_backref = 1;
6907                         }
6908                 }
6909         } else {
6910                 if (ri != NULL &&
6911                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6912                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6913                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6914                         rec->bad_full_backref = 1;
6915                 }
6916         }
6917
6918         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6919                 rec->flag_block_full_backref = 1;
6920                 parent = bytenr;
6921                 owner = 0;
6922         } else {
6923                 rec->flag_block_full_backref = 0;
6924                 parent = 0;
6925                 owner = btrfs_header_owner(buf);
6926         }
6927
6928         ret = check_block(root, extent_cache, buf, flags);
6929         if (ret)
6930                 goto out;
6931
6932         if (btrfs_is_leaf(buf)) {
6933                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6934                 for (i = 0; i < nritems; i++) {
6935                         struct btrfs_file_extent_item *fi;
6936                         btrfs_item_key_to_cpu(buf, &key, i);
6937                         /*
6938                          * Check key type against the leaf owner.
6939                          * Could filter quite a lot of early error if
6940                          * owner is correct
6941                          */
6942                         if (check_type_with_root(btrfs_header_owner(buf),
6943                                                  key.type)) {
6944                                 fprintf(stderr, "ignoring invalid key\n");
6945                                 continue;
6946                         }
6947                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6948                                 process_extent_item(root, extent_cache, buf,
6949                                                     i);
6950                                 continue;
6951                         }
6952                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6953                                 process_extent_item(root, extent_cache, buf,
6954                                                     i);
6955                                 continue;
6956                         }
6957                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6958                                 total_csum_bytes +=
6959                                         btrfs_item_size_nr(buf, i);
6960                                 continue;
6961                         }
6962                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6963                                 process_chunk_item(chunk_cache, &key, buf, i);
6964                                 continue;
6965                         }
6966                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6967                                 process_device_item(dev_cache, &key, buf, i);
6968                                 continue;
6969                         }
6970                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6971                                 process_block_group_item(block_group_cache,
6972                                         &key, buf, i);
6973                                 continue;
6974                         }
6975                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6976                                 process_device_extent_item(dev_extent_cache,
6977                                         &key, buf, i);
6978                                 continue;
6979
6980                         }
6981                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6982 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6983                                 process_extent_ref_v0(extent_cache, buf, i);
6984 #else
6985                                 BUG();
6986 #endif
6987                                 continue;
6988                         }
6989
6990                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6991                                 ret = add_tree_backref(extent_cache,
6992                                                 key.objectid, 0, key.offset, 0);
6993                                 if (ret < 0)
6994                                         error("add_tree_backref failed: %s",
6995                                               strerror(-ret));
6996                                 continue;
6997                         }
6998                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6999                                 ret = add_tree_backref(extent_cache,
7000                                                 key.objectid, key.offset, 0, 0);
7001                                 if (ret < 0)
7002                                         error("add_tree_backref failed: %s",
7003                                               strerror(-ret));
7004                                 continue;
7005                         }
7006                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7007                                 struct btrfs_extent_data_ref *ref;
7008                                 ref = btrfs_item_ptr(buf, i,
7009                                                 struct btrfs_extent_data_ref);
7010                                 add_data_backref(extent_cache,
7011                                         key.objectid, 0,
7012                                         btrfs_extent_data_ref_root(buf, ref),
7013                                         btrfs_extent_data_ref_objectid(buf,
7014                                                                        ref),
7015                                         btrfs_extent_data_ref_offset(buf, ref),
7016                                         btrfs_extent_data_ref_count(buf, ref),
7017                                         0, root->sectorsize);
7018                                 continue;
7019                         }
7020                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7021                                 struct btrfs_shared_data_ref *ref;
7022                                 ref = btrfs_item_ptr(buf, i,
7023                                                 struct btrfs_shared_data_ref);
7024                                 add_data_backref(extent_cache,
7025                                         key.objectid, key.offset, 0, 0, 0,
7026                                         btrfs_shared_data_ref_count(buf, ref),
7027                                         0, root->sectorsize);
7028                                 continue;
7029                         }
7030                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7031                                 struct bad_item *bad;
7032
7033                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7034                                         continue;
7035                                 if (!owner)
7036                                         continue;
7037                                 bad = malloc(sizeof(struct bad_item));
7038                                 if (!bad)
7039                                         continue;
7040                                 INIT_LIST_HEAD(&bad->list);
7041                                 memcpy(&bad->key, &key,
7042                                        sizeof(struct btrfs_key));
7043                                 bad->root_id = owner;
7044                                 list_add_tail(&bad->list, &delete_items);
7045                                 continue;
7046                         }
7047                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7048                                 continue;
7049                         fi = btrfs_item_ptr(buf, i,
7050                                             struct btrfs_file_extent_item);
7051                         if (btrfs_file_extent_type(buf, fi) ==
7052                             BTRFS_FILE_EXTENT_INLINE)
7053                                 continue;
7054                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7055                                 continue;
7056
7057                         data_bytes_allocated +=
7058                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7059                         if (data_bytes_allocated < root->sectorsize) {
7060                                 abort();
7061                         }
7062                         data_bytes_referenced +=
7063                                 btrfs_file_extent_num_bytes(buf, fi);
7064                         add_data_backref(extent_cache,
7065                                 btrfs_file_extent_disk_bytenr(buf, fi),
7066                                 parent, owner, key.objectid, key.offset -
7067                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7068                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7069                 }
7070         } else {
7071                 int level;
7072                 struct btrfs_key first_key;
7073
7074                 first_key.objectid = 0;
7075
7076                 if (nritems > 0)
7077                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7078                 level = btrfs_header_level(buf);
7079                 for (i = 0; i < nritems; i++) {
7080                         struct extent_record tmpl;
7081
7082                         ptr = btrfs_node_blockptr(buf, i);
7083                         size = root->nodesize;
7084                         btrfs_node_key_to_cpu(buf, &key, i);
7085                         if (ri != NULL) {
7086                                 if ((level == ri->drop_level)
7087                                     && is_dropped_key(&key, &ri->drop_key)) {
7088                                         continue;
7089                                 }
7090                         }
7091
7092                         memset(&tmpl, 0, sizeof(tmpl));
7093                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7094                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7095                         tmpl.start = ptr;
7096                         tmpl.nr = size;
7097                         tmpl.refs = 1;
7098                         tmpl.metadata = 1;
7099                         tmpl.max_size = size;
7100                         ret = add_extent_rec(extent_cache, &tmpl);
7101                         if (ret < 0)
7102                                 goto out;
7103
7104                         ret = add_tree_backref(extent_cache, ptr, parent,
7105                                         owner, 1);
7106                         if (ret < 0) {
7107                                 error("add_tree_backref failed: %s",
7108                                       strerror(-ret));
7109                                 continue;
7110                         }
7111
7112                         if (level > 1) {
7113                                 add_pending(nodes, seen, ptr, size);
7114                         } else {
7115                                 add_pending(pending, seen, ptr, size);
7116                         }
7117                 }
7118                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7119                                       nritems) * sizeof(struct btrfs_key_ptr);
7120         }
7121         total_btree_bytes += buf->len;
7122         if (fs_root_objectid(btrfs_header_owner(buf)))
7123                 total_fs_tree_bytes += buf->len;
7124         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7125                 total_extent_tree_bytes += buf->len;
7126         if (!found_old_backref &&
7127             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7128             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7129             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7130                 found_old_backref = 1;
7131 out:
7132         free_extent_buffer(buf);
7133         return ret;
7134 }
7135
7136 static int add_root_to_pending(struct extent_buffer *buf,
7137                                struct cache_tree *extent_cache,
7138                                struct cache_tree *pending,
7139                                struct cache_tree *seen,
7140                                struct cache_tree *nodes,
7141                                u64 objectid)
7142 {
7143         struct extent_record tmpl;
7144         int ret;
7145
7146         if (btrfs_header_level(buf) > 0)
7147                 add_pending(nodes, seen, buf->start, buf->len);
7148         else
7149                 add_pending(pending, seen, buf->start, buf->len);
7150
7151         memset(&tmpl, 0, sizeof(tmpl));
7152         tmpl.start = buf->start;
7153         tmpl.nr = buf->len;
7154         tmpl.is_root = 1;
7155         tmpl.refs = 1;
7156         tmpl.metadata = 1;
7157         tmpl.max_size = buf->len;
7158         add_extent_rec(extent_cache, &tmpl);
7159
7160         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7161             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7162                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7163                                 0, 1);
7164         else
7165                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7166                                 1);
7167         return ret;
7168 }
7169
7170 /* as we fix the tree, we might be deleting blocks that
7171  * we're tracking for repair.  This hook makes sure we
7172  * remove any backrefs for blocks as we are fixing them.
7173  */
7174 static int free_extent_hook(struct btrfs_trans_handle *trans,
7175                             struct btrfs_root *root,
7176                             u64 bytenr, u64 num_bytes, u64 parent,
7177                             u64 root_objectid, u64 owner, u64 offset,
7178                             int refs_to_drop)
7179 {
7180         struct extent_record *rec;
7181         struct cache_extent *cache;
7182         int is_data;
7183         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7184
7185         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7186         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7187         if (!cache)
7188                 return 0;
7189
7190         rec = container_of(cache, struct extent_record, cache);
7191         if (is_data) {
7192                 struct data_backref *back;
7193                 back = find_data_backref(rec, parent, root_objectid, owner,
7194                                          offset, 1, bytenr, num_bytes);
7195                 if (!back)
7196                         goto out;
7197                 if (back->node.found_ref) {
7198                         back->found_ref -= refs_to_drop;
7199                         if (rec->refs)
7200                                 rec->refs -= refs_to_drop;
7201                 }
7202                 if (back->node.found_extent_tree) {
7203                         back->num_refs -= refs_to_drop;
7204                         if (rec->extent_item_refs)
7205                                 rec->extent_item_refs -= refs_to_drop;
7206                 }
7207                 if (back->found_ref == 0)
7208                         back->node.found_ref = 0;
7209                 if (back->num_refs == 0)
7210                         back->node.found_extent_tree = 0;
7211
7212                 if (!back->node.found_extent_tree && back->node.found_ref) {
7213                         list_del(&back->node.list);
7214                         free(back);
7215                 }
7216         } else {
7217                 struct tree_backref *back;
7218                 back = find_tree_backref(rec, parent, root_objectid);
7219                 if (!back)
7220                         goto out;
7221                 if (back->node.found_ref) {
7222                         if (rec->refs)
7223                                 rec->refs--;
7224                         back->node.found_ref = 0;
7225                 }
7226                 if (back->node.found_extent_tree) {
7227                         if (rec->extent_item_refs)
7228                                 rec->extent_item_refs--;
7229                         back->node.found_extent_tree = 0;
7230                 }
7231                 if (!back->node.found_extent_tree && back->node.found_ref) {
7232                         list_del(&back->node.list);
7233                         free(back);
7234                 }
7235         }
7236         maybe_free_extent_rec(extent_cache, rec);
7237 out:
7238         return 0;
7239 }
7240
7241 static int delete_extent_records(struct btrfs_trans_handle *trans,
7242                                  struct btrfs_root *root,
7243                                  struct btrfs_path *path,
7244                                  u64 bytenr, u64 new_len)
7245 {
7246         struct btrfs_key key;
7247         struct btrfs_key found_key;
7248         struct extent_buffer *leaf;
7249         int ret;
7250         int slot;
7251
7252
7253         key.objectid = bytenr;
7254         key.type = (u8)-1;
7255         key.offset = (u64)-1;
7256
7257         while(1) {
7258                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7259                                         &key, path, 0, 1);
7260                 if (ret < 0)
7261                         break;
7262
7263                 if (ret > 0) {
7264                         ret = 0;
7265                         if (path->slots[0] == 0)
7266                                 break;
7267                         path->slots[0]--;
7268                 }
7269                 ret = 0;
7270
7271                 leaf = path->nodes[0];
7272                 slot = path->slots[0];
7273
7274                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7275                 if (found_key.objectid != bytenr)
7276                         break;
7277
7278                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7279                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7280                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7281                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7282                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7283                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7284                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7285                         btrfs_release_path(path);
7286                         if (found_key.type == 0) {
7287                                 if (found_key.offset == 0)
7288                                         break;
7289                                 key.offset = found_key.offset - 1;
7290                                 key.type = found_key.type;
7291                         }
7292                         key.type = found_key.type - 1;
7293                         key.offset = (u64)-1;
7294                         continue;
7295                 }
7296
7297                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7298                         found_key.objectid, found_key.type, found_key.offset);
7299
7300                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7301                 if (ret)
7302                         break;
7303                 btrfs_release_path(path);
7304
7305                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7306                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
7307                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7308                                 found_key.offset : root->nodesize;
7309
7310                         ret = btrfs_update_block_group(trans, root, bytenr,
7311                                                        bytes, 0, 0);
7312                         if (ret)
7313                                 break;
7314                 }
7315         }
7316
7317         btrfs_release_path(path);
7318         return ret;
7319 }
7320
7321 /*
7322  * for a single backref, this will allocate a new extent
7323  * and add the backref to it.
7324  */
7325 static int record_extent(struct btrfs_trans_handle *trans,
7326                          struct btrfs_fs_info *info,
7327                          struct btrfs_path *path,
7328                          struct extent_record *rec,
7329                          struct extent_backref *back,
7330                          int allocated, u64 flags)
7331 {
7332         int ret;
7333         struct btrfs_root *extent_root = info->extent_root;
7334         struct extent_buffer *leaf;
7335         struct btrfs_key ins_key;
7336         struct btrfs_extent_item *ei;
7337         struct data_backref *dback;
7338         struct btrfs_tree_block_info *bi;
7339
7340         if (!back->is_data)
7341                 rec->max_size = max_t(u64, rec->max_size,
7342                                     info->extent_root->nodesize);
7343
7344         if (!allocated) {
7345                 u32 item_size = sizeof(*ei);
7346
7347                 if (!back->is_data)
7348                         item_size += sizeof(*bi);
7349
7350                 ins_key.objectid = rec->start;
7351                 ins_key.offset = rec->max_size;
7352                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7353
7354                 ret = btrfs_insert_empty_item(trans, extent_root, path,
7355                                         &ins_key, item_size);
7356                 if (ret)
7357                         goto fail;
7358
7359                 leaf = path->nodes[0];
7360                 ei = btrfs_item_ptr(leaf, path->slots[0],
7361                                     struct btrfs_extent_item);
7362
7363                 btrfs_set_extent_refs(leaf, ei, 0);
7364                 btrfs_set_extent_generation(leaf, ei, rec->generation);
7365
7366                 if (back->is_data) {
7367                         btrfs_set_extent_flags(leaf, ei,
7368                                                BTRFS_EXTENT_FLAG_DATA);
7369                 } else {
7370                         struct btrfs_disk_key copy_key;;
7371
7372                         bi = (struct btrfs_tree_block_info *)(ei + 1);
7373                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
7374                                              sizeof(*bi));
7375
7376                         btrfs_set_disk_key_objectid(&copy_key,
7377                                                     rec->info_objectid);
7378                         btrfs_set_disk_key_type(&copy_key, 0);
7379                         btrfs_set_disk_key_offset(&copy_key, 0);
7380
7381                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7382                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
7383
7384                         btrfs_set_extent_flags(leaf, ei,
7385                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7386                 }
7387
7388                 btrfs_mark_buffer_dirty(leaf);
7389                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7390                                                rec->max_size, 1, 0);
7391                 if (ret)
7392                         goto fail;
7393                 btrfs_release_path(path);
7394         }
7395
7396         if (back->is_data) {
7397                 u64 parent;
7398                 int i;
7399
7400                 dback = to_data_backref(back);
7401                 if (back->full_backref)
7402                         parent = dback->parent;
7403                 else
7404                         parent = 0;
7405
7406                 for (i = 0; i < dback->found_ref; i++) {
7407                         /* if parent != 0, we're doing a full backref
7408                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7409                          * just makes the backref allocator create a data
7410                          * backref
7411                          */
7412                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
7413                                                    rec->start, rec->max_size,
7414                                                    parent,
7415                                                    dback->root,
7416                                                    parent ?
7417                                                    BTRFS_FIRST_FREE_OBJECTID :
7418                                                    dback->owner,
7419                                                    dback->offset);
7420                         if (ret)
7421                                 break;
7422                 }
7423                 fprintf(stderr, "adding new data backref"
7424                                 " on %llu %s %llu owner %llu"
7425                                 " offset %llu found %d\n",
7426                                 (unsigned long long)rec->start,
7427                                 back->full_backref ?
7428                                 "parent" : "root",
7429                                 back->full_backref ?
7430                                 (unsigned long long)parent :
7431                                 (unsigned long long)dback->root,
7432                                 (unsigned long long)dback->owner,
7433                                 (unsigned long long)dback->offset,
7434                                 dback->found_ref);
7435         } else {
7436                 u64 parent;
7437                 struct tree_backref *tback;
7438
7439                 tback = to_tree_backref(back);
7440                 if (back->full_backref)
7441                         parent = tback->parent;
7442                 else
7443                         parent = 0;
7444
7445                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7446                                            rec->start, rec->max_size,
7447                                            parent, tback->root, 0, 0);
7448                 fprintf(stderr, "adding new tree backref on "
7449                         "start %llu len %llu parent %llu root %llu\n",
7450                         rec->start, rec->max_size, parent, tback->root);
7451         }
7452 fail:
7453         btrfs_release_path(path);
7454         return ret;
7455 }
7456
7457 static struct extent_entry *find_entry(struct list_head *entries,
7458                                        u64 bytenr, u64 bytes)
7459 {
7460         struct extent_entry *entry = NULL;
7461
7462         list_for_each_entry(entry, entries, list) {
7463                 if (entry->bytenr == bytenr && entry->bytes == bytes)
7464                         return entry;
7465         }
7466
7467         return NULL;
7468 }
7469
7470 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7471 {
7472         struct extent_entry *entry, *best = NULL, *prev = NULL;
7473
7474         list_for_each_entry(entry, entries, list) {
7475                 /*
7476                  * If there are as many broken entries as entries then we know
7477                  * not to trust this particular entry.
7478                  */
7479                 if (entry->broken == entry->count)
7480                         continue;
7481
7482                 /*
7483                  * Special case, when there are only two entries and 'best' is
7484                  * the first one
7485                  */
7486                 if (!prev) {
7487                         best = entry;
7488                         prev = entry;
7489                         continue;
7490                 }
7491
7492                 /*
7493                  * If our current entry == best then we can't be sure our best
7494                  * is really the best, so we need to keep searching.
7495                  */
7496                 if (best && best->count == entry->count) {
7497                         prev = entry;
7498                         best = NULL;
7499                         continue;
7500                 }
7501
7502                 /* Prev == entry, not good enough, have to keep searching */
7503                 if (!prev->broken && prev->count == entry->count)
7504                         continue;
7505
7506                 if (!best)
7507                         best = (prev->count > entry->count) ? prev : entry;
7508                 else if (best->count < entry->count)
7509                         best = entry;
7510                 prev = entry;
7511         }
7512
7513         return best;
7514 }
7515
7516 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7517                       struct data_backref *dback, struct extent_entry *entry)
7518 {
7519         struct btrfs_trans_handle *trans;
7520         struct btrfs_root *root;
7521         struct btrfs_file_extent_item *fi;
7522         struct extent_buffer *leaf;
7523         struct btrfs_key key;
7524         u64 bytenr, bytes;
7525         int ret, err;
7526
7527         key.objectid = dback->root;
7528         key.type = BTRFS_ROOT_ITEM_KEY;
7529         key.offset = (u64)-1;
7530         root = btrfs_read_fs_root(info, &key);
7531         if (IS_ERR(root)) {
7532                 fprintf(stderr, "Couldn't find root for our ref\n");
7533                 return -EINVAL;
7534         }
7535
7536         /*
7537          * The backref points to the original offset of the extent if it was
7538          * split, so we need to search down to the offset we have and then walk
7539          * forward until we find the backref we're looking for.
7540          */
7541         key.objectid = dback->owner;
7542         key.type = BTRFS_EXTENT_DATA_KEY;
7543         key.offset = dback->offset;
7544         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7545         if (ret < 0) {
7546                 fprintf(stderr, "Error looking up ref %d\n", ret);
7547                 return ret;
7548         }
7549
7550         while (1) {
7551                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7552                         ret = btrfs_next_leaf(root, path);
7553                         if (ret) {
7554                                 fprintf(stderr, "Couldn't find our ref, next\n");
7555                                 return -EINVAL;
7556                         }
7557                 }
7558                 leaf = path->nodes[0];
7559                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7560                 if (key.objectid != dback->owner ||
7561                     key.type != BTRFS_EXTENT_DATA_KEY) {
7562                         fprintf(stderr, "Couldn't find our ref, search\n");
7563                         return -EINVAL;
7564                 }
7565                 fi = btrfs_item_ptr(leaf, path->slots[0],
7566                                     struct btrfs_file_extent_item);
7567                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7568                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7569
7570                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7571                         break;
7572                 path->slots[0]++;
7573         }
7574
7575         btrfs_release_path(path);
7576
7577         trans = btrfs_start_transaction(root, 1);
7578         if (IS_ERR(trans))
7579                 return PTR_ERR(trans);
7580
7581         /*
7582          * Ok we have the key of the file extent we want to fix, now we can cow
7583          * down to the thing and fix it.
7584          */
7585         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7586         if (ret < 0) {
7587                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7588                         key.objectid, key.type, key.offset, ret);
7589                 goto out;
7590         }
7591         if (ret > 0) {
7592                 fprintf(stderr, "Well that's odd, we just found this key "
7593                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7594                         key.offset);
7595                 ret = -EINVAL;
7596                 goto out;
7597         }
7598         leaf = path->nodes[0];
7599         fi = btrfs_item_ptr(leaf, path->slots[0],
7600                             struct btrfs_file_extent_item);
7601
7602         if (btrfs_file_extent_compression(leaf, fi) &&
7603             dback->disk_bytenr != entry->bytenr) {
7604                 fprintf(stderr, "Ref doesn't match the record start and is "
7605                         "compressed, please take a btrfs-image of this file "
7606                         "system and send it to a btrfs developer so they can "
7607                         "complete this functionality for bytenr %Lu\n",
7608                         dback->disk_bytenr);
7609                 ret = -EINVAL;
7610                 goto out;
7611         }
7612
7613         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7614                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7615         } else if (dback->disk_bytenr > entry->bytenr) {
7616                 u64 off_diff, offset;
7617
7618                 off_diff = dback->disk_bytenr - entry->bytenr;
7619                 offset = btrfs_file_extent_offset(leaf, fi);
7620                 if (dback->disk_bytenr + offset +
7621                     btrfs_file_extent_num_bytes(leaf, fi) >
7622                     entry->bytenr + entry->bytes) {
7623                         fprintf(stderr, "Ref is past the entry end, please "
7624                                 "take a btrfs-image of this file system and "
7625                                 "send it to a btrfs developer, ref %Lu\n",
7626                                 dback->disk_bytenr);
7627                         ret = -EINVAL;
7628                         goto out;
7629                 }
7630                 offset += off_diff;
7631                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7632                 btrfs_set_file_extent_offset(leaf, fi, offset);
7633         } else if (dback->disk_bytenr < entry->bytenr) {
7634                 u64 offset;
7635
7636                 offset = btrfs_file_extent_offset(leaf, fi);
7637                 if (dback->disk_bytenr + offset < entry->bytenr) {
7638                         fprintf(stderr, "Ref is before the entry start, please"
7639                                 " take a btrfs-image of this file system and "
7640                                 "send it to a btrfs developer, ref %Lu\n",
7641                                 dback->disk_bytenr);
7642                         ret = -EINVAL;
7643                         goto out;
7644                 }
7645
7646                 offset += dback->disk_bytenr;
7647                 offset -= entry->bytenr;
7648                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7649                 btrfs_set_file_extent_offset(leaf, fi, offset);
7650         }
7651
7652         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7653
7654         /*
7655          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7656          * only do this if we aren't using compression, otherwise it's a
7657          * trickier case.
7658          */
7659         if (!btrfs_file_extent_compression(leaf, fi))
7660                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7661         else
7662                 printf("ram bytes may be wrong?\n");
7663         btrfs_mark_buffer_dirty(leaf);
7664 out:
7665         err = btrfs_commit_transaction(trans, root);
7666         btrfs_release_path(path);
7667         return ret ? ret : err;
7668 }
7669
7670 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7671                            struct extent_record *rec)
7672 {
7673         struct extent_backref *back;
7674         struct data_backref *dback;
7675         struct extent_entry *entry, *best = NULL;
7676         LIST_HEAD(entries);
7677         int nr_entries = 0;
7678         int broken_entries = 0;
7679         int ret = 0;
7680         short mismatch = 0;
7681
7682         /*
7683          * Metadata is easy and the backrefs should always agree on bytenr and
7684          * size, if not we've got bigger issues.
7685          */
7686         if (rec->metadata)
7687                 return 0;
7688
7689         list_for_each_entry(back, &rec->backrefs, list) {
7690                 if (back->full_backref || !back->is_data)
7691                         continue;
7692
7693                 dback = to_data_backref(back);
7694
7695                 /*
7696                  * We only pay attention to backrefs that we found a real
7697                  * backref for.
7698                  */
7699                 if (dback->found_ref == 0)
7700                         continue;
7701
7702                 /*
7703                  * For now we only catch when the bytes don't match, not the
7704                  * bytenr.  We can easily do this at the same time, but I want
7705                  * to have a fs image to test on before we just add repair
7706                  * functionality willy-nilly so we know we won't screw up the
7707                  * repair.
7708                  */
7709
7710                 entry = find_entry(&entries, dback->disk_bytenr,
7711                                    dback->bytes);
7712                 if (!entry) {
7713                         entry = malloc(sizeof(struct extent_entry));
7714                         if (!entry) {
7715                                 ret = -ENOMEM;
7716                                 goto out;
7717                         }
7718                         memset(entry, 0, sizeof(*entry));
7719                         entry->bytenr = dback->disk_bytenr;
7720                         entry->bytes = dback->bytes;
7721                         list_add_tail(&entry->list, &entries);
7722                         nr_entries++;
7723                 }
7724
7725                 /*
7726                  * If we only have on entry we may think the entries agree when
7727                  * in reality they don't so we have to do some extra checking.
7728                  */
7729                 if (dback->disk_bytenr != rec->start ||
7730                     dback->bytes != rec->nr || back->broken)
7731                         mismatch = 1;
7732
7733                 if (back->broken) {
7734                         entry->broken++;
7735                         broken_entries++;
7736                 }
7737
7738                 entry->count++;
7739         }
7740
7741         /* Yay all the backrefs agree, carry on good sir */
7742         if (nr_entries <= 1 && !mismatch)
7743                 goto out;
7744
7745         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7746                 "%Lu\n", rec->start);
7747
7748         /*
7749          * First we want to see if the backrefs can agree amongst themselves who
7750          * is right, so figure out which one of the entries has the highest
7751          * count.
7752          */
7753         best = find_most_right_entry(&entries);
7754
7755         /*
7756          * Ok so we may have an even split between what the backrefs think, so
7757          * this is where we use the extent ref to see what it thinks.
7758          */
7759         if (!best) {
7760                 entry = find_entry(&entries, rec->start, rec->nr);
7761                 if (!entry && (!broken_entries || !rec->found_rec)) {
7762                         fprintf(stderr, "Backrefs don't agree with each other "
7763                                 "and extent record doesn't agree with anybody,"
7764                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7765                                 rec->start, rec->nr);
7766                         ret = -EINVAL;
7767                         goto out;
7768                 } else if (!entry) {
7769                         /*
7770                          * Ok our backrefs were broken, we'll assume this is the
7771                          * correct value and add an entry for this range.
7772                          */
7773                         entry = malloc(sizeof(struct extent_entry));
7774                         if (!entry) {
7775                                 ret = -ENOMEM;
7776                                 goto out;
7777                         }
7778                         memset(entry, 0, sizeof(*entry));
7779                         entry->bytenr = rec->start;
7780                         entry->bytes = rec->nr;
7781                         list_add_tail(&entry->list, &entries);
7782                         nr_entries++;
7783                 }
7784                 entry->count++;
7785                 best = find_most_right_entry(&entries);
7786                 if (!best) {
7787                         fprintf(stderr, "Backrefs and extent record evenly "
7788                                 "split on who is right, this is going to "
7789                                 "require user input to fix bytenr %Lu bytes "
7790                                 "%Lu\n", rec->start, rec->nr);
7791                         ret = -EINVAL;
7792                         goto out;
7793                 }
7794         }
7795
7796         /*
7797          * I don't think this can happen currently as we'll abort() if we catch
7798          * this case higher up, but in case somebody removes that we still can't
7799          * deal with it properly here yet, so just bail out of that's the case.
7800          */
7801         if (best->bytenr != rec->start) {
7802                 fprintf(stderr, "Extent start and backref starts don't match, "
7803                         "please use btrfs-image on this file system and send "
7804                         "it to a btrfs developer so they can make fsck fix "
7805                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7806                         rec->start, rec->nr);
7807                 ret = -EINVAL;
7808                 goto out;
7809         }
7810
7811         /*
7812          * Ok great we all agreed on an extent record, let's go find the real
7813          * references and fix up the ones that don't match.
7814          */
7815         list_for_each_entry(back, &rec->backrefs, list) {
7816                 if (back->full_backref || !back->is_data)
7817                         continue;
7818
7819                 dback = to_data_backref(back);
7820
7821                 /*
7822                  * Still ignoring backrefs that don't have a real ref attached
7823                  * to them.
7824                  */
7825                 if (dback->found_ref == 0)
7826                         continue;
7827
7828                 if (dback->bytes == best->bytes &&
7829                     dback->disk_bytenr == best->bytenr)
7830                         continue;
7831
7832                 ret = repair_ref(info, path, dback, best);
7833                 if (ret)
7834                         goto out;
7835         }
7836
7837         /*
7838          * Ok we messed with the actual refs, which means we need to drop our
7839          * entire cache and go back and rescan.  I know this is a huge pain and
7840          * adds a lot of extra work, but it's the only way to be safe.  Once all
7841          * the backrefs agree we may not need to do anything to the extent
7842          * record itself.
7843          */
7844         ret = -EAGAIN;
7845 out:
7846         while (!list_empty(&entries)) {
7847                 entry = list_entry(entries.next, struct extent_entry, list);
7848                 list_del_init(&entry->list);
7849                 free(entry);
7850         }
7851         return ret;
7852 }
7853
7854 static int process_duplicates(struct btrfs_root *root,
7855                               struct cache_tree *extent_cache,
7856                               struct extent_record *rec)
7857 {
7858         struct extent_record *good, *tmp;
7859         struct cache_extent *cache;
7860         int ret;
7861
7862         /*
7863          * If we found a extent record for this extent then return, or if we
7864          * have more than one duplicate we are likely going to need to delete
7865          * something.
7866          */
7867         if (rec->found_rec || rec->num_duplicates > 1)
7868                 return 0;
7869
7870         /* Shouldn't happen but just in case */
7871         BUG_ON(!rec->num_duplicates);
7872
7873         /*
7874          * So this happens if we end up with a backref that doesn't match the
7875          * actual extent entry.  So either the backref is bad or the extent
7876          * entry is bad.  Either way we want to have the extent_record actually
7877          * reflect what we found in the extent_tree, so we need to take the
7878          * duplicate out and use that as the extent_record since the only way we
7879          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7880          */
7881         remove_cache_extent(extent_cache, &rec->cache);
7882
7883         good = to_extent_record(rec->dups.next);
7884         list_del_init(&good->list);
7885         INIT_LIST_HEAD(&good->backrefs);
7886         INIT_LIST_HEAD(&good->dups);
7887         good->cache.start = good->start;
7888         good->cache.size = good->nr;
7889         good->content_checked = 0;
7890         good->owner_ref_checked = 0;
7891         good->num_duplicates = 0;
7892         good->refs = rec->refs;
7893         list_splice_init(&rec->backrefs, &good->backrefs);
7894         while (1) {
7895                 cache = lookup_cache_extent(extent_cache, good->start,
7896                                             good->nr);
7897                 if (!cache)
7898                         break;
7899                 tmp = container_of(cache, struct extent_record, cache);
7900
7901                 /*
7902                  * If we find another overlapping extent and it's found_rec is
7903                  * set then it's a duplicate and we need to try and delete
7904                  * something.
7905                  */
7906                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7907                         if (list_empty(&good->list))
7908                                 list_add_tail(&good->list,
7909                                               &duplicate_extents);
7910                         good->num_duplicates += tmp->num_duplicates + 1;
7911                         list_splice_init(&tmp->dups, &good->dups);
7912                         list_del_init(&tmp->list);
7913                         list_add_tail(&tmp->list, &good->dups);
7914                         remove_cache_extent(extent_cache, &tmp->cache);
7915                         continue;
7916                 }
7917
7918                 /*
7919                  * Ok we have another non extent item backed extent rec, so lets
7920                  * just add it to this extent and carry on like we did above.
7921                  */
7922                 good->refs += tmp->refs;
7923                 list_splice_init(&tmp->backrefs, &good->backrefs);
7924                 remove_cache_extent(extent_cache, &tmp->cache);
7925                 free(tmp);
7926         }
7927         ret = insert_cache_extent(extent_cache, &good->cache);
7928         BUG_ON(ret);
7929         free(rec);
7930         return good->num_duplicates ? 0 : 1;
7931 }
7932
7933 static int delete_duplicate_records(struct btrfs_root *root,
7934                                     struct extent_record *rec)
7935 {
7936         struct btrfs_trans_handle *trans;
7937         LIST_HEAD(delete_list);
7938         struct btrfs_path path;
7939         struct extent_record *tmp, *good, *n;
7940         int nr_del = 0;
7941         int ret = 0, err;
7942         struct btrfs_key key;
7943
7944         btrfs_init_path(&path);
7945
7946         good = rec;
7947         /* Find the record that covers all of the duplicates. */
7948         list_for_each_entry(tmp, &rec->dups, list) {
7949                 if (good->start < tmp->start)
7950                         continue;
7951                 if (good->nr > tmp->nr)
7952                         continue;
7953
7954                 if (tmp->start + tmp->nr < good->start + good->nr) {
7955                         fprintf(stderr, "Ok we have overlapping extents that "
7956                                 "aren't completely covered by each other, this "
7957                                 "is going to require more careful thought.  "
7958                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7959                                 tmp->start, tmp->nr, good->start, good->nr);
7960                         abort();
7961                 }
7962                 good = tmp;
7963         }
7964
7965         if (good != rec)
7966                 list_add_tail(&rec->list, &delete_list);
7967
7968         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7969                 if (tmp == good)
7970                         continue;
7971                 list_move_tail(&tmp->list, &delete_list);
7972         }
7973
7974         root = root->fs_info->extent_root;
7975         trans = btrfs_start_transaction(root, 1);
7976         if (IS_ERR(trans)) {
7977                 ret = PTR_ERR(trans);
7978                 goto out;
7979         }
7980
7981         list_for_each_entry(tmp, &delete_list, list) {
7982                 if (tmp->found_rec == 0)
7983                         continue;
7984                 key.objectid = tmp->start;
7985                 key.type = BTRFS_EXTENT_ITEM_KEY;
7986                 key.offset = tmp->nr;
7987
7988                 /* Shouldn't happen but just in case */
7989                 if (tmp->metadata) {
7990                         fprintf(stderr, "Well this shouldn't happen, extent "
7991                                 "record overlaps but is metadata? "
7992                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7993                         abort();
7994                 }
7995
7996                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7997                 if (ret) {
7998                         if (ret > 0)
7999                                 ret = -EINVAL;
8000                         break;
8001                 }
8002                 ret = btrfs_del_item(trans, root, &path);
8003                 if (ret)
8004                         break;
8005                 btrfs_release_path(&path);
8006                 nr_del++;
8007         }
8008         err = btrfs_commit_transaction(trans, root);
8009         if (err && !ret)
8010                 ret = err;
8011 out:
8012         while (!list_empty(&delete_list)) {
8013                 tmp = to_extent_record(delete_list.next);
8014                 list_del_init(&tmp->list);
8015                 if (tmp == rec)
8016                         continue;
8017                 free(tmp);
8018         }
8019
8020         while (!list_empty(&rec->dups)) {
8021                 tmp = to_extent_record(rec->dups.next);
8022                 list_del_init(&tmp->list);
8023                 free(tmp);
8024         }
8025
8026         btrfs_release_path(&path);
8027
8028         if (!ret && !nr_del)
8029                 rec->num_duplicates = 0;
8030
8031         return ret ? ret : nr_del;
8032 }
8033
8034 static int find_possible_backrefs(struct btrfs_fs_info *info,
8035                                   struct btrfs_path *path,
8036                                   struct cache_tree *extent_cache,
8037                                   struct extent_record *rec)
8038 {
8039         struct btrfs_root *root;
8040         struct extent_backref *back;
8041         struct data_backref *dback;
8042         struct cache_extent *cache;
8043         struct btrfs_file_extent_item *fi;
8044         struct btrfs_key key;
8045         u64 bytenr, bytes;
8046         int ret;
8047
8048         list_for_each_entry(back, &rec->backrefs, list) {
8049                 /* Don't care about full backrefs (poor unloved backrefs) */
8050                 if (back->full_backref || !back->is_data)
8051                         continue;
8052
8053                 dback = to_data_backref(back);
8054
8055                 /* We found this one, we don't need to do a lookup */
8056                 if (dback->found_ref)
8057                         continue;
8058
8059                 key.objectid = dback->root;
8060                 key.type = BTRFS_ROOT_ITEM_KEY;
8061                 key.offset = (u64)-1;
8062
8063                 root = btrfs_read_fs_root(info, &key);
8064
8065                 /* No root, definitely a bad ref, skip */
8066                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8067                         continue;
8068                 /* Other err, exit */
8069                 if (IS_ERR(root))
8070                         return PTR_ERR(root);
8071
8072                 key.objectid = dback->owner;
8073                 key.type = BTRFS_EXTENT_DATA_KEY;
8074                 key.offset = dback->offset;
8075                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8076                 if (ret) {
8077                         btrfs_release_path(path);
8078                         if (ret < 0)
8079                                 return ret;
8080                         /* Didn't find it, we can carry on */
8081                         ret = 0;
8082                         continue;
8083                 }
8084
8085                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8086                                     struct btrfs_file_extent_item);
8087                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8088                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8089                 btrfs_release_path(path);
8090                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8091                 if (cache) {
8092                         struct extent_record *tmp;
8093                         tmp = container_of(cache, struct extent_record, cache);
8094
8095                         /*
8096                          * If we found an extent record for the bytenr for this
8097                          * particular backref then we can't add it to our
8098                          * current extent record.  We only want to add backrefs
8099                          * that don't have a corresponding extent item in the
8100                          * extent tree since they likely belong to this record
8101                          * and we need to fix it if it doesn't match bytenrs.
8102                          */
8103                         if  (tmp->found_rec)
8104                                 continue;
8105                 }
8106
8107                 dback->found_ref += 1;
8108                 dback->disk_bytenr = bytenr;
8109                 dback->bytes = bytes;
8110
8111                 /*
8112                  * Set this so the verify backref code knows not to trust the
8113                  * values in this backref.
8114                  */
8115                 back->broken = 1;
8116         }
8117
8118         return 0;
8119 }
8120
8121 /*
8122  * Record orphan data ref into corresponding root.
8123  *
8124  * Return 0 if the extent item contains data ref and recorded.
8125  * Return 1 if the extent item contains no useful data ref
8126  *   On that case, it may contains only shared_dataref or metadata backref
8127  *   or the file extent exists(this should be handled by the extent bytenr
8128  *   recovery routine)
8129  * Return <0 if something goes wrong.
8130  */
8131 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8132                                       struct extent_record *rec)
8133 {
8134         struct btrfs_key key;
8135         struct btrfs_root *dest_root;
8136         struct extent_backref *back;
8137         struct data_backref *dback;
8138         struct orphan_data_extent *orphan;
8139         struct btrfs_path path;
8140         int recorded_data_ref = 0;
8141         int ret = 0;
8142
8143         if (rec->metadata)
8144                 return 1;
8145         btrfs_init_path(&path);
8146         list_for_each_entry(back, &rec->backrefs, list) {
8147                 if (back->full_backref || !back->is_data ||
8148                     !back->found_extent_tree)
8149                         continue;
8150                 dback = to_data_backref(back);
8151                 if (dback->found_ref)
8152                         continue;
8153                 key.objectid = dback->root;
8154                 key.type = BTRFS_ROOT_ITEM_KEY;
8155                 key.offset = (u64)-1;
8156
8157                 dest_root = btrfs_read_fs_root(fs_info, &key);
8158
8159                 /* For non-exist root we just skip it */
8160                 if (IS_ERR(dest_root) || !dest_root)
8161                         continue;
8162
8163                 key.objectid = dback->owner;
8164                 key.type = BTRFS_EXTENT_DATA_KEY;
8165                 key.offset = dback->offset;
8166
8167                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8168                 btrfs_release_path(&path);
8169                 /*
8170                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8171                  * we need to record it for inode/file extent rebuild.
8172                  * For ret > 0, we record it only for file extent rebuild.
8173                  * For ret == 0, the file extent exists but only bytenr
8174                  * mismatch, let the original bytenr fix routine to handle,
8175                  * don't record it.
8176                  */
8177                 if (ret == 0)
8178                         continue;
8179                 ret = 0;
8180                 orphan = malloc(sizeof(*orphan));
8181                 if (!orphan) {
8182                         ret = -ENOMEM;
8183                         goto out;
8184                 }
8185                 INIT_LIST_HEAD(&orphan->list);
8186                 orphan->root = dback->root;
8187                 orphan->objectid = dback->owner;
8188                 orphan->offset = dback->offset;
8189                 orphan->disk_bytenr = rec->cache.start;
8190                 orphan->disk_len = rec->cache.size;
8191                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8192                 recorded_data_ref = 1;
8193         }
8194 out:
8195         btrfs_release_path(&path);
8196         if (!ret)
8197                 return !recorded_data_ref;
8198         else
8199                 return ret;
8200 }
8201
8202 /*
8203  * when an incorrect extent item is found, this will delete
8204  * all of the existing entries for it and recreate them
8205  * based on what the tree scan found.
8206  */
8207 static int fixup_extent_refs(struct btrfs_fs_info *info,
8208                              struct cache_tree *extent_cache,
8209                              struct extent_record *rec)
8210 {
8211         struct btrfs_trans_handle *trans = NULL;
8212         int ret;
8213         struct btrfs_path path;
8214         struct list_head *cur = rec->backrefs.next;
8215         struct cache_extent *cache;
8216         struct extent_backref *back;
8217         int allocated = 0;
8218         u64 flags = 0;
8219
8220         if (rec->flag_block_full_backref)
8221                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8222
8223         btrfs_init_path(&path);
8224         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8225                 /*
8226                  * Sometimes the backrefs themselves are so broken they don't
8227                  * get attached to any meaningful rec, so first go back and
8228                  * check any of our backrefs that we couldn't find and throw
8229                  * them into the list if we find the backref so that
8230                  * verify_backrefs can figure out what to do.
8231                  */
8232                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8233                 if (ret < 0)
8234                         goto out;
8235         }
8236
8237         /* step one, make sure all of the backrefs agree */
8238         ret = verify_backrefs(info, &path, rec);
8239         if (ret < 0)
8240                 goto out;
8241
8242         trans = btrfs_start_transaction(info->extent_root, 1);
8243         if (IS_ERR(trans)) {
8244                 ret = PTR_ERR(trans);
8245                 goto out;
8246         }
8247
8248         /* step two, delete all the existing records */
8249         ret = delete_extent_records(trans, info->extent_root, &path,
8250                                     rec->start, rec->max_size);
8251
8252         if (ret < 0)
8253                 goto out;
8254
8255         /* was this block corrupt?  If so, don't add references to it */
8256         cache = lookup_cache_extent(info->corrupt_blocks,
8257                                     rec->start, rec->max_size);
8258         if (cache) {
8259                 ret = 0;
8260                 goto out;
8261         }
8262
8263         /* step three, recreate all the refs we did find */
8264         while(cur != &rec->backrefs) {
8265                 back = to_extent_backref(cur);
8266                 cur = cur->next;
8267
8268                 /*
8269                  * if we didn't find any references, don't create a
8270                  * new extent record
8271                  */
8272                 if (!back->found_ref)
8273                         continue;
8274
8275                 rec->bad_full_backref = 0;
8276                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8277                 allocated = 1;
8278
8279                 if (ret)
8280                         goto out;
8281         }
8282 out:
8283         if (trans) {
8284                 int err = btrfs_commit_transaction(trans, info->extent_root);
8285                 if (!ret)
8286                         ret = err;
8287         }
8288
8289         btrfs_release_path(&path);
8290         return ret;
8291 }
8292
8293 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8294                               struct extent_record *rec)
8295 {
8296         struct btrfs_trans_handle *trans;
8297         struct btrfs_root *root = fs_info->extent_root;
8298         struct btrfs_path path;
8299         struct btrfs_extent_item *ei;
8300         struct btrfs_key key;
8301         u64 flags;
8302         int ret = 0;
8303
8304         key.objectid = rec->start;
8305         if (rec->metadata) {
8306                 key.type = BTRFS_METADATA_ITEM_KEY;
8307                 key.offset = rec->info_level;
8308         } else {
8309                 key.type = BTRFS_EXTENT_ITEM_KEY;
8310                 key.offset = rec->max_size;
8311         }
8312
8313         trans = btrfs_start_transaction(root, 0);
8314         if (IS_ERR(trans))
8315                 return PTR_ERR(trans);
8316
8317         btrfs_init_path(&path);
8318         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8319         if (ret < 0) {
8320                 btrfs_release_path(&path);
8321                 btrfs_commit_transaction(trans, root);
8322                 return ret;
8323         } else if (ret) {
8324                 fprintf(stderr, "Didn't find extent for %llu\n",
8325                         (unsigned long long)rec->start);
8326                 btrfs_release_path(&path);
8327                 btrfs_commit_transaction(trans, root);
8328                 return -ENOENT;
8329         }
8330
8331         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8332                             struct btrfs_extent_item);
8333         flags = btrfs_extent_flags(path.nodes[0], ei);
8334         if (rec->flag_block_full_backref) {
8335                 fprintf(stderr, "setting full backref on %llu\n",
8336                         (unsigned long long)key.objectid);
8337                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8338         } else {
8339                 fprintf(stderr, "clearing full backref on %llu\n",
8340                         (unsigned long long)key.objectid);
8341                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8342         }
8343         btrfs_set_extent_flags(path.nodes[0], ei, flags);
8344         btrfs_mark_buffer_dirty(path.nodes[0]);
8345         btrfs_release_path(&path);
8346         return btrfs_commit_transaction(trans, root);
8347 }
8348
8349 /* right now we only prune from the extent allocation tree */
8350 static int prune_one_block(struct btrfs_trans_handle *trans,
8351                            struct btrfs_fs_info *info,
8352                            struct btrfs_corrupt_block *corrupt)
8353 {
8354         int ret;
8355         struct btrfs_path path;
8356         struct extent_buffer *eb;
8357         u64 found;
8358         int slot;
8359         int nritems;
8360         int level = corrupt->level + 1;
8361
8362         btrfs_init_path(&path);
8363 again:
8364         /* we want to stop at the parent to our busted block */
8365         path.lowest_level = level;
8366
8367         ret = btrfs_search_slot(trans, info->extent_root,
8368                                 &corrupt->key, &path, -1, 1);
8369
8370         if (ret < 0)
8371                 goto out;
8372
8373         eb = path.nodes[level];
8374         if (!eb) {
8375                 ret = -ENOENT;
8376                 goto out;
8377         }
8378
8379         /*
8380          * hopefully the search gave us the block we want to prune,
8381          * lets try that first
8382          */
8383         slot = path.slots[level];
8384         found =  btrfs_node_blockptr(eb, slot);
8385         if (found == corrupt->cache.start)
8386                 goto del_ptr;
8387
8388         nritems = btrfs_header_nritems(eb);
8389
8390         /* the search failed, lets scan this node and hope we find it */
8391         for (slot = 0; slot < nritems; slot++) {
8392                 found =  btrfs_node_blockptr(eb, slot);
8393                 if (found == corrupt->cache.start)
8394                         goto del_ptr;
8395         }
8396         /*
8397          * we couldn't find the bad block.  TODO, search all the nodes for pointers
8398          * to this block
8399          */
8400         if (eb == info->extent_root->node) {
8401                 ret = -ENOENT;
8402                 goto out;
8403         } else {
8404                 level++;
8405                 btrfs_release_path(&path);
8406                 goto again;
8407         }
8408
8409 del_ptr:
8410         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8411         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8412
8413 out:
8414         btrfs_release_path(&path);
8415         return ret;
8416 }
8417
8418 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8419 {
8420         struct btrfs_trans_handle *trans = NULL;
8421         struct cache_extent *cache;
8422         struct btrfs_corrupt_block *corrupt;
8423
8424         while (1) {
8425                 cache = search_cache_extent(info->corrupt_blocks, 0);
8426                 if (!cache)
8427                         break;
8428                 if (!trans) {
8429                         trans = btrfs_start_transaction(info->extent_root, 1);
8430                         if (IS_ERR(trans))
8431                                 return PTR_ERR(trans);
8432                 }
8433                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8434                 prune_one_block(trans, info, corrupt);
8435                 remove_cache_extent(info->corrupt_blocks, cache);
8436         }
8437         if (trans)
8438                 return btrfs_commit_transaction(trans, info->extent_root);
8439         return 0;
8440 }
8441
8442 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8443 {
8444         struct btrfs_block_group_cache *cache;
8445         u64 start, end;
8446         int ret;
8447
8448         while (1) {
8449                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8450                                             &start, &end, EXTENT_DIRTY);
8451                 if (ret)
8452                         break;
8453                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8454                                    GFP_NOFS);
8455         }
8456
8457         start = 0;
8458         while (1) {
8459                 cache = btrfs_lookup_first_block_group(fs_info, start);
8460                 if (!cache)
8461                         break;
8462                 if (cache->cached)
8463                         cache->cached = 0;
8464                 start = cache->key.objectid + cache->key.offset;
8465         }
8466 }
8467
8468 static int check_extent_refs(struct btrfs_root *root,
8469                              struct cache_tree *extent_cache)
8470 {
8471         struct extent_record *rec;
8472         struct cache_extent *cache;
8473         int err = 0;
8474         int ret = 0;
8475         int fixed = 0;
8476         int had_dups = 0;
8477         int recorded = 0;
8478
8479         if (repair) {
8480                 /*
8481                  * if we're doing a repair, we have to make sure
8482                  * we don't allocate from the problem extents.
8483                  * In the worst case, this will be all the
8484                  * extents in the FS
8485                  */
8486                 cache = search_cache_extent(extent_cache, 0);
8487                 while(cache) {
8488                         rec = container_of(cache, struct extent_record, cache);
8489                         set_extent_dirty(root->fs_info->excluded_extents,
8490                                          rec->start,
8491                                          rec->start + rec->max_size - 1,
8492                                          GFP_NOFS);
8493                         cache = next_cache_extent(cache);
8494                 }
8495
8496                 /* pin down all the corrupted blocks too */
8497                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8498                 while(cache) {
8499                         set_extent_dirty(root->fs_info->excluded_extents,
8500                                          cache->start,
8501                                          cache->start + cache->size - 1,
8502                                          GFP_NOFS);
8503                         cache = next_cache_extent(cache);
8504                 }
8505                 prune_corrupt_blocks(root->fs_info);
8506                 reset_cached_block_groups(root->fs_info);
8507         }
8508
8509         reset_cached_block_groups(root->fs_info);
8510
8511         /*
8512          * We need to delete any duplicate entries we find first otherwise we
8513          * could mess up the extent tree when we have backrefs that actually
8514          * belong to a different extent item and not the weird duplicate one.
8515          */
8516         while (repair && !list_empty(&duplicate_extents)) {
8517                 rec = to_extent_record(duplicate_extents.next);
8518                 list_del_init(&rec->list);
8519
8520                 /* Sometimes we can find a backref before we find an actual
8521                  * extent, so we need to process it a little bit to see if there
8522                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8523                  * if this is a backref screwup.  If we need to delete stuff
8524                  * process_duplicates() will return 0, otherwise it will return
8525                  * 1 and we
8526                  */
8527                 if (process_duplicates(root, extent_cache, rec))
8528                         continue;
8529                 ret = delete_duplicate_records(root, rec);
8530                 if (ret < 0)
8531                         return ret;
8532                 /*
8533                  * delete_duplicate_records will return the number of entries
8534                  * deleted, so if it's greater than 0 then we know we actually
8535                  * did something and we need to remove.
8536                  */
8537                 if (ret)
8538                         had_dups = 1;
8539         }
8540
8541         if (had_dups)
8542                 return -EAGAIN;
8543
8544         while(1) {
8545                 int cur_err = 0;
8546
8547                 fixed = 0;
8548                 recorded = 0;
8549                 cache = search_cache_extent(extent_cache, 0);
8550                 if (!cache)
8551                         break;
8552                 rec = container_of(cache, struct extent_record, cache);
8553                 if (rec->num_duplicates) {
8554                         fprintf(stderr, "extent item %llu has multiple extent "
8555                                 "items\n", (unsigned long long)rec->start);
8556                         err = 1;
8557                         cur_err = 1;
8558                 }
8559
8560                 if (rec->refs != rec->extent_item_refs) {
8561                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8562                                 (unsigned long long)rec->start,
8563                                 (unsigned long long)rec->nr);
8564                         fprintf(stderr, "extent item %llu, found %llu\n",
8565                                 (unsigned long long)rec->extent_item_refs,
8566                                 (unsigned long long)rec->refs);
8567                         ret = record_orphan_data_extents(root->fs_info, rec);
8568                         if (ret < 0)
8569                                 goto repair_abort;
8570                         if (ret == 0) {
8571                                 recorded = 1;
8572                         } else {
8573                                 /*
8574                                  * we can't use the extent to repair file
8575                                  * extent, let the fallback method handle it.
8576                                  */
8577                                 if (!fixed && repair) {
8578                                         ret = fixup_extent_refs(
8579                                                         root->fs_info,
8580                                                         extent_cache, rec);
8581                                         if (ret)
8582                                                 goto repair_abort;
8583                                         fixed = 1;
8584                                 }
8585                         }
8586                         err = 1;
8587                         cur_err = 1;
8588                 }
8589                 if (all_backpointers_checked(rec, 1)) {
8590                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8591                                 (unsigned long long)rec->start,
8592                                 (unsigned long long)rec->nr);
8593
8594                         if (!fixed && !recorded && repair) {
8595                                 ret = fixup_extent_refs(root->fs_info,
8596                                                         extent_cache, rec);
8597                                 if (ret)
8598                                         goto repair_abort;
8599                                 fixed = 1;
8600                         }
8601                         cur_err = 1;
8602                         err = 1;
8603                 }
8604                 if (!rec->owner_ref_checked) {
8605                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8606                                 (unsigned long long)rec->start,
8607                                 (unsigned long long)rec->nr);
8608                         if (!fixed && !recorded && repair) {
8609                                 ret = fixup_extent_refs(root->fs_info,
8610                                                         extent_cache, rec);
8611                                 if (ret)
8612                                         goto repair_abort;
8613                                 fixed = 1;
8614                         }
8615                         err = 1;
8616                         cur_err = 1;
8617                 }
8618                 if (rec->bad_full_backref) {
8619                         fprintf(stderr, "bad full backref, on [%llu]\n",
8620                                 (unsigned long long)rec->start);
8621                         if (repair) {
8622                                 ret = fixup_extent_flags(root->fs_info, rec);
8623                                 if (ret)
8624                                         goto repair_abort;
8625                                 fixed = 1;
8626                         }
8627                         err = 1;
8628                         cur_err = 1;
8629                 }
8630                 /*
8631                  * Although it's not a extent ref's problem, we reuse this
8632                  * routine for error reporting.
8633                  * No repair function yet.
8634                  */
8635                 if (rec->crossing_stripes) {
8636                         fprintf(stderr,
8637                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8638                                 rec->start, rec->start + rec->max_size);
8639                         err = 1;
8640                         cur_err = 1;
8641                 }
8642
8643                 if (rec->wrong_chunk_type) {
8644                         fprintf(stderr,
8645                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8646                                 rec->start, rec->start + rec->max_size);
8647                         err = 1;
8648                         cur_err = 1;
8649                 }
8650
8651                 remove_cache_extent(extent_cache, cache);
8652                 free_all_extent_backrefs(rec);
8653                 if (!init_extent_tree && repair && (!cur_err || fixed))
8654                         clear_extent_dirty(root->fs_info->excluded_extents,
8655                                            rec->start,
8656                                            rec->start + rec->max_size - 1,
8657                                            GFP_NOFS);
8658                 free(rec);
8659         }
8660 repair_abort:
8661         if (repair) {
8662                 if (ret && ret != -EAGAIN) {
8663                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8664                         exit(1);
8665                 } else if (!ret) {
8666                         struct btrfs_trans_handle *trans;
8667
8668                         root = root->fs_info->extent_root;
8669                         trans = btrfs_start_transaction(root, 1);
8670                         if (IS_ERR(trans)) {
8671                                 ret = PTR_ERR(trans);
8672                                 goto repair_abort;
8673                         }
8674
8675                         btrfs_fix_block_accounting(trans, root);
8676                         ret = btrfs_commit_transaction(trans, root);
8677                         if (ret)
8678                                 goto repair_abort;
8679                 }
8680                 if (err)
8681                         fprintf(stderr, "repaired damaged extent references\n");
8682                 return ret;
8683         }
8684         return err;
8685 }
8686
8687 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8688 {
8689         u64 stripe_size;
8690
8691         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8692                 stripe_size = length;
8693                 stripe_size /= num_stripes;
8694         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8695                 stripe_size = length * 2;
8696                 stripe_size /= num_stripes;
8697         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8698                 stripe_size = length;
8699                 stripe_size /= (num_stripes - 1);
8700         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8701                 stripe_size = length;
8702                 stripe_size /= (num_stripes - 2);
8703         } else {
8704                 stripe_size = length;
8705         }
8706         return stripe_size;
8707 }
8708
8709 /*
8710  * Check the chunk with its block group/dev list ref:
8711  * Return 0 if all refs seems valid.
8712  * Return 1 if part of refs seems valid, need later check for rebuild ref
8713  * like missing block group and needs to search extent tree to rebuild them.
8714  * Return -1 if essential refs are missing and unable to rebuild.
8715  */
8716 static int check_chunk_refs(struct chunk_record *chunk_rec,
8717                             struct block_group_tree *block_group_cache,
8718                             struct device_extent_tree *dev_extent_cache,
8719                             int silent)
8720 {
8721         struct cache_extent *block_group_item;
8722         struct block_group_record *block_group_rec;
8723         struct cache_extent *dev_extent_item;
8724         struct device_extent_record *dev_extent_rec;
8725         u64 devid;
8726         u64 offset;
8727         u64 length;
8728         int metadump_v2 = 0;
8729         int i;
8730         int ret = 0;
8731
8732         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8733                                                chunk_rec->offset,
8734                                                chunk_rec->length);
8735         if (block_group_item) {
8736                 block_group_rec = container_of(block_group_item,
8737                                                struct block_group_record,
8738                                                cache);
8739                 if (chunk_rec->length != block_group_rec->offset ||
8740                     chunk_rec->offset != block_group_rec->objectid ||
8741                     (!metadump_v2 &&
8742                      chunk_rec->type_flags != block_group_rec->flags)) {
8743                         if (!silent)
8744                                 fprintf(stderr,
8745                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8746                                         chunk_rec->objectid,
8747                                         chunk_rec->type,
8748                                         chunk_rec->offset,
8749                                         chunk_rec->length,
8750                                         chunk_rec->offset,
8751                                         chunk_rec->type_flags,
8752                                         block_group_rec->objectid,
8753                                         block_group_rec->type,
8754                                         block_group_rec->offset,
8755                                         block_group_rec->offset,
8756                                         block_group_rec->objectid,
8757                                         block_group_rec->flags);
8758                         ret = -1;
8759                 } else {
8760                         list_del_init(&block_group_rec->list);
8761                         chunk_rec->bg_rec = block_group_rec;
8762                 }
8763         } else {
8764                 if (!silent)
8765                         fprintf(stderr,
8766                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8767                                 chunk_rec->objectid,
8768                                 chunk_rec->type,
8769                                 chunk_rec->offset,
8770                                 chunk_rec->length,
8771                                 chunk_rec->offset,
8772                                 chunk_rec->type_flags);
8773                 ret = 1;
8774         }
8775
8776         if (metadump_v2)
8777                 return ret;
8778
8779         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8780                                     chunk_rec->num_stripes);
8781         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8782                 devid = chunk_rec->stripes[i].devid;
8783                 offset = chunk_rec->stripes[i].offset;
8784                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8785                                                        devid, offset, length);
8786                 if (dev_extent_item) {
8787                         dev_extent_rec = container_of(dev_extent_item,
8788                                                 struct device_extent_record,
8789                                                 cache);
8790                         if (dev_extent_rec->objectid != devid ||
8791                             dev_extent_rec->offset != offset ||
8792                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8793                             dev_extent_rec->length != length) {
8794                                 if (!silent)
8795                                         fprintf(stderr,
8796                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8797                                                 chunk_rec->objectid,
8798                                                 chunk_rec->type,
8799                                                 chunk_rec->offset,
8800                                                 chunk_rec->stripes[i].devid,
8801                                                 chunk_rec->stripes[i].offset,
8802                                                 dev_extent_rec->objectid,
8803                                                 dev_extent_rec->offset,
8804                                                 dev_extent_rec->length);
8805                                 ret = -1;
8806                         } else {
8807                                 list_move(&dev_extent_rec->chunk_list,
8808                                           &chunk_rec->dextents);
8809                         }
8810                 } else {
8811                         if (!silent)
8812                                 fprintf(stderr,
8813                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8814                                         chunk_rec->objectid,
8815                                         chunk_rec->type,
8816                                         chunk_rec->offset,
8817                                         chunk_rec->stripes[i].devid,
8818                                         chunk_rec->stripes[i].offset);
8819                         ret = -1;
8820                 }
8821         }
8822         return ret;
8823 }
8824
8825 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8826 int check_chunks(struct cache_tree *chunk_cache,
8827                  struct block_group_tree *block_group_cache,
8828                  struct device_extent_tree *dev_extent_cache,
8829                  struct list_head *good, struct list_head *bad,
8830                  struct list_head *rebuild, int silent)
8831 {
8832         struct cache_extent *chunk_item;
8833         struct chunk_record *chunk_rec;
8834         struct block_group_record *bg_rec;
8835         struct device_extent_record *dext_rec;
8836         int err;
8837         int ret = 0;
8838
8839         chunk_item = first_cache_extent(chunk_cache);
8840         while (chunk_item) {
8841                 chunk_rec = container_of(chunk_item, struct chunk_record,
8842                                          cache);
8843                 err = check_chunk_refs(chunk_rec, block_group_cache,
8844                                        dev_extent_cache, silent);
8845                 if (err < 0)
8846                         ret = err;
8847                 if (err == 0 && good)
8848                         list_add_tail(&chunk_rec->list, good);
8849                 if (err > 0 && rebuild)
8850                         list_add_tail(&chunk_rec->list, rebuild);
8851                 if (err < 0 && bad)
8852                         list_add_tail(&chunk_rec->list, bad);
8853                 chunk_item = next_cache_extent(chunk_item);
8854         }
8855
8856         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8857                 if (!silent)
8858                         fprintf(stderr,
8859                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8860                                 bg_rec->objectid,
8861                                 bg_rec->offset,
8862                                 bg_rec->flags);
8863                 if (!ret)
8864                         ret = 1;
8865         }
8866
8867         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8868                             chunk_list) {
8869                 if (!silent)
8870                         fprintf(stderr,
8871                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8872                                 dext_rec->objectid,
8873                                 dext_rec->offset,
8874                                 dext_rec->length);
8875                 if (!ret)
8876                         ret = 1;
8877         }
8878         return ret;
8879 }
8880
8881
8882 static int check_device_used(struct device_record *dev_rec,
8883                              struct device_extent_tree *dext_cache)
8884 {
8885         struct cache_extent *cache;
8886         struct device_extent_record *dev_extent_rec;
8887         u64 total_byte = 0;
8888
8889         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8890         while (cache) {
8891                 dev_extent_rec = container_of(cache,
8892                                               struct device_extent_record,
8893                                               cache);
8894                 if (dev_extent_rec->objectid != dev_rec->devid)
8895                         break;
8896
8897                 list_del_init(&dev_extent_rec->device_list);
8898                 total_byte += dev_extent_rec->length;
8899                 cache = next_cache_extent(cache);
8900         }
8901
8902         if (total_byte != dev_rec->byte_used) {
8903                 fprintf(stderr,
8904                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8905                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8906                         dev_rec->type, dev_rec->offset);
8907                 return -1;
8908         } else {
8909                 return 0;
8910         }
8911 }
8912
8913 /* check btrfs_dev_item -> btrfs_dev_extent */
8914 static int check_devices(struct rb_root *dev_cache,
8915                          struct device_extent_tree *dev_extent_cache)
8916 {
8917         struct rb_node *dev_node;
8918         struct device_record *dev_rec;
8919         struct device_extent_record *dext_rec;
8920         int err;
8921         int ret = 0;
8922
8923         dev_node = rb_first(dev_cache);
8924         while (dev_node) {
8925                 dev_rec = container_of(dev_node, struct device_record, node);
8926                 err = check_device_used(dev_rec, dev_extent_cache);
8927                 if (err)
8928                         ret = err;
8929
8930                 dev_node = rb_next(dev_node);
8931         }
8932         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8933                             device_list) {
8934                 fprintf(stderr,
8935                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8936                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8937                 if (!ret)
8938                         ret = 1;
8939         }
8940         return ret;
8941 }
8942
8943 static int add_root_item_to_list(struct list_head *head,
8944                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8945                                   u8 level, u8 drop_level,
8946                                   int level_size, struct btrfs_key *drop_key)
8947 {
8948
8949         struct root_item_record *ri_rec;
8950         ri_rec = malloc(sizeof(*ri_rec));
8951         if (!ri_rec)
8952                 return -ENOMEM;
8953         ri_rec->bytenr = bytenr;
8954         ri_rec->objectid = objectid;
8955         ri_rec->level = level;
8956         ri_rec->level_size = level_size;
8957         ri_rec->drop_level = drop_level;
8958         ri_rec->last_snapshot = last_snapshot;
8959         if (drop_key)
8960                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8961         list_add_tail(&ri_rec->list, head);
8962
8963         return 0;
8964 }
8965
8966 static void free_root_item_list(struct list_head *list)
8967 {
8968         struct root_item_record *ri_rec;
8969
8970         while (!list_empty(list)) {
8971                 ri_rec = list_first_entry(list, struct root_item_record,
8972                                           list);
8973                 list_del_init(&ri_rec->list);
8974                 free(ri_rec);
8975         }
8976 }
8977
8978 static int deal_root_from_list(struct list_head *list,
8979                                struct btrfs_root *root,
8980                                struct block_info *bits,
8981                                int bits_nr,
8982                                struct cache_tree *pending,
8983                                struct cache_tree *seen,
8984                                struct cache_tree *reada,
8985                                struct cache_tree *nodes,
8986                                struct cache_tree *extent_cache,
8987                                struct cache_tree *chunk_cache,
8988                                struct rb_root *dev_cache,
8989                                struct block_group_tree *block_group_cache,
8990                                struct device_extent_tree *dev_extent_cache)
8991 {
8992         int ret = 0;
8993         u64 last;
8994
8995         while (!list_empty(list)) {
8996                 struct root_item_record *rec;
8997                 struct extent_buffer *buf;
8998                 rec = list_entry(list->next,
8999                                  struct root_item_record, list);
9000                 last = 0;
9001                 buf = read_tree_block(root->fs_info->tree_root,
9002                                       rec->bytenr, rec->level_size, 0);
9003                 if (!extent_buffer_uptodate(buf)) {
9004                         free_extent_buffer(buf);
9005                         ret = -EIO;
9006                         break;
9007                 }
9008                 ret = add_root_to_pending(buf, extent_cache, pending,
9009                                     seen, nodes, rec->objectid);
9010                 if (ret < 0)
9011                         break;
9012                 /*
9013                  * To rebuild extent tree, we need deal with snapshot
9014                  * one by one, otherwise we deal with node firstly which
9015                  * can maximize readahead.
9016                  */
9017                 while (1) {
9018                         ret = run_next_block(root, bits, bits_nr, &last,
9019                                              pending, seen, reada, nodes,
9020                                              extent_cache, chunk_cache,
9021                                              dev_cache, block_group_cache,
9022                                              dev_extent_cache, rec);
9023                         if (ret != 0)
9024                                 break;
9025                 }
9026                 free_extent_buffer(buf);
9027                 list_del(&rec->list);
9028                 free(rec);
9029                 if (ret < 0)
9030                         break;
9031         }
9032         while (ret >= 0) {
9033                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9034                                      reada, nodes, extent_cache, chunk_cache,
9035                                      dev_cache, block_group_cache,
9036                                      dev_extent_cache, NULL);
9037                 if (ret != 0) {
9038                         if (ret > 0)
9039                                 ret = 0;
9040                         break;
9041                 }
9042         }
9043         return ret;
9044 }
9045
9046 static int check_chunks_and_extents(struct btrfs_root *root)
9047 {
9048         struct rb_root dev_cache;
9049         struct cache_tree chunk_cache;
9050         struct block_group_tree block_group_cache;
9051         struct device_extent_tree dev_extent_cache;
9052         struct cache_tree extent_cache;
9053         struct cache_tree seen;
9054         struct cache_tree pending;
9055         struct cache_tree reada;
9056         struct cache_tree nodes;
9057         struct extent_io_tree excluded_extents;
9058         struct cache_tree corrupt_blocks;
9059         struct btrfs_path path;
9060         struct btrfs_key key;
9061         struct btrfs_key found_key;
9062         int ret, err = 0;
9063         struct block_info *bits;
9064         int bits_nr;
9065         struct extent_buffer *leaf;
9066         int slot;
9067         struct btrfs_root_item ri;
9068         struct list_head dropping_trees;
9069         struct list_head normal_trees;
9070         struct btrfs_root *root1;
9071         u64 objectid;
9072         u32 level_size;
9073         u8 level;
9074
9075         dev_cache = RB_ROOT;
9076         cache_tree_init(&chunk_cache);
9077         block_group_tree_init(&block_group_cache);
9078         device_extent_tree_init(&dev_extent_cache);
9079
9080         cache_tree_init(&extent_cache);
9081         cache_tree_init(&seen);
9082         cache_tree_init(&pending);
9083         cache_tree_init(&nodes);
9084         cache_tree_init(&reada);
9085         cache_tree_init(&corrupt_blocks);
9086         extent_io_tree_init(&excluded_extents);
9087         INIT_LIST_HEAD(&dropping_trees);
9088         INIT_LIST_HEAD(&normal_trees);
9089
9090         if (repair) {
9091                 root->fs_info->excluded_extents = &excluded_extents;
9092                 root->fs_info->fsck_extent_cache = &extent_cache;
9093                 root->fs_info->free_extent_hook = free_extent_hook;
9094                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9095         }
9096
9097         bits_nr = 1024;
9098         bits = malloc(bits_nr * sizeof(struct block_info));
9099         if (!bits) {
9100                 perror("malloc");
9101                 exit(1);
9102         }
9103
9104         if (ctx.progress_enabled) {
9105                 ctx.tp = TASK_EXTENTS;
9106                 task_start(ctx.info);
9107         }
9108
9109 again:
9110         root1 = root->fs_info->tree_root;
9111         level = btrfs_header_level(root1->node);
9112         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9113                                     root1->node->start, 0, level, 0,
9114                                     root1->nodesize, NULL);
9115         if (ret < 0)
9116                 goto out;
9117         root1 = root->fs_info->chunk_root;
9118         level = btrfs_header_level(root1->node);
9119         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9120                                     root1->node->start, 0, level, 0,
9121                                     root1->nodesize, NULL);
9122         if (ret < 0)
9123                 goto out;
9124         btrfs_init_path(&path);
9125         key.offset = 0;
9126         key.objectid = 0;
9127         key.type = BTRFS_ROOT_ITEM_KEY;
9128         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9129                                         &key, &path, 0, 0);
9130         if (ret < 0)
9131                 goto out;
9132         while(1) {
9133                 leaf = path.nodes[0];
9134                 slot = path.slots[0];
9135                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9136                         ret = btrfs_next_leaf(root, &path);
9137                         if (ret != 0)
9138                                 break;
9139                         leaf = path.nodes[0];
9140                         slot = path.slots[0];
9141                 }
9142                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9143                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9144                         unsigned long offset;
9145                         u64 last_snapshot;
9146
9147                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9148                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9149                         last_snapshot = btrfs_root_last_snapshot(&ri);
9150                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9151                                 level = btrfs_root_level(&ri);
9152                                 level_size = root->nodesize;
9153                                 ret = add_root_item_to_list(&normal_trees,
9154                                                 found_key.objectid,
9155                                                 btrfs_root_bytenr(&ri),
9156                                                 last_snapshot, level,
9157                                                 0, level_size, NULL);
9158                                 if (ret < 0)
9159                                         goto out;
9160                         } else {
9161                                 level = btrfs_root_level(&ri);
9162                                 level_size = root->nodesize;
9163                                 objectid = found_key.objectid;
9164                                 btrfs_disk_key_to_cpu(&found_key,
9165                                                       &ri.drop_progress);
9166                                 ret = add_root_item_to_list(&dropping_trees,
9167                                                 objectid,
9168                                                 btrfs_root_bytenr(&ri),
9169                                                 last_snapshot, level,
9170                                                 ri.drop_level,
9171                                                 level_size, &found_key);
9172                                 if (ret < 0)
9173                                         goto out;
9174                         }
9175                 }
9176                 path.slots[0]++;
9177         }
9178         btrfs_release_path(&path);
9179
9180         /*
9181          * check_block can return -EAGAIN if it fixes something, please keep
9182          * this in mind when dealing with return values from these functions, if
9183          * we get -EAGAIN we want to fall through and restart the loop.
9184          */
9185         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9186                                   &seen, &reada, &nodes, &extent_cache,
9187                                   &chunk_cache, &dev_cache, &block_group_cache,
9188                                   &dev_extent_cache);
9189         if (ret < 0) {
9190                 if (ret == -EAGAIN)
9191                         goto loop;
9192                 goto out;
9193         }
9194         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9195                                   &pending, &seen, &reada, &nodes,
9196                                   &extent_cache, &chunk_cache, &dev_cache,
9197                                   &block_group_cache, &dev_extent_cache);
9198         if (ret < 0) {
9199                 if (ret == -EAGAIN)
9200                         goto loop;
9201                 goto out;
9202         }
9203
9204         ret = check_chunks(&chunk_cache, &block_group_cache,
9205                            &dev_extent_cache, NULL, NULL, NULL, 0);
9206         if (ret) {
9207                 if (ret == -EAGAIN)
9208                         goto loop;
9209                 err = ret;
9210         }
9211
9212         ret = check_extent_refs(root, &extent_cache);
9213         if (ret < 0) {
9214                 if (ret == -EAGAIN)
9215                         goto loop;
9216                 goto out;
9217         }
9218
9219         ret = check_devices(&dev_cache, &dev_extent_cache);
9220         if (ret && err)
9221                 ret = err;
9222
9223 out:
9224         task_stop(ctx.info);
9225         if (repair) {
9226                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9227                 extent_io_tree_cleanup(&excluded_extents);
9228                 root->fs_info->fsck_extent_cache = NULL;
9229                 root->fs_info->free_extent_hook = NULL;
9230                 root->fs_info->corrupt_blocks = NULL;
9231                 root->fs_info->excluded_extents = NULL;
9232         }
9233         free(bits);
9234         free_chunk_cache_tree(&chunk_cache);
9235         free_device_cache_tree(&dev_cache);
9236         free_block_group_tree(&block_group_cache);
9237         free_device_extent_tree(&dev_extent_cache);
9238         free_extent_cache_tree(&seen);
9239         free_extent_cache_tree(&pending);
9240         free_extent_cache_tree(&reada);
9241         free_extent_cache_tree(&nodes);
9242         return ret;
9243 loop:
9244         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9245         free_extent_cache_tree(&seen);
9246         free_extent_cache_tree(&pending);
9247         free_extent_cache_tree(&reada);
9248         free_extent_cache_tree(&nodes);
9249         free_chunk_cache_tree(&chunk_cache);
9250         free_block_group_tree(&block_group_cache);
9251         free_device_cache_tree(&dev_cache);
9252         free_device_extent_tree(&dev_extent_cache);
9253         free_extent_record_cache(root->fs_info, &extent_cache);
9254         free_root_item_list(&normal_trees);
9255         free_root_item_list(&dropping_trees);
9256         extent_io_tree_cleanup(&excluded_extents);
9257         goto again;
9258 }
9259
9260 /*
9261  * Check backrefs of a tree block given by @bytenr or @eb.
9262  *
9263  * @root:       the root containing the @bytenr or @eb
9264  * @eb:         tree block extent buffer, can be NULL
9265  * @bytenr:     bytenr of the tree block to search
9266  * @level:      tree level of the tree block
9267  * @owner:      owner of the tree block
9268  *
9269  * Return >0 for any error found and output error message
9270  * Return 0 for no error found
9271  */
9272 static int check_tree_block_ref(struct btrfs_root *root,
9273                                 struct extent_buffer *eb, u64 bytenr,
9274                                 int level, u64 owner)
9275 {
9276         struct btrfs_key key;
9277         struct btrfs_root *extent_root = root->fs_info->extent_root;
9278         struct btrfs_path path;
9279         struct btrfs_extent_item *ei;
9280         struct btrfs_extent_inline_ref *iref;
9281         struct extent_buffer *leaf;
9282         unsigned long end;
9283         unsigned long ptr;
9284         int slot;
9285         int skinny_level;
9286         int type;
9287         u32 nodesize = root->nodesize;
9288         u32 item_size;
9289         u64 offset;
9290         int found_ref = 0;
9291         int err = 0;
9292         int ret;
9293
9294         btrfs_init_path(&path);
9295         key.objectid = bytenr;
9296         if (btrfs_fs_incompat(root->fs_info,
9297                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9298                 key.type = BTRFS_METADATA_ITEM_KEY;
9299         else
9300                 key.type = BTRFS_EXTENT_ITEM_KEY;
9301         key.offset = (u64)-1;
9302
9303         /* Search for the backref in extent tree */
9304         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9305         if (ret < 0) {
9306                 err |= BACKREF_MISSING;
9307                 goto out;
9308         }
9309         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9310         if (ret) {
9311                 err |= BACKREF_MISSING;
9312                 goto out;
9313         }
9314
9315         leaf = path.nodes[0];
9316         slot = path.slots[0];
9317         btrfs_item_key_to_cpu(leaf, &key, slot);
9318
9319         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9320
9321         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9322                 skinny_level = (int)key.offset;
9323                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9324         } else {
9325                 struct btrfs_tree_block_info *info;
9326
9327                 info = (struct btrfs_tree_block_info *)(ei + 1);
9328                 skinny_level = btrfs_tree_block_level(leaf, info);
9329                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9330         }
9331
9332         if (eb) {
9333                 u64 header_gen;
9334                 u64 extent_gen;
9335
9336                 if (!(btrfs_extent_flags(leaf, ei) &
9337                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9338                         error(
9339                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9340                                 key.objectid, nodesize,
9341                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9342                         err = BACKREF_MISMATCH;
9343                 }
9344                 header_gen = btrfs_header_generation(eb);
9345                 extent_gen = btrfs_extent_generation(leaf, ei);
9346                 if (header_gen != extent_gen) {
9347                         error(
9348         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9349                                 key.objectid, nodesize, header_gen,
9350                                 extent_gen);
9351                         err = BACKREF_MISMATCH;
9352                 }
9353                 if (level != skinny_level) {
9354                         error(
9355                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9356                                 key.objectid, nodesize, level, skinny_level);
9357                         err = BACKREF_MISMATCH;
9358                 }
9359                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9360                         error(
9361                         "extent[%llu %u] is referred by other roots than %llu",
9362                                 key.objectid, nodesize, root->objectid);
9363                         err = BACKREF_MISMATCH;
9364                 }
9365         }
9366
9367         /*
9368          * Iterate the extent/metadata item to find the exact backref
9369          */
9370         item_size = btrfs_item_size_nr(leaf, slot);
9371         ptr = (unsigned long)iref;
9372         end = (unsigned long)ei + item_size;
9373         while (ptr < end) {
9374                 iref = (struct btrfs_extent_inline_ref *)ptr;
9375                 type = btrfs_extent_inline_ref_type(leaf, iref);
9376                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9377
9378                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9379                         (offset == root->objectid || offset == owner)) {
9380                         found_ref = 1;
9381                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9382                         /* Check if the backref points to valid referencer */
9383                         found_ref = !check_tree_block_ref(root, NULL, offset,
9384                                                           level + 1, owner);
9385                 }
9386
9387                 if (found_ref)
9388                         break;
9389                 ptr += btrfs_extent_inline_ref_size(type);
9390         }
9391
9392         /*
9393          * Inlined extent item doesn't have what we need, check
9394          * TREE_BLOCK_REF_KEY
9395          */
9396         if (!found_ref) {
9397                 btrfs_release_path(&path);
9398                 key.objectid = bytenr;
9399                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9400                 key.offset = root->objectid;
9401
9402                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9403                 if (!ret)
9404                         found_ref = 1;
9405         }
9406         if (!found_ref)
9407                 err |= BACKREF_MISSING;
9408 out:
9409         btrfs_release_path(&path);
9410         if (eb && (err & BACKREF_MISSING))
9411                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9412                         bytenr, nodesize, owner, level);
9413         return err;
9414 }
9415
9416 /*
9417  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9418  *
9419  * Return >0 any error found and output error message
9420  * Return 0 for no error found
9421  */
9422 static int check_extent_data_item(struct btrfs_root *root,
9423                                   struct extent_buffer *eb, int slot)
9424 {
9425         struct btrfs_file_extent_item *fi;
9426         struct btrfs_path path;
9427         struct btrfs_root *extent_root = root->fs_info->extent_root;
9428         struct btrfs_key fi_key;
9429         struct btrfs_key dbref_key;
9430         struct extent_buffer *leaf;
9431         struct btrfs_extent_item *ei;
9432         struct btrfs_extent_inline_ref *iref;
9433         struct btrfs_extent_data_ref *dref;
9434         u64 owner;
9435         u64 file_extent_gen;
9436         u64 disk_bytenr;
9437         u64 disk_num_bytes;
9438         u64 extent_num_bytes;
9439         u64 extent_flags;
9440         u64 extent_gen;
9441         u32 item_size;
9442         unsigned long end;
9443         unsigned long ptr;
9444         int type;
9445         u64 ref_root;
9446         int found_dbackref = 0;
9447         int err = 0;
9448         int ret;
9449
9450         btrfs_item_key_to_cpu(eb, &fi_key, slot);
9451         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9452         file_extent_gen = btrfs_file_extent_generation(eb, fi);
9453
9454         /* Nothing to check for hole and inline data extents */
9455         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9456             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9457                 return 0;
9458
9459         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9460         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9461         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9462
9463         /* Check unaligned disk_num_bytes and num_bytes */
9464         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9465                 error(
9466 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9467                         fi_key.objectid, fi_key.offset, disk_num_bytes,
9468                         root->sectorsize);
9469                 err |= BYTES_UNALIGNED;
9470         } else {
9471                 data_bytes_allocated += disk_num_bytes;
9472         }
9473         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9474                 error(
9475 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9476                         fi_key.objectid, fi_key.offset, extent_num_bytes,
9477                         root->sectorsize);
9478                 err |= BYTES_UNALIGNED;
9479         } else {
9480                 data_bytes_referenced += extent_num_bytes;
9481         }
9482         owner = btrfs_header_owner(eb);
9483
9484         /* Check the extent item of the file extent in extent tree */
9485         btrfs_init_path(&path);
9486         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9487         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9488         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9489
9490         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9491         if (ret) {
9492                 err |= BACKREF_MISSING;
9493                 goto error;
9494         }
9495
9496         leaf = path.nodes[0];
9497         slot = path.slots[0];
9498         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9499
9500         extent_flags = btrfs_extent_flags(leaf, ei);
9501         extent_gen = btrfs_extent_generation(leaf, ei);
9502
9503         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9504                 error(
9505                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9506                     disk_bytenr, disk_num_bytes,
9507                     BTRFS_EXTENT_FLAG_DATA);
9508                 err |= BACKREF_MISMATCH;
9509         }
9510
9511         if (file_extent_gen < extent_gen) {
9512                 error(
9513 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9514                         disk_bytenr, disk_num_bytes, file_extent_gen,
9515                         extent_gen);
9516                 err |= BACKREF_MISMATCH;
9517         }
9518
9519         /* Check data backref inside that extent item */
9520         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9521         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9522         ptr = (unsigned long)iref;
9523         end = (unsigned long)ei + item_size;
9524         while (ptr < end) {
9525                 iref = (struct btrfs_extent_inline_ref *)ptr;
9526                 type = btrfs_extent_inline_ref_type(leaf, iref);
9527                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9528
9529                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9530                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
9531                         if (ref_root == owner || ref_root == root->objectid)
9532                                 found_dbackref = 1;
9533                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9534                         found_dbackref = !check_tree_block_ref(root, NULL,
9535                                 btrfs_extent_inline_ref_offset(leaf, iref),
9536                                 0, owner);
9537                 }
9538
9539                 if (found_dbackref)
9540                         break;
9541                 ptr += btrfs_extent_inline_ref_size(type);
9542         }
9543
9544         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9545         if (!found_dbackref) {
9546                 btrfs_release_path(&path);
9547
9548                 btrfs_init_path(&path);
9549                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9550                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9551                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9552                                 fi_key.objectid, fi_key.offset);
9553
9554                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9555                                         &dbref_key, &path, 0, 0);
9556                 if (!ret)
9557                         found_dbackref = 1;
9558         }
9559
9560         if (!found_dbackref)
9561                 err |= BACKREF_MISSING;
9562 error:
9563         btrfs_release_path(&path);
9564         if (err & BACKREF_MISSING) {
9565                 error("data extent[%llu %llu] backref lost",
9566                       disk_bytenr, disk_num_bytes);
9567         }
9568         return err;
9569 }
9570
9571 /*
9572  * Get real tree block level for the case like shared block
9573  * Return >= 0 as tree level
9574  * Return <0 for error
9575  */
9576 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9577 {
9578         struct extent_buffer *eb;
9579         struct btrfs_path path;
9580         struct btrfs_key key;
9581         struct btrfs_extent_item *ei;
9582         u64 flags;
9583         u64 transid;
9584         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9585         u8 backref_level;
9586         u8 header_level;
9587         int ret;
9588
9589         /* Search extent tree for extent generation and level */
9590         key.objectid = bytenr;
9591         key.type = BTRFS_METADATA_ITEM_KEY;
9592         key.offset = (u64)-1;
9593
9594         btrfs_init_path(&path);
9595         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9596         if (ret < 0)
9597                 goto release_out;
9598         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9599         if (ret < 0)
9600                 goto release_out;
9601         if (ret > 0) {
9602                 ret = -ENOENT;
9603                 goto release_out;
9604         }
9605
9606         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9607         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9608                             struct btrfs_extent_item);
9609         flags = btrfs_extent_flags(path.nodes[0], ei);
9610         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9611                 ret = -ENOENT;
9612                 goto release_out;
9613         }
9614
9615         /* Get transid for later read_tree_block() check */
9616         transid = btrfs_extent_generation(path.nodes[0], ei);
9617
9618         /* Get backref level as one source */
9619         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9620                 backref_level = key.offset;
9621         } else {
9622                 struct btrfs_tree_block_info *info;
9623
9624                 info = (struct btrfs_tree_block_info *)(ei + 1);
9625                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9626         }
9627         btrfs_release_path(&path);
9628
9629         /* Get level from tree block as an alternative source */
9630         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9631         if (!extent_buffer_uptodate(eb)) {
9632                 free_extent_buffer(eb);
9633                 return -EIO;
9634         }
9635         header_level = btrfs_header_level(eb);
9636         free_extent_buffer(eb);
9637
9638         if (header_level != backref_level)
9639                 return -EIO;
9640         return header_level;
9641
9642 release_out:
9643         btrfs_release_path(&path);
9644         return ret;
9645 }
9646
9647 /*
9648  * Check if a tree block backref is valid (points to a valid tree block)
9649  * if level == -1, level will be resolved
9650  * Return >0 for any error found and print error message
9651  */
9652 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9653                                     u64 bytenr, int level)
9654 {
9655         struct btrfs_root *root;
9656         struct btrfs_key key;
9657         struct btrfs_path path;
9658         struct extent_buffer *eb;
9659         struct extent_buffer *node;
9660         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9661         int err = 0;
9662         int ret;
9663
9664         /* Query level for level == -1 special case */
9665         if (level == -1)
9666                 level = query_tree_block_level(fs_info, bytenr);
9667         if (level < 0) {
9668                 err |= REFERENCER_MISSING;
9669                 goto out;
9670         }
9671
9672         key.objectid = root_id;
9673         key.type = BTRFS_ROOT_ITEM_KEY;
9674         key.offset = (u64)-1;
9675
9676         root = btrfs_read_fs_root(fs_info, &key);
9677         if (IS_ERR(root)) {
9678                 err |= REFERENCER_MISSING;
9679                 goto out;
9680         }
9681
9682         /* Read out the tree block to get item/node key */
9683         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9684         if (!extent_buffer_uptodate(eb)) {
9685                 err |= REFERENCER_MISSING;
9686                 free_extent_buffer(eb);
9687                 goto out;
9688         }
9689
9690         /* Empty tree, no need to check key */
9691         if (!btrfs_header_nritems(eb) && !level) {
9692                 free_extent_buffer(eb);
9693                 goto out;
9694         }
9695
9696         if (level)
9697                 btrfs_node_key_to_cpu(eb, &key, 0);
9698         else
9699                 btrfs_item_key_to_cpu(eb, &key, 0);
9700
9701         free_extent_buffer(eb);
9702
9703         btrfs_init_path(&path);
9704         path.lowest_level = level;
9705         /* Search with the first key, to ensure we can reach it */
9706         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9707         if (ret < 0) {
9708                 err |= REFERENCER_MISSING;
9709                 goto release_out;
9710         }
9711
9712         node = path.nodes[level];
9713         if (btrfs_header_bytenr(node) != bytenr) {
9714                 error(
9715         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9716                         bytenr, nodesize, bytenr,
9717                         btrfs_header_bytenr(node));
9718                 err |= REFERENCER_MISMATCH;
9719         }
9720         if (btrfs_header_level(node) != level) {
9721                 error(
9722         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9723                         bytenr, nodesize, level,
9724                         btrfs_header_level(node));
9725                 err |= REFERENCER_MISMATCH;
9726         }
9727
9728 release_out:
9729         btrfs_release_path(&path);
9730 out:
9731         if (err & REFERENCER_MISSING) {
9732                 if (level < 0)
9733                         error("extent [%llu %d] lost referencer (owner: %llu)",
9734                                 bytenr, nodesize, root_id);
9735                 else
9736                         error(
9737                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9738                                 bytenr, nodesize, root_id, level);
9739         }
9740
9741         return err;
9742 }
9743
9744 /*
9745  * Check referencer for shared block backref
9746  * If level == -1, this function will resolve the level.
9747  */
9748 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9749                                      u64 parent, u64 bytenr, int level)
9750 {
9751         struct extent_buffer *eb;
9752         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9753         u32 nr;
9754         int found_parent = 0;
9755         int i;
9756
9757         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9758         if (!extent_buffer_uptodate(eb))
9759                 goto out;
9760
9761         if (level == -1)
9762                 level = query_tree_block_level(fs_info, bytenr);
9763         if (level < 0)
9764                 goto out;
9765
9766         if (level + 1 != btrfs_header_level(eb))
9767                 goto out;
9768
9769         nr = btrfs_header_nritems(eb);
9770         for (i = 0; i < nr; i++) {
9771                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9772                         found_parent = 1;
9773                         break;
9774                 }
9775         }
9776 out:
9777         free_extent_buffer(eb);
9778         if (!found_parent) {
9779                 error(
9780         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9781                         bytenr, nodesize, parent, level);
9782                 return REFERENCER_MISSING;
9783         }
9784         return 0;
9785 }
9786
9787 /*
9788  * Check referencer for normal (inlined) data ref
9789  * If len == 0, it will be resolved by searching in extent tree
9790  */
9791 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9792                                      u64 root_id, u64 objectid, u64 offset,
9793                                      u64 bytenr, u64 len, u32 count)
9794 {
9795         struct btrfs_root *root;
9796         struct btrfs_root *extent_root = fs_info->extent_root;
9797         struct btrfs_key key;
9798         struct btrfs_path path;
9799         struct extent_buffer *leaf;
9800         struct btrfs_file_extent_item *fi;
9801         u32 found_count = 0;
9802         int slot;
9803         int ret = 0;
9804
9805         if (!len) {
9806                 key.objectid = bytenr;
9807                 key.type = BTRFS_EXTENT_ITEM_KEY;
9808                 key.offset = (u64)-1;
9809
9810                 btrfs_init_path(&path);
9811                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9812                 if (ret < 0)
9813                         goto out;
9814                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9815                 if (ret)
9816                         goto out;
9817                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9818                 if (key.objectid != bytenr ||
9819                     key.type != BTRFS_EXTENT_ITEM_KEY)
9820                         goto out;
9821                 len = key.offset;
9822                 btrfs_release_path(&path);
9823         }
9824         key.objectid = root_id;
9825         key.type = BTRFS_ROOT_ITEM_KEY;
9826         key.offset = (u64)-1;
9827         btrfs_init_path(&path);
9828
9829         root = btrfs_read_fs_root(fs_info, &key);
9830         if (IS_ERR(root))
9831                 goto out;
9832
9833         key.objectid = objectid;
9834         key.type = BTRFS_EXTENT_DATA_KEY;
9835         /*
9836          * It can be nasty as data backref offset is
9837          * file offset - file extent offset, which is smaller or
9838          * equal to original backref offset.  The only special case is
9839          * overflow.  So we need to special check and do further search.
9840          */
9841         key.offset = offset & (1ULL << 63) ? 0 : offset;
9842
9843         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9844         if (ret < 0)
9845                 goto out;
9846
9847         /*
9848          * Search afterwards to get correct one
9849          * NOTE: As we must do a comprehensive check on the data backref to
9850          * make sure the dref count also matches, we must iterate all file
9851          * extents for that inode.
9852          */
9853         while (1) {
9854                 leaf = path.nodes[0];
9855                 slot = path.slots[0];
9856
9857                 btrfs_item_key_to_cpu(leaf, &key, slot);
9858                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9859                         break;
9860                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9861                 /*
9862                  * Except normal disk bytenr and disk num bytes, we still
9863                  * need to do extra check on dbackref offset as
9864                  * dbackref offset = file_offset - file_extent_offset
9865                  */
9866                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9867                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9868                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9869                     offset)
9870                         found_count++;
9871
9872                 ret = btrfs_next_item(root, &path);
9873                 if (ret)
9874                         break;
9875         }
9876 out:
9877         btrfs_release_path(&path);
9878         if (found_count != count) {
9879                 error(
9880 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9881                         bytenr, len, root_id, objectid, offset, count, found_count);
9882                 return REFERENCER_MISSING;
9883         }
9884         return 0;
9885 }
9886
9887 /*
9888  * Check if the referencer of a shared data backref exists
9889  */
9890 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9891                                      u64 parent, u64 bytenr)
9892 {
9893         struct extent_buffer *eb;
9894         struct btrfs_key key;
9895         struct btrfs_file_extent_item *fi;
9896         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9897         u32 nr;
9898         int found_parent = 0;
9899         int i;
9900
9901         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9902         if (!extent_buffer_uptodate(eb))
9903                 goto out;
9904
9905         nr = btrfs_header_nritems(eb);
9906         for (i = 0; i < nr; i++) {
9907                 btrfs_item_key_to_cpu(eb, &key, i);
9908                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9909                         continue;
9910
9911                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9912                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9913                         continue;
9914
9915                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9916                         found_parent = 1;
9917                         break;
9918                 }
9919         }
9920
9921 out:
9922         free_extent_buffer(eb);
9923         if (!found_parent) {
9924                 error("shared extent %llu referencer lost (parent: %llu)",
9925                         bytenr, parent);
9926                 return REFERENCER_MISSING;
9927         }
9928         return 0;
9929 }
9930
9931 /*
9932  * This function will check a given extent item, including its backref and
9933  * itself (like crossing stripe boundary and type)
9934  *
9935  * Since we don't use extent_record anymore, introduce new error bit
9936  */
9937 static int check_extent_item(struct btrfs_fs_info *fs_info,
9938                              struct extent_buffer *eb, int slot)
9939 {
9940         struct btrfs_extent_item *ei;
9941         struct btrfs_extent_inline_ref *iref;
9942         struct btrfs_extent_data_ref *dref;
9943         unsigned long end;
9944         unsigned long ptr;
9945         int type;
9946         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9947         u32 item_size = btrfs_item_size_nr(eb, slot);
9948         u64 flags;
9949         u64 offset;
9950         int metadata = 0;
9951         int level;
9952         struct btrfs_key key;
9953         int ret;
9954         int err = 0;
9955
9956         btrfs_item_key_to_cpu(eb, &key, slot);
9957         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9958                 bytes_used += key.offset;
9959         else
9960                 bytes_used += nodesize;
9961
9962         if (item_size < sizeof(*ei)) {
9963                 /*
9964                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9965                  * old thing when on disk format is still un-determined.
9966                  * No need to care about it anymore
9967                  */
9968                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9969                 return -ENOTTY;
9970         }
9971
9972         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9973         flags = btrfs_extent_flags(eb, ei);
9974
9975         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9976                 metadata = 1;
9977         if (metadata && check_crossing_stripes(global_info, key.objectid,
9978                                                eb->len)) {
9979                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9980                       key.objectid, key.objectid + nodesize);
9981                 err |= CROSSING_STRIPE_BOUNDARY;
9982         }
9983
9984         ptr = (unsigned long)(ei + 1);
9985
9986         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9987                 /* Old EXTENT_ITEM metadata */
9988                 struct btrfs_tree_block_info *info;
9989
9990                 info = (struct btrfs_tree_block_info *)ptr;
9991                 level = btrfs_tree_block_level(eb, info);
9992                 ptr += sizeof(struct btrfs_tree_block_info);
9993         } else {
9994                 /* New METADATA_ITEM */
9995                 level = key.offset;
9996         }
9997         end = (unsigned long)ei + item_size;
9998
9999         if (ptr >= end) {
10000                 err |= ITEM_SIZE_MISMATCH;
10001                 goto out;
10002         }
10003
10004         /* Now check every backref in this extent item */
10005 next:
10006         iref = (struct btrfs_extent_inline_ref *)ptr;
10007         type = btrfs_extent_inline_ref_type(eb, iref);
10008         offset = btrfs_extent_inline_ref_offset(eb, iref);
10009         switch (type) {
10010         case BTRFS_TREE_BLOCK_REF_KEY:
10011                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10012                                                level);
10013                 err |= ret;
10014                 break;
10015         case BTRFS_SHARED_BLOCK_REF_KEY:
10016                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10017                                                  level);
10018                 err |= ret;
10019                 break;
10020         case BTRFS_EXTENT_DATA_REF_KEY:
10021                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10022                 ret = check_extent_data_backref(fs_info,
10023                                 btrfs_extent_data_ref_root(eb, dref),
10024                                 btrfs_extent_data_ref_objectid(eb, dref),
10025                                 btrfs_extent_data_ref_offset(eb, dref),
10026                                 key.objectid, key.offset,
10027                                 btrfs_extent_data_ref_count(eb, dref));
10028                 err |= ret;
10029                 break;
10030         case BTRFS_SHARED_DATA_REF_KEY:
10031                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10032                 err |= ret;
10033                 break;
10034         default:
10035                 error("extent[%llu %d %llu] has unknown ref type: %d",
10036                         key.objectid, key.type, key.offset, type);
10037                 err |= UNKNOWN_TYPE;
10038                 goto out;
10039         }
10040
10041         ptr += btrfs_extent_inline_ref_size(type);
10042         if (ptr < end)
10043                 goto next;
10044
10045 out:
10046         return err;
10047 }
10048
10049 /*
10050  * Check if a dev extent item is referred correctly by its chunk
10051  */
10052 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10053                                  struct extent_buffer *eb, int slot)
10054 {
10055         struct btrfs_root *chunk_root = fs_info->chunk_root;
10056         struct btrfs_dev_extent *ptr;
10057         struct btrfs_path path;
10058         struct btrfs_key chunk_key;
10059         struct btrfs_key devext_key;
10060         struct btrfs_chunk *chunk;
10061         struct extent_buffer *l;
10062         int num_stripes;
10063         u64 length;
10064         int i;
10065         int found_chunk = 0;
10066         int ret;
10067
10068         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10069         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10070         length = btrfs_dev_extent_length(eb, ptr);
10071
10072         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10073         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10074         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10075
10076         btrfs_init_path(&path);
10077         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10078         if (ret)
10079                 goto out;
10080
10081         l = path.nodes[0];
10082         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10083         if (btrfs_chunk_length(l, chunk) != length)
10084                 goto out;
10085
10086         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10087         for (i = 0; i < num_stripes; i++) {
10088                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10089                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10090
10091                 if (devid == devext_key.objectid &&
10092                     offset == devext_key.offset) {
10093                         found_chunk = 1;
10094                         break;
10095                 }
10096         }
10097 out:
10098         btrfs_release_path(&path);
10099         if (!found_chunk) {
10100                 error(
10101                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10102                         devext_key.objectid, devext_key.offset, length);
10103                 return REFERENCER_MISSING;
10104         }
10105         return 0;
10106 }
10107
10108 /*
10109  * Check if the used space is correct with the dev item
10110  */
10111 static int check_dev_item(struct btrfs_fs_info *fs_info,
10112                           struct extent_buffer *eb, int slot)
10113 {
10114         struct btrfs_root *dev_root = fs_info->dev_root;
10115         struct btrfs_dev_item *dev_item;
10116         struct btrfs_path path;
10117         struct btrfs_key key;
10118         struct btrfs_dev_extent *ptr;
10119         u64 dev_id;
10120         u64 used;
10121         u64 total = 0;
10122         int ret;
10123
10124         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10125         dev_id = btrfs_device_id(eb, dev_item);
10126         used = btrfs_device_bytes_used(eb, dev_item);
10127
10128         key.objectid = dev_id;
10129         key.type = BTRFS_DEV_EXTENT_KEY;
10130         key.offset = 0;
10131
10132         btrfs_init_path(&path);
10133         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10134         if (ret < 0) {
10135                 btrfs_item_key_to_cpu(eb, &key, slot);
10136                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10137                         key.objectid, key.type, key.offset);
10138                 btrfs_release_path(&path);
10139                 return REFERENCER_MISSING;
10140         }
10141
10142         /* Iterate dev_extents to calculate the used space of a device */
10143         while (1) {
10144                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10145
10146                 if (key.objectid > dev_id)
10147                         break;
10148                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10149                         goto next;
10150
10151                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10152                                      struct btrfs_dev_extent);
10153                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10154 next:
10155                 ret = btrfs_next_item(dev_root, &path);
10156                 if (ret)
10157                         break;
10158         }
10159         btrfs_release_path(&path);
10160
10161         if (used != total) {
10162                 btrfs_item_key_to_cpu(eb, &key, slot);
10163                 error(
10164 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10165                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10166                         BTRFS_DEV_EXTENT_KEY, dev_id);
10167                 return ACCOUNTING_MISMATCH;
10168         }
10169         return 0;
10170 }
10171
10172 /*
10173  * Check a block group item with its referener (chunk) and its used space
10174  * with extent/metadata item
10175  */
10176 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10177                                   struct extent_buffer *eb, int slot)
10178 {
10179         struct btrfs_root *extent_root = fs_info->extent_root;
10180         struct btrfs_root *chunk_root = fs_info->chunk_root;
10181         struct btrfs_block_group_item *bi;
10182         struct btrfs_block_group_item bg_item;
10183         struct btrfs_path path;
10184         struct btrfs_key bg_key;
10185         struct btrfs_key chunk_key;
10186         struct btrfs_key extent_key;
10187         struct btrfs_chunk *chunk;
10188         struct extent_buffer *leaf;
10189         struct btrfs_extent_item *ei;
10190         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10191         u64 flags;
10192         u64 bg_flags;
10193         u64 used;
10194         u64 total = 0;
10195         int ret;
10196         int err = 0;
10197
10198         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10199         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10200         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10201         used = btrfs_block_group_used(&bg_item);
10202         bg_flags = btrfs_block_group_flags(&bg_item);
10203
10204         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10205         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10206         chunk_key.offset = bg_key.objectid;
10207
10208         btrfs_init_path(&path);
10209         /* Search for the referencer chunk */
10210         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10211         if (ret) {
10212                 error(
10213                 "block group[%llu %llu] did not find the related chunk item",
10214                         bg_key.objectid, bg_key.offset);
10215                 err |= REFERENCER_MISSING;
10216         } else {
10217                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10218                                         struct btrfs_chunk);
10219                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10220                                                 bg_key.offset) {
10221                         error(
10222         "block group[%llu %llu] related chunk item length does not match",
10223                                 bg_key.objectid, bg_key.offset);
10224                         err |= REFERENCER_MISMATCH;
10225                 }
10226         }
10227         btrfs_release_path(&path);
10228
10229         /* Search from the block group bytenr */
10230         extent_key.objectid = bg_key.objectid;
10231         extent_key.type = 0;
10232         extent_key.offset = 0;
10233
10234         btrfs_init_path(&path);
10235         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10236         if (ret < 0)
10237                 goto out;
10238
10239         /* Iterate extent tree to account used space */
10240         while (1) {
10241                 leaf = path.nodes[0];
10242                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10243                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10244                         break;
10245
10246                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10247                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10248                         goto next;
10249                 if (extent_key.objectid < bg_key.objectid)
10250                         goto next;
10251
10252                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10253                         total += nodesize;
10254                 else
10255                         total += extent_key.offset;
10256
10257                 ei = btrfs_item_ptr(leaf, path.slots[0],
10258                                     struct btrfs_extent_item);
10259                 flags = btrfs_extent_flags(leaf, ei);
10260                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10261                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10262                                 error(
10263                         "bad extent[%llu, %llu) type mismatch with chunk",
10264                                         extent_key.objectid,
10265                                         extent_key.objectid + extent_key.offset);
10266                                 err |= CHUNK_TYPE_MISMATCH;
10267                         }
10268                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10269                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10270                                     BTRFS_BLOCK_GROUP_METADATA))) {
10271                                 error(
10272                         "bad extent[%llu, %llu) type mismatch with chunk",
10273                                         extent_key.objectid,
10274                                         extent_key.objectid + nodesize);
10275                                 err |= CHUNK_TYPE_MISMATCH;
10276                         }
10277                 }
10278 next:
10279                 ret = btrfs_next_item(extent_root, &path);
10280                 if (ret)
10281                         break;
10282         }
10283
10284 out:
10285         btrfs_release_path(&path);
10286
10287         if (total != used) {
10288                 error(
10289                 "block group[%llu %llu] used %llu but extent items used %llu",
10290                         bg_key.objectid, bg_key.offset, used, total);
10291                 err |= ACCOUNTING_MISMATCH;
10292         }
10293         return err;
10294 }
10295
10296 /*
10297  * Check a chunk item.
10298  * Including checking all referred dev_extents and block group
10299  */
10300 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10301                             struct extent_buffer *eb, int slot)
10302 {
10303         struct btrfs_root *extent_root = fs_info->extent_root;
10304         struct btrfs_root *dev_root = fs_info->dev_root;
10305         struct btrfs_path path;
10306         struct btrfs_key chunk_key;
10307         struct btrfs_key bg_key;
10308         struct btrfs_key devext_key;
10309         struct btrfs_chunk *chunk;
10310         struct extent_buffer *leaf;
10311         struct btrfs_block_group_item *bi;
10312         struct btrfs_block_group_item bg_item;
10313         struct btrfs_dev_extent *ptr;
10314         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10315         u64 length;
10316         u64 chunk_end;
10317         u64 type;
10318         u64 profile;
10319         int num_stripes;
10320         u64 offset;
10321         u64 objectid;
10322         int i;
10323         int ret;
10324         int err = 0;
10325
10326         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10327         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10328         length = btrfs_chunk_length(eb, chunk);
10329         chunk_end = chunk_key.offset + length;
10330         if (!IS_ALIGNED(length, sectorsize)) {
10331                 error("chunk[%llu %llu) not aligned to %u",
10332                         chunk_key.offset, chunk_end, sectorsize);
10333                 err |= BYTES_UNALIGNED;
10334                 goto out;
10335         }
10336
10337         type = btrfs_chunk_type(eb, chunk);
10338         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10339         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10340                 error("chunk[%llu %llu) has no chunk type",
10341                         chunk_key.offset, chunk_end);
10342                 err |= UNKNOWN_TYPE;
10343         }
10344         if (profile && (profile & (profile - 1))) {
10345                 error("chunk[%llu %llu) multiple profiles detected: %llx",
10346                         chunk_key.offset, chunk_end, profile);
10347                 err |= UNKNOWN_TYPE;
10348         }
10349
10350         bg_key.objectid = chunk_key.offset;
10351         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10352         bg_key.offset = length;
10353
10354         btrfs_init_path(&path);
10355         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10356         if (ret) {
10357                 error(
10358                 "chunk[%llu %llu) did not find the related block group item",
10359                         chunk_key.offset, chunk_end);
10360                 err |= REFERENCER_MISSING;
10361         } else{
10362                 leaf = path.nodes[0];
10363                 bi = btrfs_item_ptr(leaf, path.slots[0],
10364                                     struct btrfs_block_group_item);
10365                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10366                                    sizeof(bg_item));
10367                 if (btrfs_block_group_flags(&bg_item) != type) {
10368                         error(
10369 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10370                                 chunk_key.offset, chunk_end, type,
10371                                 btrfs_block_group_flags(&bg_item));
10372                         err |= REFERENCER_MISSING;
10373                 }
10374         }
10375
10376         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10377         for (i = 0; i < num_stripes; i++) {
10378                 btrfs_release_path(&path);
10379                 btrfs_init_path(&path);
10380                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10381                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10382                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10383
10384                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10385                                         0, 0);
10386                 if (ret)
10387                         goto not_match_dev;
10388
10389                 leaf = path.nodes[0];
10390                 ptr = btrfs_item_ptr(leaf, path.slots[0],
10391                                      struct btrfs_dev_extent);
10392                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10393                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10394                 if (objectid != chunk_key.objectid ||
10395                     offset != chunk_key.offset ||
10396                     btrfs_dev_extent_length(leaf, ptr) != length)
10397                         goto not_match_dev;
10398                 continue;
10399 not_match_dev:
10400                 err |= BACKREF_MISSING;
10401                 error(
10402                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10403                         chunk_key.objectid, chunk_end, i);
10404                 continue;
10405         }
10406         btrfs_release_path(&path);
10407 out:
10408         return err;
10409 }
10410
10411 /*
10412  * Main entry function to check known items and update related accounting info
10413  */
10414 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10415 {
10416         struct btrfs_fs_info *fs_info = root->fs_info;
10417         struct btrfs_key key;
10418         int slot = 0;
10419         int type;
10420         struct btrfs_extent_data_ref *dref;
10421         int ret;
10422         int err = 0;
10423
10424 next:
10425         btrfs_item_key_to_cpu(eb, &key, slot);
10426         type = key.type;
10427
10428         switch (type) {
10429         case BTRFS_EXTENT_DATA_KEY:
10430                 ret = check_extent_data_item(root, eb, slot);
10431                 err |= ret;
10432                 break;
10433         case BTRFS_BLOCK_GROUP_ITEM_KEY:
10434                 ret = check_block_group_item(fs_info, eb, slot);
10435                 err |= ret;
10436                 break;
10437         case BTRFS_DEV_ITEM_KEY:
10438                 ret = check_dev_item(fs_info, eb, slot);
10439                 err |= ret;
10440                 break;
10441         case BTRFS_CHUNK_ITEM_KEY:
10442                 ret = check_chunk_item(fs_info, eb, slot);
10443                 err |= ret;
10444                 break;
10445         case BTRFS_DEV_EXTENT_KEY:
10446                 ret = check_dev_extent_item(fs_info, eb, slot);
10447                 err |= ret;
10448                 break;
10449         case BTRFS_EXTENT_ITEM_KEY:
10450         case BTRFS_METADATA_ITEM_KEY:
10451                 ret = check_extent_item(fs_info, eb, slot);
10452                 err |= ret;
10453                 break;
10454         case BTRFS_EXTENT_CSUM_KEY:
10455                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10456                 break;
10457         case BTRFS_TREE_BLOCK_REF_KEY:
10458                 ret = check_tree_block_backref(fs_info, key.offset,
10459                                                key.objectid, -1);
10460                 err |= ret;
10461                 break;
10462         case BTRFS_EXTENT_DATA_REF_KEY:
10463                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10464                 ret = check_extent_data_backref(fs_info,
10465                                 btrfs_extent_data_ref_root(eb, dref),
10466                                 btrfs_extent_data_ref_objectid(eb, dref),
10467                                 btrfs_extent_data_ref_offset(eb, dref),
10468                                 key.objectid, 0,
10469                                 btrfs_extent_data_ref_count(eb, dref));
10470                 err |= ret;
10471                 break;
10472         case BTRFS_SHARED_BLOCK_REF_KEY:
10473                 ret = check_shared_block_backref(fs_info, key.offset,
10474                                                  key.objectid, -1);
10475                 err |= ret;
10476                 break;
10477         case BTRFS_SHARED_DATA_REF_KEY:
10478                 ret = check_shared_data_backref(fs_info, key.offset,
10479                                                 key.objectid);
10480                 err |= ret;
10481                 break;
10482         default:
10483                 break;
10484         }
10485
10486         if (++slot < btrfs_header_nritems(eb))
10487                 goto next;
10488
10489         return err;
10490 }
10491
10492 /*
10493  * Helper function for later fs/subvol tree check.  To determine if a tree
10494  * block should be checked.
10495  * This function will ensure only the direct referencer with lowest rootid to
10496  * check a fs/subvolume tree block.
10497  *
10498  * Backref check at extent tree would detect errors like missing subvolume
10499  * tree, so we can do aggressive check to reduce duplicated checks.
10500  */
10501 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10502 {
10503         struct btrfs_root *extent_root = root->fs_info->extent_root;
10504         struct btrfs_key key;
10505         struct btrfs_path path;
10506         struct extent_buffer *leaf;
10507         int slot;
10508         struct btrfs_extent_item *ei;
10509         unsigned long ptr;
10510         unsigned long end;
10511         int type;
10512         u32 item_size;
10513         u64 offset;
10514         struct btrfs_extent_inline_ref *iref;
10515         int ret;
10516
10517         btrfs_init_path(&path);
10518         key.objectid = btrfs_header_bytenr(eb);
10519         key.type = BTRFS_METADATA_ITEM_KEY;
10520         key.offset = (u64)-1;
10521
10522         /*
10523          * Any failure in backref resolving means we can't determine
10524          * whom the tree block belongs to.
10525          * So in that case, we need to check that tree block
10526          */
10527         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10528         if (ret < 0)
10529                 goto need_check;
10530
10531         ret = btrfs_previous_extent_item(extent_root, &path,
10532                                          btrfs_header_bytenr(eb));
10533         if (ret)
10534                 goto need_check;
10535
10536         leaf = path.nodes[0];
10537         slot = path.slots[0];
10538         btrfs_item_key_to_cpu(leaf, &key, slot);
10539         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10540
10541         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10542                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10543         } else {
10544                 struct btrfs_tree_block_info *info;
10545
10546                 info = (struct btrfs_tree_block_info *)(ei + 1);
10547                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10548         }
10549
10550         item_size = btrfs_item_size_nr(leaf, slot);
10551         ptr = (unsigned long)iref;
10552         end = (unsigned long)ei + item_size;
10553         while (ptr < end) {
10554                 iref = (struct btrfs_extent_inline_ref *)ptr;
10555                 type = btrfs_extent_inline_ref_type(leaf, iref);
10556                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10557
10558                 /*
10559                  * We only check the tree block if current root is
10560                  * the lowest referencer of it.
10561                  */
10562                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10563                     offset < root->objectid) {
10564                         btrfs_release_path(&path);
10565                         return 0;
10566                 }
10567
10568                 ptr += btrfs_extent_inline_ref_size(type);
10569         }
10570         /*
10571          * Normally we should also check keyed tree block ref, but that may be
10572          * very time consuming.  Inlined ref should already make us skip a lot
10573          * of refs now.  So skip search keyed tree block ref.
10574          */
10575
10576 need_check:
10577         btrfs_release_path(&path);
10578         return 1;
10579 }
10580
10581 /*
10582  * Traversal function for tree block. We will do:
10583  * 1) Skip shared fs/subvolume tree blocks
10584  * 2) Update related bytes accounting
10585  * 3) Pre-order traversal
10586  */
10587 static int traverse_tree_block(struct btrfs_root *root,
10588                                 struct extent_buffer *node)
10589 {
10590         struct extent_buffer *eb;
10591         struct btrfs_key key;
10592         struct btrfs_key drop_key;
10593         int level;
10594         u64 nr;
10595         int i;
10596         int err = 0;
10597         int ret;
10598
10599         /*
10600          * Skip shared fs/subvolume tree block, in that case they will
10601          * be checked by referencer with lowest rootid
10602          */
10603         if (is_fstree(root->objectid) && !should_check(root, node))
10604                 return 0;
10605
10606         /* Update bytes accounting */
10607         total_btree_bytes += node->len;
10608         if (fs_root_objectid(btrfs_header_owner(node)))
10609                 total_fs_tree_bytes += node->len;
10610         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10611                 total_extent_tree_bytes += node->len;
10612         if (!found_old_backref &&
10613             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10614             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10615             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10616                 found_old_backref = 1;
10617
10618         /* pre-order tranversal, check itself first */
10619         level = btrfs_header_level(node);
10620         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10621                                    btrfs_header_level(node),
10622                                    btrfs_header_owner(node));
10623         err |= ret;
10624         if (err)
10625                 error(
10626         "check %s failed root %llu bytenr %llu level %d, force continue check",
10627                         level ? "node":"leaf", root->objectid,
10628                         btrfs_header_bytenr(node), btrfs_header_level(node));
10629
10630         if (!level) {
10631                 btree_space_waste += btrfs_leaf_free_space(root, node);
10632                 ret = check_leaf_items(root, node);
10633                 err |= ret;
10634                 return err;
10635         }
10636
10637         nr = btrfs_header_nritems(node);
10638         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10639         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10640                 sizeof(struct btrfs_key_ptr);
10641
10642         /* Then check all its children */
10643         for (i = 0; i < nr; i++) {
10644                 u64 blocknr = btrfs_node_blockptr(node, i);
10645
10646                 btrfs_node_key_to_cpu(node, &key, i);
10647                 if (level == root->root_item.drop_level &&
10648                     is_dropped_key(&key, &drop_key))
10649                         continue;
10650
10651                 /*
10652                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10653                  * to call the function itself.
10654                  */
10655                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10656                 if (extent_buffer_uptodate(eb)) {
10657                         ret = traverse_tree_block(root, eb);
10658                         err |= ret;
10659                 }
10660                 free_extent_buffer(eb);
10661         }
10662
10663         return err;
10664 }
10665
10666 /*
10667  * Low memory usage version check_chunks_and_extents.
10668  */
10669 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10670 {
10671         struct btrfs_path path;
10672         struct btrfs_key key;
10673         struct btrfs_root *root1;
10674         struct btrfs_root *cur_root;
10675         int err = 0;
10676         int ret;
10677
10678         root1 = root->fs_info->chunk_root;
10679         ret = traverse_tree_block(root1, root1->node);
10680         err |= ret;
10681
10682         root1 = root->fs_info->tree_root;
10683         ret = traverse_tree_block(root1, root1->node);
10684         err |= ret;
10685
10686         btrfs_init_path(&path);
10687         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10688         key.offset = 0;
10689         key.type = BTRFS_ROOT_ITEM_KEY;
10690
10691         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10692         if (ret) {
10693                 error("cannot find extent treet in tree_root");
10694                 goto out;
10695         }
10696
10697         while (1) {
10698                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10699                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10700                         goto next;
10701                 key.offset = (u64)-1;
10702
10703                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10704                 if (IS_ERR(cur_root) || !cur_root) {
10705                         error("failed to read tree: %lld", key.objectid);
10706                         goto next;
10707                 }
10708
10709                 ret = traverse_tree_block(cur_root, cur_root->node);
10710                 err |= ret;
10711
10712 next:
10713                 ret = btrfs_next_item(root1, &path);
10714                 if (ret)
10715                         goto out;
10716         }
10717
10718 out:
10719         btrfs_release_path(&path);
10720         return err;
10721 }
10722
10723 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10724                            struct btrfs_root *root, int overwrite)
10725 {
10726         struct extent_buffer *c;
10727         struct extent_buffer *old = root->node;
10728         int level;
10729         int ret;
10730         struct btrfs_disk_key disk_key = {0,0,0};
10731
10732         level = 0;
10733
10734         if (overwrite) {
10735                 c = old;
10736                 extent_buffer_get(c);
10737                 goto init;
10738         }
10739         c = btrfs_alloc_free_block(trans, root,
10740                                    root->nodesize,
10741                                    root->root_key.objectid,
10742                                    &disk_key, level, 0, 0);
10743         if (IS_ERR(c)) {
10744                 c = old;
10745                 extent_buffer_get(c);
10746                 overwrite = 1;
10747         }
10748 init:
10749         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10750         btrfs_set_header_level(c, level);
10751         btrfs_set_header_bytenr(c, c->start);
10752         btrfs_set_header_generation(c, trans->transid);
10753         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10754         btrfs_set_header_owner(c, root->root_key.objectid);
10755
10756         write_extent_buffer(c, root->fs_info->fsid,
10757                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10758
10759         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10760                             btrfs_header_chunk_tree_uuid(c),
10761                             BTRFS_UUID_SIZE);
10762
10763         btrfs_mark_buffer_dirty(c);
10764         /*
10765          * this case can happen in the following case:
10766          *
10767          * 1.overwrite previous root.
10768          *
10769          * 2.reinit reloc data root, this is because we skip pin
10770          * down reloc data tree before which means we can allocate
10771          * same block bytenr here.
10772          */
10773         if (old->start == c->start) {
10774                 btrfs_set_root_generation(&root->root_item,
10775                                           trans->transid);
10776                 root->root_item.level = btrfs_header_level(root->node);
10777                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10778                                         &root->root_key, &root->root_item);
10779                 if (ret) {
10780                         free_extent_buffer(c);
10781                         return ret;
10782                 }
10783         }
10784         free_extent_buffer(old);
10785         root->node = c;
10786         add_root_to_dirty_list(root);
10787         return 0;
10788 }
10789
10790 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10791                                 struct extent_buffer *eb, int tree_root)
10792 {
10793         struct extent_buffer *tmp;
10794         struct btrfs_root_item *ri;
10795         struct btrfs_key key;
10796         u64 bytenr;
10797         u32 nodesize;
10798         int level = btrfs_header_level(eb);
10799         int nritems;
10800         int ret;
10801         int i;
10802
10803         /*
10804          * If we have pinned this block before, don't pin it again.
10805          * This can not only avoid forever loop with broken filesystem
10806          * but also give us some speedups.
10807          */
10808         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10809                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10810                 return 0;
10811
10812         btrfs_pin_extent(fs_info, eb->start, eb->len);
10813
10814         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10815         nritems = btrfs_header_nritems(eb);
10816         for (i = 0; i < nritems; i++) {
10817                 if (level == 0) {
10818                         btrfs_item_key_to_cpu(eb, &key, i);
10819                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10820                                 continue;
10821                         /* Skip the extent root and reloc roots */
10822                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10823                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10824                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10825                                 continue;
10826                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10827                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10828
10829                         /*
10830                          * If at any point we start needing the real root we
10831                          * will have to build a stump root for the root we are
10832                          * in, but for now this doesn't actually use the root so
10833                          * just pass in extent_root.
10834                          */
10835                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10836                                               nodesize, 0);
10837                         if (!extent_buffer_uptodate(tmp)) {
10838                                 fprintf(stderr, "Error reading root block\n");
10839                                 return -EIO;
10840                         }
10841                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10842                         free_extent_buffer(tmp);
10843                         if (ret)
10844                                 return ret;
10845                 } else {
10846                         bytenr = btrfs_node_blockptr(eb, i);
10847
10848                         /* If we aren't the tree root don't read the block */
10849                         if (level == 1 && !tree_root) {
10850                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10851                                 continue;
10852                         }
10853
10854                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10855                                               nodesize, 0);
10856                         if (!extent_buffer_uptodate(tmp)) {
10857                                 fprintf(stderr, "Error reading tree block\n");
10858                                 return -EIO;
10859                         }
10860                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10861                         free_extent_buffer(tmp);
10862                         if (ret)
10863                                 return ret;
10864                 }
10865         }
10866
10867         return 0;
10868 }
10869
10870 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10871 {
10872         int ret;
10873
10874         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10875         if (ret)
10876                 return ret;
10877
10878         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10879 }
10880
10881 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10882 {
10883         struct btrfs_block_group_cache *cache;
10884         struct btrfs_path path;
10885         struct extent_buffer *leaf;
10886         struct btrfs_chunk *chunk;
10887         struct btrfs_key key;
10888         int ret;
10889         u64 start;
10890
10891         btrfs_init_path(&path);
10892         key.objectid = 0;
10893         key.type = BTRFS_CHUNK_ITEM_KEY;
10894         key.offset = 0;
10895         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10896         if (ret < 0) {
10897                 btrfs_release_path(&path);
10898                 return ret;
10899         }
10900
10901         /*
10902          * We do this in case the block groups were screwed up and had alloc
10903          * bits that aren't actually set on the chunks.  This happens with
10904          * restored images every time and could happen in real life I guess.
10905          */
10906         fs_info->avail_data_alloc_bits = 0;
10907         fs_info->avail_metadata_alloc_bits = 0;
10908         fs_info->avail_system_alloc_bits = 0;
10909
10910         /* First we need to create the in-memory block groups */
10911         while (1) {
10912                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10913                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10914                         if (ret < 0) {
10915                                 btrfs_release_path(&path);
10916                                 return ret;
10917                         }
10918                         if (ret) {
10919                                 ret = 0;
10920                                 break;
10921                         }
10922                 }
10923                 leaf = path.nodes[0];
10924                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10925                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10926                         path.slots[0]++;
10927                         continue;
10928                 }
10929
10930                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10931                 btrfs_add_block_group(fs_info, 0,
10932                                       btrfs_chunk_type(leaf, chunk),
10933                                       key.objectid, key.offset,
10934                                       btrfs_chunk_length(leaf, chunk));
10935                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10936                                  key.offset + btrfs_chunk_length(leaf, chunk),
10937                                  GFP_NOFS);
10938                 path.slots[0]++;
10939         }
10940         start = 0;
10941         while (1) {
10942                 cache = btrfs_lookup_first_block_group(fs_info, start);
10943                 if (!cache)
10944                         break;
10945                 cache->cached = 1;
10946                 start = cache->key.objectid + cache->key.offset;
10947         }
10948
10949         btrfs_release_path(&path);
10950         return 0;
10951 }
10952
10953 static int reset_balance(struct btrfs_trans_handle *trans,
10954                          struct btrfs_fs_info *fs_info)
10955 {
10956         struct btrfs_root *root = fs_info->tree_root;
10957         struct btrfs_path path;
10958         struct extent_buffer *leaf;
10959         struct btrfs_key key;
10960         int del_slot, del_nr = 0;
10961         int ret;
10962         int found = 0;
10963
10964         btrfs_init_path(&path);
10965         key.objectid = BTRFS_BALANCE_OBJECTID;
10966         key.type = BTRFS_BALANCE_ITEM_KEY;
10967         key.offset = 0;
10968         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10969         if (ret) {
10970                 if (ret > 0)
10971                         ret = 0;
10972                 if (!ret)
10973                         goto reinit_data_reloc;
10974                 else
10975                         goto out;
10976         }
10977
10978         ret = btrfs_del_item(trans, root, &path);
10979         if (ret)
10980                 goto out;
10981         btrfs_release_path(&path);
10982
10983         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10984         key.type = BTRFS_ROOT_ITEM_KEY;
10985         key.offset = 0;
10986         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10987         if (ret < 0)
10988                 goto out;
10989         while (1) {
10990                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10991                         if (!found)
10992                                 break;
10993
10994                         if (del_nr) {
10995                                 ret = btrfs_del_items(trans, root, &path,
10996                                                       del_slot, del_nr);
10997                                 del_nr = 0;
10998                                 if (ret)
10999                                         goto out;
11000                         }
11001                         key.offset++;
11002                         btrfs_release_path(&path);
11003
11004                         found = 0;
11005                         ret = btrfs_search_slot(trans, root, &key, &path,
11006                                                 -1, 1);
11007                         if (ret < 0)
11008                                 goto out;
11009                         continue;
11010                 }
11011                 found = 1;
11012                 leaf = path.nodes[0];
11013                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11014                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11015                         break;
11016                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11017                         path.slots[0]++;
11018                         continue;
11019                 }
11020                 if (!del_nr) {
11021                         del_slot = path.slots[0];
11022                         del_nr = 1;
11023                 } else {
11024                         del_nr++;
11025                 }
11026                 path.slots[0]++;
11027         }
11028
11029         if (del_nr) {
11030                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11031                 if (ret)
11032                         goto out;
11033         }
11034         btrfs_release_path(&path);
11035
11036 reinit_data_reloc:
11037         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11038         key.type = BTRFS_ROOT_ITEM_KEY;
11039         key.offset = (u64)-1;
11040         root = btrfs_read_fs_root(fs_info, &key);
11041         if (IS_ERR(root)) {
11042                 fprintf(stderr, "Error reading data reloc tree\n");
11043                 ret = PTR_ERR(root);
11044                 goto out;
11045         }
11046         record_root_in_trans(trans, root);
11047         ret = btrfs_fsck_reinit_root(trans, root, 0);
11048         if (ret)
11049                 goto out;
11050         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11051 out:
11052         btrfs_release_path(&path);
11053         return ret;
11054 }
11055
11056 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11057                               struct btrfs_fs_info *fs_info)
11058 {
11059         u64 start = 0;
11060         int ret;
11061
11062         /*
11063          * The only reason we don't do this is because right now we're just
11064          * walking the trees we find and pinning down their bytes, we don't look
11065          * at any of the leaves.  In order to do mixed groups we'd have to check
11066          * the leaves of any fs roots and pin down the bytes for any file
11067          * extents we find.  Not hard but why do it if we don't have to?
11068          */
11069         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11070                 fprintf(stderr, "We don't support re-initing the extent tree "
11071                         "for mixed block groups yet, please notify a btrfs "
11072                         "developer you want to do this so they can add this "
11073                         "functionality.\n");
11074                 return -EINVAL;
11075         }
11076
11077         /*
11078          * first we need to walk all of the trees except the extent tree and pin
11079          * down the bytes that are in use so we don't overwrite any existing
11080          * metadata.
11081          */
11082         ret = pin_metadata_blocks(fs_info);
11083         if (ret) {
11084                 fprintf(stderr, "error pinning down used bytes\n");
11085                 return ret;
11086         }
11087
11088         /*
11089          * Need to drop all the block groups since we're going to recreate all
11090          * of them again.
11091          */
11092         btrfs_free_block_groups(fs_info);
11093         ret = reset_block_groups(fs_info);
11094         if (ret) {
11095                 fprintf(stderr, "error resetting the block groups\n");
11096                 return ret;
11097         }
11098
11099         /* Ok we can allocate now, reinit the extent root */
11100         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11101         if (ret) {
11102                 fprintf(stderr, "extent root initialization failed\n");
11103                 /*
11104                  * When the transaction code is updated we should end the
11105                  * transaction, but for now progs only knows about commit so
11106                  * just return an error.
11107                  */
11108                 return ret;
11109         }
11110
11111         /*
11112          * Now we have all the in-memory block groups setup so we can make
11113          * allocations properly, and the metadata we care about is safe since we
11114          * pinned all of it above.
11115          */
11116         while (1) {
11117                 struct btrfs_block_group_cache *cache;
11118
11119                 cache = btrfs_lookup_first_block_group(fs_info, start);
11120                 if (!cache)
11121                         break;
11122                 start = cache->key.objectid + cache->key.offset;
11123                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11124                                         &cache->key, &cache->item,
11125                                         sizeof(cache->item));
11126                 if (ret) {
11127                         fprintf(stderr, "Error adding block group\n");
11128                         return ret;
11129                 }
11130                 btrfs_extent_post_op(trans, fs_info->extent_root);
11131         }
11132
11133         ret = reset_balance(trans, fs_info);
11134         if (ret)
11135                 fprintf(stderr, "error resetting the pending balance\n");
11136
11137         return ret;
11138 }
11139
11140 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11141 {
11142         struct btrfs_path path;
11143         struct btrfs_trans_handle *trans;
11144         struct btrfs_key key;
11145         int ret;
11146
11147         printf("Recowing metadata block %llu\n", eb->start);
11148         key.objectid = btrfs_header_owner(eb);
11149         key.type = BTRFS_ROOT_ITEM_KEY;
11150         key.offset = (u64)-1;
11151
11152         root = btrfs_read_fs_root(root->fs_info, &key);
11153         if (IS_ERR(root)) {
11154                 fprintf(stderr, "Couldn't find owner root %llu\n",
11155                         key.objectid);
11156                 return PTR_ERR(root);
11157         }
11158
11159         trans = btrfs_start_transaction(root, 1);
11160         if (IS_ERR(trans))
11161                 return PTR_ERR(trans);
11162
11163         btrfs_init_path(&path);
11164         path.lowest_level = btrfs_header_level(eb);
11165         if (path.lowest_level)
11166                 btrfs_node_key_to_cpu(eb, &key, 0);
11167         else
11168                 btrfs_item_key_to_cpu(eb, &key, 0);
11169
11170         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11171         btrfs_commit_transaction(trans, root);
11172         btrfs_release_path(&path);
11173         return ret;
11174 }
11175
11176 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11177 {
11178         struct btrfs_path path;
11179         struct btrfs_trans_handle *trans;
11180         struct btrfs_key key;
11181         int ret;
11182
11183         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11184                bad->key.type, bad->key.offset);
11185         key.objectid = bad->root_id;
11186         key.type = BTRFS_ROOT_ITEM_KEY;
11187         key.offset = (u64)-1;
11188
11189         root = btrfs_read_fs_root(root->fs_info, &key);
11190         if (IS_ERR(root)) {
11191                 fprintf(stderr, "Couldn't find owner root %llu\n",
11192                         key.objectid);
11193                 return PTR_ERR(root);
11194         }
11195
11196         trans = btrfs_start_transaction(root, 1);
11197         if (IS_ERR(trans))
11198                 return PTR_ERR(trans);
11199
11200         btrfs_init_path(&path);
11201         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11202         if (ret) {
11203                 if (ret > 0)
11204                         ret = 0;
11205                 goto out;
11206         }
11207         ret = btrfs_del_item(trans, root, &path);
11208 out:
11209         btrfs_commit_transaction(trans, root);
11210         btrfs_release_path(&path);
11211         return ret;
11212 }
11213
11214 static int zero_log_tree(struct btrfs_root *root)
11215 {
11216         struct btrfs_trans_handle *trans;
11217         int ret;
11218
11219         trans = btrfs_start_transaction(root, 1);
11220         if (IS_ERR(trans)) {
11221                 ret = PTR_ERR(trans);
11222                 return ret;
11223         }
11224         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11225         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11226         ret = btrfs_commit_transaction(trans, root);
11227         return ret;
11228 }
11229
11230 static int populate_csum(struct btrfs_trans_handle *trans,
11231                          struct btrfs_root *csum_root, char *buf, u64 start,
11232                          u64 len)
11233 {
11234         u64 offset = 0;
11235         u64 sectorsize;
11236         int ret = 0;
11237
11238         while (offset < len) {
11239                 sectorsize = csum_root->sectorsize;
11240                 ret = read_extent_data(csum_root, buf, start + offset,
11241                                        &sectorsize, 0);
11242                 if (ret)
11243                         break;
11244                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11245                                             start + offset, buf, sectorsize);
11246                 if (ret)
11247                         break;
11248                 offset += sectorsize;
11249         }
11250         return ret;
11251 }
11252
11253 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11254                                       struct btrfs_root *csum_root,
11255                                       struct btrfs_root *cur_root)
11256 {
11257         struct btrfs_path path;
11258         struct btrfs_key key;
11259         struct extent_buffer *node;
11260         struct btrfs_file_extent_item *fi;
11261         char *buf = NULL;
11262         u64 start = 0;
11263         u64 len = 0;
11264         int slot = 0;
11265         int ret = 0;
11266
11267         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11268         if (!buf)
11269                 return -ENOMEM;
11270
11271         btrfs_init_path(&path);
11272         key.objectid = 0;
11273         key.offset = 0;
11274         key.type = 0;
11275         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11276         if (ret < 0)
11277                 goto out;
11278         /* Iterate all regular file extents and fill its csum */
11279         while (1) {
11280                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11281
11282                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11283                         goto next;
11284                 node = path.nodes[0];
11285                 slot = path.slots[0];
11286                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11287                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
11288                         goto next;
11289                 start = btrfs_file_extent_disk_bytenr(node, fi);
11290                 len = btrfs_file_extent_disk_num_bytes(node, fi);
11291
11292                 ret = populate_csum(trans, csum_root, buf, start, len);
11293                 if (ret == -EEXIST)
11294                         ret = 0;
11295                 if (ret < 0)
11296                         goto out;
11297 next:
11298                 /*
11299                  * TODO: if next leaf is corrupted, jump to nearest next valid
11300                  * leaf.
11301                  */
11302                 ret = btrfs_next_item(cur_root, &path);
11303                 if (ret < 0)
11304                         goto out;
11305                 if (ret > 0) {
11306                         ret = 0;
11307                         goto out;
11308                 }
11309         }
11310
11311 out:
11312         btrfs_release_path(&path);
11313         free(buf);
11314         return ret;
11315 }
11316
11317 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11318                                   struct btrfs_root *csum_root)
11319 {
11320         struct btrfs_fs_info *fs_info = csum_root->fs_info;
11321         struct btrfs_path path;
11322         struct btrfs_root *tree_root = fs_info->tree_root;
11323         struct btrfs_root *cur_root;
11324         struct extent_buffer *node;
11325         struct btrfs_key key;
11326         int slot = 0;
11327         int ret = 0;
11328
11329         btrfs_init_path(&path);
11330         key.objectid = BTRFS_FS_TREE_OBJECTID;
11331         key.offset = 0;
11332         key.type = BTRFS_ROOT_ITEM_KEY;
11333         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11334         if (ret < 0)
11335                 goto out;
11336         if (ret > 0) {
11337                 ret = -ENOENT;
11338                 goto out;
11339         }
11340
11341         while (1) {
11342                 node = path.nodes[0];
11343                 slot = path.slots[0];
11344                 btrfs_item_key_to_cpu(node, &key, slot);
11345                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11346                         goto out;
11347                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11348                         goto next;
11349                 if (!is_fstree(key.objectid))
11350                         goto next;
11351                 key.offset = (u64)-1;
11352
11353                 cur_root = btrfs_read_fs_root(fs_info, &key);
11354                 if (IS_ERR(cur_root) || !cur_root) {
11355                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11356                                 key.objectid);
11357                         goto out;
11358                 }
11359                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11360                                 cur_root);
11361                 if (ret < 0)
11362                         goto out;
11363 next:
11364                 ret = btrfs_next_item(tree_root, &path);
11365                 if (ret > 0) {
11366                         ret = 0;
11367                         goto out;
11368                 }
11369                 if (ret < 0)
11370                         goto out;
11371         }
11372
11373 out:
11374         btrfs_release_path(&path);
11375         return ret;
11376 }
11377
11378 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11379                                       struct btrfs_root *csum_root)
11380 {
11381         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11382         struct btrfs_path path;
11383         struct btrfs_extent_item *ei;
11384         struct extent_buffer *leaf;
11385         char *buf;
11386         struct btrfs_key key;
11387         int ret;
11388
11389         btrfs_init_path(&path);
11390         key.objectid = 0;
11391         key.type = BTRFS_EXTENT_ITEM_KEY;
11392         key.offset = 0;
11393         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11394         if (ret < 0) {
11395                 btrfs_release_path(&path);
11396                 return ret;
11397         }
11398
11399         buf = malloc(csum_root->sectorsize);
11400         if (!buf) {
11401                 btrfs_release_path(&path);
11402                 return -ENOMEM;
11403         }
11404
11405         while (1) {
11406                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11407                         ret = btrfs_next_leaf(extent_root, &path);
11408                         if (ret < 0)
11409                                 break;
11410                         if (ret) {
11411                                 ret = 0;
11412                                 break;
11413                         }
11414                 }
11415                 leaf = path.nodes[0];
11416
11417                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11418                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11419                         path.slots[0]++;
11420                         continue;
11421                 }
11422
11423                 ei = btrfs_item_ptr(leaf, path.slots[0],
11424                                     struct btrfs_extent_item);
11425                 if (!(btrfs_extent_flags(leaf, ei) &
11426                       BTRFS_EXTENT_FLAG_DATA)) {
11427                         path.slots[0]++;
11428                         continue;
11429                 }
11430
11431                 ret = populate_csum(trans, csum_root, buf, key.objectid,
11432                                     key.offset);
11433                 if (ret)
11434                         break;
11435                 path.slots[0]++;
11436         }
11437
11438         btrfs_release_path(&path);
11439         free(buf);
11440         return ret;
11441 }
11442
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * There are two sources for the list of data extents.  After an extent
 * tree init the extent tree has been wiped, so it cannot be used; in that
 * case the caller sets @search_fs_tree and the fs/subvolume trees are
 * walked instead.  Otherwise the extent tree is walked.
 *
 * Returns the result of the selected walker.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
11459
11460 static void free_roots_info_cache(void)
11461 {
11462         if (!roots_info_cache)
11463                 return;
11464
11465         while (!cache_tree_empty(roots_info_cache)) {
11466                 struct cache_extent *entry;
11467                 struct root_item_info *rii;
11468
11469                 entry = first_cache_extent(roots_info_cache);
11470                 if (!entry)
11471                         break;
11472                 remove_cache_extent(roots_info_cache, entry);
11473                 rii = container_of(entry, struct root_item_info, cache_extent);
11474                 free(rii);
11475         }
11476
11477         free(roots_info_cache);
11478         roots_info_cache = NULL;
11479 }
11480
11481 static int build_roots_info_cache(struct btrfs_fs_info *info)
11482 {
11483         int ret = 0;
11484         struct btrfs_key key;
11485         struct extent_buffer *leaf;
11486         struct btrfs_path path;
11487
11488         if (!roots_info_cache) {
11489                 roots_info_cache = malloc(sizeof(*roots_info_cache));
11490                 if (!roots_info_cache)
11491                         return -ENOMEM;
11492                 cache_tree_init(roots_info_cache);
11493         }
11494
11495         btrfs_init_path(&path);
11496         key.objectid = 0;
11497         key.type = BTRFS_EXTENT_ITEM_KEY;
11498         key.offset = 0;
11499         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11500         if (ret < 0)
11501                 goto out;
11502         leaf = path.nodes[0];
11503
11504         while (1) {
11505                 struct btrfs_key found_key;
11506                 struct btrfs_extent_item *ei;
11507                 struct btrfs_extent_inline_ref *iref;
11508                 int slot = path.slots[0];
11509                 int type;
11510                 u64 flags;
11511                 u64 root_id;
11512                 u8 level;
11513                 struct cache_extent *entry;
11514                 struct root_item_info *rii;
11515
11516                 if (slot >= btrfs_header_nritems(leaf)) {
11517                         ret = btrfs_next_leaf(info->extent_root, &path);
11518                         if (ret < 0) {
11519                                 break;
11520                         } else if (ret) {
11521                                 ret = 0;
11522                                 break;
11523                         }
11524                         leaf = path.nodes[0];
11525                         slot = path.slots[0];
11526                 }
11527
11528                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11529
11530                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11531                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11532                         goto next;
11533
11534                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11535                 flags = btrfs_extent_flags(leaf, ei);
11536
11537                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11538                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11539                         goto next;
11540
11541                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11542                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11543                         level = found_key.offset;
11544                 } else {
11545                         struct btrfs_tree_block_info *binfo;
11546
11547                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11548                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11549                         level = btrfs_tree_block_level(leaf, binfo);
11550                 }
11551
11552                 /*
11553                  * For a root extent, it must be of the following type and the
11554                  * first (and only one) iref in the item.
11555                  */
11556                 type = btrfs_extent_inline_ref_type(leaf, iref);
11557                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11558                         goto next;
11559
11560                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11561                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11562                 if (!entry) {
11563                         rii = malloc(sizeof(struct root_item_info));
11564                         if (!rii) {
11565                                 ret = -ENOMEM;
11566                                 goto out;
11567                         }
11568                         rii->cache_extent.start = root_id;
11569                         rii->cache_extent.size = 1;
11570                         rii->level = (u8)-1;
11571                         entry = &rii->cache_extent;
11572                         ret = insert_cache_extent(roots_info_cache, entry);
11573                         ASSERT(ret == 0);
11574                 } else {
11575                         rii = container_of(entry, struct root_item_info,
11576                                            cache_extent);
11577                 }
11578
11579                 ASSERT(rii->cache_extent.start == root_id);
11580                 ASSERT(rii->cache_extent.size == 1);
11581
11582                 if (level > rii->level || rii->level == (u8)-1) {
11583                         rii->level = level;
11584                         rii->bytenr = found_key.objectid;
11585                         rii->gen = btrfs_extent_generation(leaf, ei);
11586                         rii->node_count = 1;
11587                 } else if (level == rii->level) {
11588                         rii->node_count++;
11589                 }
11590 next:
11591                 path.slots[0]++;
11592         }
11593
11594 out:
11595         btrfs_release_path(&path);
11596
11597         return ret;
11598 }
11599
11600 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11601                                   struct btrfs_path *path,
11602                                   const struct btrfs_key *root_key,
11603                                   const int read_only_mode)
11604 {
11605         const u64 root_id = root_key->objectid;
11606         struct cache_extent *entry;
11607         struct root_item_info *rii;
11608         struct btrfs_root_item ri;
11609         unsigned long offset;
11610
11611         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11612         if (!entry) {
11613                 fprintf(stderr,
11614                         "Error: could not find extent items for root %llu\n",
11615                         root_key->objectid);
11616                 return -ENOENT;
11617         }
11618
11619         rii = container_of(entry, struct root_item_info, cache_extent);
11620         ASSERT(rii->cache_extent.start == root_id);
11621         ASSERT(rii->cache_extent.size == 1);
11622
11623         if (rii->node_count != 1) {
11624                 fprintf(stderr,
11625                         "Error: could not find btree root extent for root %llu\n",
11626                         root_id);
11627                 return -ENOENT;
11628         }
11629
11630         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11631         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11632
11633         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11634             btrfs_root_level(&ri) != rii->level ||
11635             btrfs_root_generation(&ri) != rii->gen) {
11636
11637                 /*
11638                  * If we're in repair mode but our caller told us to not update
11639                  * the root item, i.e. just check if it needs to be updated, don't
11640                  * print this message, since the caller will call us again shortly
11641                  * for the same root item without read only mode (the caller will
11642                  * open a transaction first).
11643                  */
11644                 if (!(read_only_mode && repair))
11645                         fprintf(stderr,
11646                                 "%sroot item for root %llu,"
11647                                 " current bytenr %llu, current gen %llu, current level %u,"
11648                                 " new bytenr %llu, new gen %llu, new level %u\n",
11649                                 (read_only_mode ? "" : "fixing "),
11650                                 root_id,
11651                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11652                                 btrfs_root_level(&ri),
11653                                 rii->bytenr, rii->gen, rii->level);
11654
11655                 if (btrfs_root_generation(&ri) > rii->gen) {
11656                         fprintf(stderr,
11657                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11658                                 root_id, btrfs_root_generation(&ri), rii->gen);
11659                         return -EINVAL;
11660                 }
11661
11662                 if (!read_only_mode) {
11663                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11664                         btrfs_set_root_level(&ri, rii->level);
11665                         btrfs_set_root_generation(&ri, rii->gen);
11666                         write_extent_buffer(path->nodes[0], &ri,
11667                                             offset, sizeof(ri));
11668                 }
11669
11670                 return 1;
11671         }
11672
11673         return 0;
11674 }
11675
11676 /*
11677  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11678  * caused read-only snapshots to be corrupted if they were created at a moment
11679  * when the source subvolume/snapshot had orphan items. The issue was that the
11680  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11681  * node instead of the post orphan cleanup root node.
11682  * So this function, and its callees, just detects and fixes those cases. Even
11683  * though the regression was for read-only snapshots, this function applies to
11684  * any snapshot/subvolume root.
11685  * This must be run before any other repair code - not doing it so, makes other
11686  * repair code delete or modify backrefs in the extent tree for example, which
11687  * will result in an inconsistent fs after repairing the root items.
11688  */
11689 static int repair_root_items(struct btrfs_fs_info *info)
11690 {
11691         struct btrfs_path path;
11692         struct btrfs_key key;
11693         struct extent_buffer *leaf;
11694         struct btrfs_trans_handle *trans = NULL;
11695         int ret = 0;
11696         int bad_roots = 0;
11697         int need_trans = 0;
11698
11699         btrfs_init_path(&path);
11700
11701         ret = build_roots_info_cache(info);
11702         if (ret)
11703                 goto out;
11704
11705         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11706         key.type = BTRFS_ROOT_ITEM_KEY;
11707         key.offset = 0;
11708
11709 again:
11710         /*
11711          * Avoid opening and committing transactions if a leaf doesn't have
11712          * any root items that need to be fixed, so that we avoid rotating
11713          * backup roots unnecessarily.
11714          */
11715         if (need_trans) {
11716                 trans = btrfs_start_transaction(info->tree_root, 1);
11717                 if (IS_ERR(trans)) {
11718                         ret = PTR_ERR(trans);
11719                         goto out;
11720                 }
11721         }
11722
11723         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11724                                 0, trans ? 1 : 0);
11725         if (ret < 0)
11726                 goto out;
11727         leaf = path.nodes[0];
11728
11729         while (1) {
11730                 struct btrfs_key found_key;
11731
11732                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11733                         int no_more_keys = find_next_key(&path, &key);
11734
11735                         btrfs_release_path(&path);
11736                         if (trans) {
11737                                 ret = btrfs_commit_transaction(trans,
11738                                                                info->tree_root);
11739                                 trans = NULL;
11740                                 if (ret < 0)
11741                                         goto out;
11742                         }
11743                         need_trans = 0;
11744                         if (no_more_keys)
11745                                 break;
11746                         goto again;
11747                 }
11748
11749                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11750
11751                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11752                         goto next;
11753                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11754                         goto next;
11755
11756                 ret = maybe_repair_root_item(info, &path, &found_key,
11757                                              trans ? 0 : 1);
11758                 if (ret < 0)
11759                         goto out;
11760                 if (ret) {
11761                         if (!trans && repair) {
11762                                 need_trans = 1;
11763                                 key = found_key;
11764                                 btrfs_release_path(&path);
11765                                 goto again;
11766                         }
11767                         bad_roots++;
11768                 }
11769 next:
11770                 path.slots[0]++;
11771         }
11772         ret = 0;
11773 out:
11774         free_roots_info_cache();
11775         btrfs_release_path(&path);
11776         if (trans)
11777                 btrfs_commit_transaction(trans, info->tree_root);
11778         if (ret < 0)
11779                 return ret;
11780
11781         return bad_roots;
11782 }
11783
11784 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11785 {
11786         struct btrfs_trans_handle *trans;
11787         struct btrfs_block_group_cache *bg_cache;
11788         u64 current = 0;
11789         int ret = 0;
11790
11791         /* Clear all free space cache inodes and its extent data */
11792         while (1) {
11793                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11794                 if (!bg_cache)
11795                         break;
11796                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11797                 if (ret < 0)
11798                         return ret;
11799                 current = bg_cache->key.objectid + bg_cache->key.offset;
11800         }
11801
11802         /* Don't forget to set cache_generation to -1 */
11803         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11804         if (IS_ERR(trans)) {
11805                 error("failed to update super block cache generation");
11806                 return PTR_ERR(trans);
11807         }
11808         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11809         btrfs_commit_transaction(trans, fs_info->tree_root);
11810
11811         return ret;
11812 }
11813
/*
 * Help text for "btrfs check": a NULL-terminated array of lines that
 * cmd_check() hands to usage().
 *
 * NOTE(review): -Q and -E are listed here with short forms, but the
 * optstring given to getopt_long() in cmd_check() is "as:br:p", which
 * contains neither Q nor E.  The short forms therefore look unreachable;
 * confirm and bring the two in sync.
 */
11814 const char * const cmd_check_usage[] = {
11815         "btrfs check [options] <device>",
11816         "Check structural integrity of a filesystem (unmounted).",
11817         "Check structural integrity of an unmounted filesystem. Verify internal",
11818         "trees' consistency and item connectivity. In the repair mode try to",
11819         "fix the problems found. ",
11820         "WARNING: the repair mode is considered dangerous",
11821         "",
11822         "-s|--super <superblock>     use this superblock copy",
11823         "-b|--backup                 use the first valid backup root copy",
11824         "--repair                    try to repair the filesystem",
11825         "--readonly                  run in read-only mode (default)",
11826         "--init-csum-tree            create a new CRC tree",
11827         "--init-extent-tree          create a new extent tree",
11828         "--mode <MODE>               allows choice of memory/IO trade-offs",
11829         "                            where MODE is one of:",
11830         "                            original - read inodes and extents to memory (requires",
11831         "                                       more memory, does less IO)",
11832         "                            lowmem   - try to use less memory but read blocks again",
11833         "                                       when needed",
11834         "--check-data-csum           verify checksums of data blocks",
11835         "-Q|--qgroup-report          print a report on qgroup consistency",
11836         "-E|--subvol-extents <subvolid>",
11837         "                            print subvolume extents and sharing state",
11838         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
11839         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
11840         "-p|--progress               indicate progress",
11841         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
11842         NULL
11843 };
11844
11845 int cmd_check(int argc, char **argv)
11846 {
11847         struct cache_tree root_cache;
11848         struct btrfs_root *root;
11849         struct btrfs_fs_info *info;
11850         u64 bytenr = 0;
11851         u64 subvolid = 0;
11852         u64 tree_root_bytenr = 0;
11853         u64 chunk_root_bytenr = 0;
11854         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11855         int ret;
11856         u64 num;
11857         int init_csum_tree = 0;
11858         int readonly = 0;
11859         int clear_space_cache = 0;
11860         int qgroup_report = 0;
11861         int qgroups_repaired = 0;
11862         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11863
11864         while(1) {
11865                 int c;
11866                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11867                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11868                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11869                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11870                 static const struct option long_options[] = {
11871                         { "super", required_argument, NULL, 's' },
11872                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11873                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11874                         { "init-csum-tree", no_argument, NULL,
11875                                 GETOPT_VAL_INIT_CSUM },
11876                         { "init-extent-tree", no_argument, NULL,
11877                                 GETOPT_VAL_INIT_EXTENT },
11878                         { "check-data-csum", no_argument, NULL,
11879                                 GETOPT_VAL_CHECK_CSUM },
11880                         { "backup", no_argument, NULL, 'b' },
11881                         { "subvol-extents", required_argument, NULL, 'E' },
11882                         { "qgroup-report", no_argument, NULL, 'Q' },
11883                         { "tree-root", required_argument, NULL, 'r' },
11884                         { "chunk-root", required_argument, NULL,
11885                                 GETOPT_VAL_CHUNK_TREE },
11886                         { "progress", no_argument, NULL, 'p' },
11887                         { "mode", required_argument, NULL,
11888                                 GETOPT_VAL_MODE },
11889                         { "clear-space-cache", required_argument, NULL,
11890                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11891                         { NULL, 0, NULL, 0}
11892                 };
11893
11894                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11895                 if (c < 0)
11896                         break;
11897                 switch(c) {
11898                         case 'a': /* ignored */ break;
11899                         case 'b':
11900                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11901                                 break;
11902                         case 's':
11903                                 num = arg_strtou64(optarg);
11904                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11905                                         error(
11906                                         "super mirror should be less than %d",
11907                                                 BTRFS_SUPER_MIRROR_MAX);
11908                                         exit(1);
11909                                 }
11910                                 bytenr = btrfs_sb_offset(((int)num));
11911                                 printf("using SB copy %llu, bytenr %llu\n", num,
11912                                        (unsigned long long)bytenr);
11913                                 break;
11914                         case 'Q':
11915                                 qgroup_report = 1;
11916                                 break;
11917                         case 'E':
11918                                 subvolid = arg_strtou64(optarg);
11919                                 break;
11920                         case 'r':
11921                                 tree_root_bytenr = arg_strtou64(optarg);
11922                                 break;
11923                         case GETOPT_VAL_CHUNK_TREE:
11924                                 chunk_root_bytenr = arg_strtou64(optarg);
11925                                 break;
11926                         case 'p':
11927                                 ctx.progress_enabled = true;
11928                                 break;
11929                         case '?':
11930                         case 'h':
11931                                 usage(cmd_check_usage);
11932                         case GETOPT_VAL_REPAIR:
11933                                 printf("enabling repair mode\n");
11934                                 repair = 1;
11935                                 ctree_flags |= OPEN_CTREE_WRITES;
11936                                 break;
11937                         case GETOPT_VAL_READONLY:
11938                                 readonly = 1;
11939                                 break;
11940                         case GETOPT_VAL_INIT_CSUM:
11941                                 printf("Creating a new CRC tree\n");
11942                                 init_csum_tree = 1;
11943                                 repair = 1;
11944                                 ctree_flags |= OPEN_CTREE_WRITES;
11945                                 break;
11946                         case GETOPT_VAL_INIT_EXTENT:
11947                                 init_extent_tree = 1;
11948                                 ctree_flags |= (OPEN_CTREE_WRITES |
11949                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11950                                 repair = 1;
11951                                 break;
11952                         case GETOPT_VAL_CHECK_CSUM:
11953                                 check_data_csum = 1;
11954                                 break;
11955                         case GETOPT_VAL_MODE:
11956                                 check_mode = parse_check_mode(optarg);
11957                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11958                                         error("unknown mode: %s", optarg);
11959                                         exit(1);
11960                                 }
11961                                 break;
11962                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11963                                 if (strcmp(optarg, "v1") == 0) {
11964                                         clear_space_cache = 1;
11965                                 } else if (strcmp(optarg, "v2") == 0) {
11966                                         clear_space_cache = 2;
11967                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11968                                 } else {
11969                                         error(
11970                 "invalid argument to --clear-space-cache, must be v1 or v2");
11971                                         exit(1);
11972                                 }
11973                                 ctree_flags |= OPEN_CTREE_WRITES;
11974                                 break;
11975                 }
11976         }
11977
11978         if (check_argc_exact(argc - optind, 1))
11979                 usage(cmd_check_usage);
11980
11981         if (ctx.progress_enabled) {
11982                 ctx.tp = TASK_NOTHING;
11983                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11984         }
11985
11986         /* This check is the only reason for --readonly to exist */
11987         if (readonly && repair) {
11988                 error("repair options are not compatible with --readonly");
11989                 exit(1);
11990         }
11991
11992         /*
11993          * Not supported yet
11994          */
11995         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11996                 error("low memory mode doesn't support repair yet");
11997                 exit(1);
11998         }
11999
12000         radix_tree_init();
12001         cache_tree_init(&root_cache);
12002
12003         if((ret = check_mounted(argv[optind])) < 0) {
12004                 error("could not check mount status: %s", strerror(-ret));
12005                 goto err_out;
12006         } else if(ret) {
12007                 error("%s is currently mounted, aborting", argv[optind]);
12008                 ret = -EBUSY;
12009                 goto err_out;
12010         }
12011
12012         /* only allow partial opening under repair mode */
12013         if (repair)
12014                 ctree_flags |= OPEN_CTREE_PARTIAL;
12015
12016         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12017                                   chunk_root_bytenr, ctree_flags);
12018         if (!info) {
12019                 error("cannot open file system");
12020                 ret = -EIO;
12021                 goto err_out;
12022         }
12023
12024         global_info = info;
12025         root = info->fs_root;
12026         if (clear_space_cache == 1) {
12027                 if (btrfs_fs_compat_ro(info,
12028                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12029                         error(
12030                 "free space cache v2 detected, use --clear-space-cache v2");
12031                         ret = 1;
12032                         goto close_out;
12033                 }
12034                 printf("Clearing free space cache\n");
12035                 ret = clear_free_space_cache(info);
12036                 if (ret) {
12037                         error("failed to clear free space cache");
12038                         ret = 1;
12039                 } else {
12040                         printf("Free space cache cleared\n");
12041                 }
12042                 goto close_out;
12043         } else if (clear_space_cache == 2) {
12044                 if (!btrfs_fs_compat_ro(info,
12045                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12046                         printf("no free space cache v2 to clear\n");
12047                         ret = 0;
12048                         goto close_out;
12049                 }
12050                 printf("Clear free space cache v2\n");
12051                 ret = btrfs_clear_free_space_tree(info);
12052                 if (ret) {
12053                         error("failed to clear free space cache v2: %d", ret);
12054                         ret = 1;
12055                 } else {
12056                         printf("free space cache v2 cleared\n");
12057                 }
12058                 goto close_out;
12059         }
12060
12061         /*
12062          * repair mode will force us to commit transaction which
12063          * will make us fail to load log tree when mounting.
12064          */
12065         if (repair && btrfs_super_log_root(info->super_copy)) {
12066                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12067                 if (!ret) {
12068                         ret = 1;
12069                         goto close_out;
12070                 }
12071                 ret = zero_log_tree(root);
12072                 if (ret) {
12073                         error("failed to zero log tree: %d", ret);
12074                         goto close_out;
12075                 }
12076         }
12077
12078         uuid_unparse(info->super_copy->fsid, uuidbuf);
12079         if (qgroup_report) {
12080                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12081                        uuidbuf);
12082                 ret = qgroup_verify_all(info);
12083                 if (ret == 0)
12084                         report_qgroups(1);
12085                 goto close_out;
12086         }
12087         if (subvolid) {
12088                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12089                        subvolid, argv[optind], uuidbuf);
12090                 ret = print_extent_state(info, subvolid);
12091                 goto close_out;
12092         }
12093         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12094
12095         if (!extent_buffer_uptodate(info->tree_root->node) ||
12096             !extent_buffer_uptodate(info->dev_root->node) ||
12097             !extent_buffer_uptodate(info->chunk_root->node)) {
12098                 error("critical roots corrupted, unable to check the filesystem");
12099                 ret = -EIO;
12100                 goto close_out;
12101         }
12102
12103         if (init_extent_tree || init_csum_tree) {
12104                 struct btrfs_trans_handle *trans;
12105
12106                 trans = btrfs_start_transaction(info->extent_root, 0);
12107                 if (IS_ERR(trans)) {
12108                         error("error starting transaction");
12109                         ret = PTR_ERR(trans);
12110                         goto close_out;
12111                 }
12112
12113                 if (init_extent_tree) {
12114                         printf("Creating a new extent tree\n");
12115                         ret = reinit_extent_tree(trans, info);
12116                         if (ret)
12117                                 goto close_out;
12118                 }
12119
12120                 if (init_csum_tree) {
12121                         printf("Reinitialize checksum tree\n");
12122                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12123                         if (ret) {
12124                                 error("checksum tree initialization failed: %d",
12125                                                 ret);
12126                                 ret = -EIO;
12127                                 goto close_out;
12128                         }
12129
12130                         ret = fill_csum_tree(trans, info->csum_root,
12131                                              init_extent_tree);
12132                         if (ret) {
12133                                 error("checksum tree refilling failed: %d", ret);
12134                                 return -EIO;
12135                         }
12136                 }
12137                 /*
12138                  * Ok now we commit and run the normal fsck, which will add
12139                  * extent entries for all of the items it finds.
12140                  */
12141                 ret = btrfs_commit_transaction(trans, info->extent_root);
12142                 if (ret)
12143                         goto close_out;
12144         }
12145         if (!extent_buffer_uptodate(info->extent_root->node)) {
12146                 error("critical: extent_root, unable to check the filesystem");
12147                 ret = -EIO;
12148                 goto close_out;
12149         }
12150         if (!extent_buffer_uptodate(info->csum_root->node)) {
12151                 error("critical: csum_root, unable to check the filesystem");
12152                 ret = -EIO;
12153                 goto close_out;
12154         }
12155
12156         if (!ctx.progress_enabled)
12157                 fprintf(stderr, "checking extents\n");
12158         if (check_mode == CHECK_MODE_LOWMEM)
12159                 ret = check_chunks_and_extents_v2(root);
12160         else
12161                 ret = check_chunks_and_extents(root);
12162         if (ret)
12163                 error(
12164                 "errors found in extent allocation tree or chunk allocation");
12165
12166         ret = repair_root_items(info);
12167         if (ret < 0)
12168                 goto close_out;
12169         if (repair) {
12170                 fprintf(stderr, "Fixed %d roots.\n", ret);
12171                 ret = 0;
12172         } else if (ret > 0) {
12173                 fprintf(stderr,
12174                        "Found %d roots with an outdated root item.\n",
12175                        ret);
12176                 fprintf(stderr,
12177                         "Please run a filesystem check with the option --repair to fix them.\n");
12178                 ret = 1;
12179                 goto close_out;
12180         }
12181
12182         if (!ctx.progress_enabled) {
12183                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12184                         fprintf(stderr, "checking free space tree\n");
12185                 else
12186                         fprintf(stderr, "checking free space cache\n");
12187         }
12188         ret = check_space_cache(root);
12189         if (ret)
12190                 goto out;
12191
12192         /*
12193          * We used to have to have these hole extents in between our real
12194          * extents so if we don't have this flag set we need to make sure there
12195          * are no gaps in the file extents for inodes, otherwise we can just
12196          * ignore it when this happens.
12197          */
12198         no_holes = btrfs_fs_incompat(root->fs_info,
12199                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12200         if (!ctx.progress_enabled)
12201                 fprintf(stderr, "checking fs roots\n");
12202         ret = check_fs_roots(root, &root_cache);
12203         if (ret)
12204                 goto out;
12205
12206         fprintf(stderr, "checking csums\n");
12207         ret = check_csums(root);
12208         if (ret)
12209                 goto out;
12210
12211         fprintf(stderr, "checking root refs\n");
12212         ret = check_root_refs(root, &root_cache);
12213         if (ret)
12214                 goto out;
12215
12216         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12217                 struct extent_buffer *eb;
12218
12219                 eb = list_first_entry(&root->fs_info->recow_ebs,
12220                                       struct extent_buffer, recow);
12221                 list_del_init(&eb->recow);
12222                 ret = recow_extent_buffer(root, eb);
12223                 if (ret)
12224                         break;
12225         }
12226
12227         while (!list_empty(&delete_items)) {
12228                 struct bad_item *bad;
12229
12230                 bad = list_first_entry(&delete_items, struct bad_item, list);
12231                 list_del_init(&bad->list);
12232                 if (repair)
12233                         ret = delete_bad_item(root, bad);
12234                 free(bad);
12235         }
12236
12237         if (info->quota_enabled) {
12238                 int err;
12239                 fprintf(stderr, "checking quota groups\n");
12240                 err = qgroup_verify_all(info);
12241                 if (err)
12242                         goto out;
12243                 report_qgroups(0);
12244                 err = repair_qgroups(info, &qgroups_repaired);
12245                 if (err)
12246                         goto out;
12247         }
12248
12249         if (!list_empty(&root->fs_info->recow_ebs)) {
12250                 error("transid errors in file system");
12251                 ret = 1;
12252         }
12253 out:
12254         /* Don't override original ret */
12255         if (!ret && qgroups_repaired)
12256                 ret = qgroups_repaired;
12257
12258         if (found_old_backref) { /*
12259                  * there was a disk format change when mixed
12260                  * backref was in testing tree. The old format
12261                  * existed about one week.
12262                  */
12263                 printf("\n * Found old mixed backref format. "
12264                        "The old format is not supported! *"
12265                        "\n * Please mount the FS in readonly mode, "
12266                        "backup data and re-format the FS. *\n\n");
12267                 ret = 1;
12268         }
12269         printf("found %llu bytes used err is %d\n",
12270                (unsigned long long)bytes_used, ret);
12271         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12272         printf("total tree bytes: %llu\n",
12273                (unsigned long long)total_btree_bytes);
12274         printf("total fs tree bytes: %llu\n",
12275                (unsigned long long)total_fs_tree_bytes);
12276         printf("total extent tree bytes: %llu\n",
12277                (unsigned long long)total_extent_tree_bytes);
12278         printf("btree space waste bytes: %llu\n",
12279                (unsigned long long)btree_space_waste);
12280         printf("file data blocks allocated: %llu\n referenced %llu\n",
12281                 (unsigned long long)data_bytes_allocated,
12282                 (unsigned long long)data_bytes_referenced);
12283
12284         free_qgroup_counts();
12285         free_root_recs_tree(&root_cache);
12286 close_out:
12287         close_ctree(root);
12288 err_out:
12289         if (ctx.progress_enabled)
12290                 task_deinit(ctx.info);
12291
12292         return ret;
12293 }