btrfs-progs: check: introduce function to check fs root
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phases reported by the progress indicator.  The value is used directly as
 * an index into task_position_string[] in print_status_check(), so the order
 * of the real phases must match that table.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /* No label exists for this value; print_status_check() returns early. */
        TASK_NOTHING, /* have to be the last element */
};
52
/* Context handed to print_status_check() through its void * argument. */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output was requested -- confirm */
        int progress_enabled;
        /* Phase whose label print_status_check() prints */
        enum task_position tp;

        /* Handle passed to task_period_start() / task_period_wait() */
        struct task_info *info;
};
59
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementations that can be selected by name; parse_check_mode()
 * maps a user supplied string onto one of these values.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        /* Returned by parse_check_mode() for an unrecognized string */
        CHECK_MODE_UNKNOWN,
        /* Alias, not a distinct mode: shares the value of CHECK_MODE_ORIGINAL */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Header common to both backref flavours: it is embedded as the "node"
 * member of struct data_backref and struct tree_backref, which are recovered
 * from it with to_data_backref() / to_tree_backref().
 */
struct extent_backref {
        /* List link; to_extent_backref() maps it back to this struct */
        struct list_head list;
        /* NOTE(review): presumably tells data_backref from tree_backref -- confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
/*
 * Backref flavour that embeds the common extent_backref header as "node";
 * to_data_backref() recovers it from a pointer to that header.
 */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share storage.
         * NOTE(review): presumably node.full_backref says which one is
         * meaningful -- confirm against the users of this struct.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* A counter, distinct from the one-bit node.found_ref flag */
        u32 found_ref;
};
115
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 {
118         return container_of(back, struct data_backref, node);
119 }
120
/*
 * Much like data_backref, with the undetermined members removed and a plain
 * list_head used for linkage.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to the inode record's orphan_extents
 * list (struct inode_record::orphan_extents).
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* Printed as "inode" by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
135
/*
 * Backref flavour that embeds the common extent_backref header as "node";
 * to_tree_backref() recovers it from a pointer to that header.
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage, as in struct data_backref */
        union {
                u64 parent;
                u64 root;
        };
};
143
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 {
146         return container_of(back, struct tree_backref, node);
147 }
148
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 does not fit in a single bit, which is why that field is
 * declared two bits wide.
 */
enum { FLAG_UNSET = 2 };
151
/*
 * Per-extent bookkeeping record.  It can be linked through several list
 * heads and through a cache_extent; to_extent_record() recovers it from the
 * "list" member.
 */
struct extent_record {
        /* NOTE(review): presumably a list of struct extent_backref -- confirm */
        struct list_head backrefs;
        struct list_head dups;
        /* Link used by to_extent_record() */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so it can hold FLAG_UNSET (2) besides 0 and 1 */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
178
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 {
181         return container_of(entry, struct extent_record, list);
182 }
183
/*
 * One name-based reference to an inode, kept on inode_record::backrefs.
 * The record is allocated with room for the name behind the fixed part.
 */
struct inode_backref {
        /* Link on inode_record::backrefs; see to_inode_backref() */
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        /* Compared against imode_to_type(rec->imode) in maybe_free_inode_rec() */
        u8 filetype;
        u8 ref_type;
        /* Bitmask of REF_ERR_* values */
        int errors;
        u64 dir;
        u64 index;
        u16 namelen;
        /*
         * Trailing name storage; clone_inode_rec() copies
         * sizeof(struct) + namelen + 1 bytes for each backref.
         */
        char name[0];
};
197
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 {
200         return list_entry(entry, struct inode_backref, list);
201 }
202
/*
 * List-linked record carrying a root's identifying fields.
 * NOTE(review): presumably a snapshot of values read from a root item --
 * confirm against the code that fills it in.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
213
/*
 * Error bits for a backref's "errors" field (e.g. inode_backref::errors, set
 * in maybe_free_inode_rec()); print_ref_error() turns them into text.
 * NOTE(review): the *_ROOT_* bits are presumably used with
 * root_backref::errors -- confirm.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
227
/*
 * One hole in a file's extent coverage, stored in an rb tree
 * (inode_record::holes).  compare_hole_range() treats it as the half-open
 * range [start, start + len).
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
233
/*
 * Everything collected about one inode while checking a tree.  Records are
 * reference counted via "refs": get_inode_rec() clones a record before
 * handing it out for modification when refs > 1, and free_inode_rec() only
 * releases the memory once refs drops to zero.
 */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* Bitmask of I_ERR_* values, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* Compared against nlink in can_free_inode_rec() */
        u32 found_link;
        u64 found_size;
        /* Initialized to (u64)-1 for a fresh record in get_inode_rec() */
        u64 extent_start;
        u64 extent_end;
        /* rb tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* Reference count, see the comment above the struct */
        u32 refs;
};
261
/*
 * Error bits for inode_record::errors; print_inode_error() turns them into
 * text.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
277
/*
 * One name-based reference to a root, kept on root_record::backrefs.
 * Laid out like inode_backref: fixed part followed by the name.
 */
struct root_backref {
        /* List link; see to_root_backref() */
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* NOTE(review): presumably a bitmask of REF_ERR_* values -- confirm */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        /* Trailing name storage (zero-length array at the end of the struct) */
        char name[0];
};
292
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 {
295         return list_entry(entry, struct root_backref, list);
296 }
297
/* Per-root bookkeeping record, insertable into a cache tree via "cache". */
struct root_record {
        /* NOTE(review): presumably a list of struct root_backref -- confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
305
/*
 * Cache tree entry that carries an arbitrary pointer.  get_inode_rec() uses
 * it to map an inode number (cache.start, with cache.size == 1) to the
 * struct inode_record stored in "data".
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
310
/*
 * Cache tree entry holding its own root and inode caches plus a reference
 * count.
 * NOTE(review): presumably one per shared tree block, tracked through
 * walk_control::shared -- confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
318
/* A (start, size) pair describing one block. */
struct block_info {
        u64 start;
        u32 size;
};
323
/* State carried along while walking a tree. */
struct walk_control {
        struct cache_tree shared;
        /* One slot per possible tree level */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        /* NOTE(review): presumably an index into nodes[] -- confirm */
        int active_node;
        int root_level;
};
330
/* List-linked record identifying an item by its key and the id of its root. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
336
/* List-linked (bytenr, bytes) candidate with a hit count and a broken flag. */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
344
/* Summary of a root, insertable into a cache tree via "cache_extent". */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
354
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit so that a combined error mask can always be
 * decoded unambiguously.  CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH; it now has a bit of its own.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
371
372 static void *print_status_check(void *p)
373 {
374         struct task_ctx *priv = p;
375         const char work_indicator[] = { '.', 'o', 'O', 'o' };
376         uint32_t count = 0;
377         static char *task_position_string[] = {
378                 "checking extents",
379                 "checking free space cache",
380                 "checking fs roots",
381         };
382
383         task_period_start(priv->info, 1000 /* 1s */);
384
385         if (priv->tp == TASK_NOTHING)
386                 return NULL;
387
388         while (1) {
389                 printf("%s [%c]\r", task_position_string[priv->tp],
390                                 work_indicator[count % 4]);
391                 count++;
392                 fflush(stdout);
393                 task_period_wait(priv->info);
394         }
395         return NULL;
396 }
397
/*
 * Emit a newline on stdout and flush it.
 *
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        putchar('\n');
        fflush(stdout);

        return 0;
}
405
406 static enum btrfs_check_mode parse_check_mode(const char *str)
407 {
408         if (strcmp(str, "lowmem") == 0)
409                 return CHECK_MODE_LOWMEM;
410         if (strcmp(str, "orig") == 0)
411                 return CHECK_MODE_ORIGINAL;
412         if (strcmp(str, "original") == 0)
413                 return CHECK_MODE_ORIGINAL;
414
415         return CHECK_MODE_UNKNOWN;
416 }
417
418 /* Compatible function to allow reuse of old codes */
419 static u64 first_extent_gap(struct rb_root *holes)
420 {
421         struct file_extent_hole *hole;
422
423         if (RB_EMPTY_ROOT(holes))
424                 return (u64)-1;
425
426         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
427         return hole->start;
428 }
429
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 {
432         struct file_extent_hole *hole1;
433         struct file_extent_hole *hole2;
434
435         hole1 = rb_entry(node1, struct file_extent_hole, node);
436         hole2 = rb_entry(node2, struct file_extent_hole, node);
437
438         if (hole1->start > hole2->start)
439                 return -1;
440         if (hole1->start < hole2->start)
441                 return 1;
442         /* Now hole1->start == hole2->start */
443         if (hole1->len >= hole2->len)
444                 /*
445                  * Hole 1 will be merge center
446                  * Same hole will be merged later
447                  */
448                 return -1;
449         /* Hole 2 will be merge center */
450         return 1;
451 }
452
453 /*
454  * Add a hole to the record
455  *
456  * This will do hole merge for copy_file_extent_holes(),
457  * which will ensure there won't be continuous holes.
458  */
459 static int add_file_extent_hole(struct rb_root *holes,
460                                 u64 start, u64 len)
461 {
462         struct file_extent_hole *hole;
463         struct file_extent_hole *prev = NULL;
464         struct file_extent_hole *next = NULL;
465
466         hole = malloc(sizeof(*hole));
467         if (!hole)
468                 return -ENOMEM;
469         hole->start = start;
470         hole->len = len;
471         /* Since compare will not return 0, no -EEXIST will happen */
472         rb_insert(holes, &hole->node, compare_hole);
473
474         /* simple merge with previous hole */
475         if (rb_prev(&hole->node))
476                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477                                 node);
478         if (prev && prev->start + prev->len >= hole->start) {
479                 hole->len = hole->start + hole->len - prev->start;
480                 hole->start = prev->start;
481                 rb_erase(&prev->node, holes);
482                 free(prev);
483                 prev = NULL;
484         }
485
486         /* iterate merge with next holes */
487         while (1) {
488                 if (!rb_next(&hole->node))
489                         break;
490                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491                                         node);
492                 if (hole->start + hole->len >= next->start) {
493                         if (hole->start + hole->len <= next->start + next->len)
494                                 hole->len = next->start + next->len -
495                                             hole->start;
496                         rb_erase(&next->node, holes);
497                         free(next);
498                         next = NULL;
499                 } else
500                         break;
501         }
502         return 0;
503 }
504
505 static int compare_hole_range(struct rb_node *node, void *data)
506 {
507         struct file_extent_hole *hole;
508         u64 start;
509
510         hole = (struct file_extent_hole *)data;
511         start = hole->start;
512
513         hole = rb_entry(node, struct file_extent_hole, node);
514         if (start < hole->start)
515                 return -1;
516         if (start >= hole->start && start < hole->start + hole->len)
517                 return 0;
518         return 1;
519 }
520
521 /*
522  * Delete a hole in the record
523  *
524  * This will do the hole split and is much restrict than add.
525  */
526 static int del_file_extent_hole(struct rb_root *holes,
527                                 u64 start, u64 len)
528 {
529         struct file_extent_hole *hole;
530         struct file_extent_hole tmp;
531         u64 prev_start = 0;
532         u64 prev_len = 0;
533         u64 next_start = 0;
534         u64 next_len = 0;
535         struct rb_node *node;
536         int have_prev = 0;
537         int have_next = 0;
538         int ret = 0;
539
540         tmp.start = start;
541         tmp.len = len;
542         node = rb_search(holes, &tmp, compare_hole_range, NULL);
543         if (!node)
544                 return -EEXIST;
545         hole = rb_entry(node, struct file_extent_hole, node);
546         if (start + len > hole->start + hole->len)
547                 return -EEXIST;
548
549         /*
550          * Now there will be no overlap, delete the hole and re-add the
551          * split(s) if they exists.
552          */
553         if (start > hole->start) {
554                 prev_start = hole->start;
555                 prev_len = start - hole->start;
556                 have_prev = 1;
557         }
558         if (hole->start + hole->len > start + len) {
559                 next_start = start + len;
560                 next_len = hole->start + hole->len - start - len;
561                 have_next = 1;
562         }
563         rb_erase(node, holes);
564         free(hole);
565         if (have_prev) {
566                 ret = add_file_extent_hole(holes, prev_start, prev_len);
567                 if (ret < 0)
568                         return ret;
569         }
570         if (have_next) {
571                 ret = add_file_extent_hole(holes, next_start, next_len);
572                 if (ret < 0)
573                         return ret;
574         }
575         return 0;
576 }
577
578 static int copy_file_extent_holes(struct rb_root *dst,
579                                   struct rb_root *src)
580 {
581         struct file_extent_hole *hole;
582         struct rb_node *node;
583         int ret = 0;
584
585         node = rb_first(src);
586         while (node) {
587                 hole = rb_entry(node, struct file_extent_hole, node);
588                 ret = add_file_extent_hole(dst, hole->start, hole->len);
589                 if (ret)
590                         break;
591                 node = rb_next(node);
592         }
593         return ret;
594 }
595
596 static void free_file_extent_holes(struct rb_root *holes)
597 {
598         struct rb_node *node;
599         struct file_extent_hole *hole;
600
601         node = rb_first(holes);
602         while (node) {
603                 hole = rb_entry(node, struct file_extent_hole, node);
604                 rb_erase(node, holes);
605                 free(hole);
606                 node = rb_first(holes);
607         }
608 }
609
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613                                  struct btrfs_root *root)
614 {
615         if (root->last_trans != trans->transid) {
616                 root->track_dirty = 1;
617                 root->last_trans = trans->transid;
618                 root->commit_root = root->node;
619                 extent_buffer_get(root->node);
620         }
621 }
622
623 static u8 imode_to_type(u32 imode)
624 {
625 #define S_SHIFT 12
626         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
628                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
629                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
630                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
631                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
632                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
633                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
634         };
635
636         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
637 #undef S_SHIFT
638 }
639
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 {
642         struct device_record *rec1;
643         struct device_record *rec2;
644
645         rec1 = rb_entry(node1, struct device_record, node);
646         rec2 = rb_entry(node2, struct device_record, node);
647         if (rec1->devid > rec2->devid)
648                 return -1;
649         else if (rec1->devid < rec2->devid)
650                 return 1;
651         else
652                 return 0;
653 }
654
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 {
657         struct inode_record *rec;
658         struct inode_backref *backref;
659         struct inode_backref *orig;
660         struct inode_backref *tmp;
661         struct orphan_data_extent *src_orphan;
662         struct orphan_data_extent *dst_orphan;
663         struct rb_node *rb;
664         size_t size;
665         int ret;
666
667         rec = malloc(sizeof(*rec));
668         if (!rec)
669                 return ERR_PTR(-ENOMEM);
670         memcpy(rec, orig_rec, sizeof(*rec));
671         rec->refs = 1;
672         INIT_LIST_HEAD(&rec->backrefs);
673         INIT_LIST_HEAD(&rec->orphan_extents);
674         rec->holes = RB_ROOT;
675
676         list_for_each_entry(orig, &orig_rec->backrefs, list) {
677                 size = sizeof(*orig) + orig->namelen + 1;
678                 backref = malloc(size);
679                 if (!backref) {
680                         ret = -ENOMEM;
681                         goto cleanup;
682                 }
683                 memcpy(backref, orig, size);
684                 list_add_tail(&backref->list, &rec->backrefs);
685         }
686         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687                 dst_orphan = malloc(sizeof(*dst_orphan));
688                 if (!dst_orphan) {
689                         ret = -ENOMEM;
690                         goto cleanup;
691                 }
692                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694         }
695         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
696         if (ret < 0)
697                 goto cleanup_rb;
698
699         return rec;
700
701 cleanup_rb:
702         rb = rb_first(&rec->holes);
703         while (rb) {
704                 struct file_extent_hole *hole;
705
706                 hole = rb_entry(rb, struct file_extent_hole, node);
707                 rb = rb_next(rb);
708                 free(hole);
709         }
710
711 cleanup:
712         if (!list_empty(&rec->backrefs))
713                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714                         list_del(&orig->list);
715                         free(orig);
716                 }
717
718         if (!list_empty(&rec->orphan_extents))
719                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720                         list_del(&orig->list);
721                         free(orig);
722                 }
723
724         free(rec);
725
726         return ERR_PTR(ret);
727 }
728
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
730                                       u64 objectid)
731 {
732         struct orphan_data_extent *orphan;
733
734         if (list_empty(orphan_extents))
735                 return;
736         printf("The following data extent is lost in tree %llu:\n",
737                objectid);
738         list_for_each_entry(orphan, orphan_extents, list) {
739                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
741                        orphan->disk_len);
742         }
743 }
744
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 {
747         u64 root_objectid = root->root_key.objectid;
748         int errors = rec->errors;
749
750         if (!errors)
751                 return;
752         /* reloc root errors, we print its corresponding fs root objectid*/
753         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754                 root_objectid = root->root_key.offset;
755                 fprintf(stderr, "reloc");
756         }
757         fprintf(stderr, "root %llu inode %llu errors %x",
758                 (unsigned long long) root_objectid,
759                 (unsigned long long) rec->ino, rec->errors);
760
761         if (errors & I_ERR_NO_INODE_ITEM)
762                 fprintf(stderr, ", no inode item");
763         if (errors & I_ERR_NO_ORPHAN_ITEM)
764                 fprintf(stderr, ", no orphan item");
765         if (errors & I_ERR_DUP_INODE_ITEM)
766                 fprintf(stderr, ", dup inode item");
767         if (errors & I_ERR_DUP_DIR_INDEX)
768                 fprintf(stderr, ", dup dir index");
769         if (errors & I_ERR_ODD_DIR_ITEM)
770                 fprintf(stderr, ", odd dir item");
771         if (errors & I_ERR_ODD_FILE_EXTENT)
772                 fprintf(stderr, ", odd file extent");
773         if (errors & I_ERR_BAD_FILE_EXTENT)
774                 fprintf(stderr, ", bad file extent");
775         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776                 fprintf(stderr, ", file extent overlap");
777         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778                 fprintf(stderr, ", file extent discount");
779         if (errors & I_ERR_DIR_ISIZE_WRONG)
780                 fprintf(stderr, ", dir isize wrong");
781         if (errors & I_ERR_FILE_NBYTES_WRONG)
782                 fprintf(stderr, ", nbytes wrong");
783         if (errors & I_ERR_ODD_CSUM_ITEM)
784                 fprintf(stderr, ", odd csum item");
785         if (errors & I_ERR_SOME_CSUM_MISSING)
786                 fprintf(stderr, ", some csum missing");
787         if (errors & I_ERR_LINK_COUNT_WRONG)
788                 fprintf(stderr, ", link count wrong");
789         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790                 fprintf(stderr, ", orphan file extent");
791         fprintf(stderr, "\n");
792         /* Print the orphan extents if needed */
793         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795
796         /* Print the holes if needed */
797         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798                 struct file_extent_hole *hole;
799                 struct rb_node *node;
800                 int found = 0;
801
802                 node = rb_first(&rec->holes);
803                 fprintf(stderr, "Found file extent holes:\n");
804                 while (node) {
805                         found = 1;
806                         hole = rb_entry(node, struct file_extent_hole, node);
807                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
808                                 hole->start, hole->len);
809                         node = rb_next(node);
810                 }
811                 if (!found)
812                         fprintf(stderr, "\tstart: 0, len: %llu\n",
813                                 round_up(rec->isize, root->sectorsize));
814         }
815 }
816
817 static void print_ref_error(int errors)
818 {
819         if (errors & REF_ERR_NO_DIR_ITEM)
820                 fprintf(stderr, ", no dir item");
821         if (errors & REF_ERR_NO_DIR_INDEX)
822                 fprintf(stderr, ", no dir index");
823         if (errors & REF_ERR_NO_INODE_REF)
824                 fprintf(stderr, ", no inode ref");
825         if (errors & REF_ERR_DUP_DIR_ITEM)
826                 fprintf(stderr, ", dup dir item");
827         if (errors & REF_ERR_DUP_DIR_INDEX)
828                 fprintf(stderr, ", dup dir index");
829         if (errors & REF_ERR_DUP_INODE_REF)
830                 fprintf(stderr, ", dup inode ref");
831         if (errors & REF_ERR_INDEX_UNMATCH)
832                 fprintf(stderr, ", index mismatch");
833         if (errors & REF_ERR_FILETYPE_UNMATCH)
834                 fprintf(stderr, ", filetype mismatch");
835         if (errors & REF_ERR_NAME_TOO_LONG)
836                 fprintf(stderr, ", name too long");
837         if (errors & REF_ERR_NO_ROOT_REF)
838                 fprintf(stderr, ", no root ref");
839         if (errors & REF_ERR_NO_ROOT_BACKREF)
840                 fprintf(stderr, ", no root backref");
841         if (errors & REF_ERR_DUP_ROOT_REF)
842                 fprintf(stderr, ", dup root ref");
843         if (errors & REF_ERR_DUP_ROOT_BACKREF)
844                 fprintf(stderr, ", dup root backref");
845         fprintf(stderr, "\n");
846 }
847
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
849                                           u64 ino, int mod)
850 {
851         struct ptr_node *node;
852         struct cache_extent *cache;
853         struct inode_record *rec = NULL;
854         int ret;
855
856         cache = lookup_cache_extent(inode_cache, ino, 1);
857         if (cache) {
858                 node = container_of(cache, struct ptr_node, cache);
859                 rec = node->data;
860                 if (mod && rec->refs > 1) {
861                         node->data = clone_inode_rec(rec);
862                         if (IS_ERR(node->data))
863                                 return node->data;
864                         rec->refs--;
865                         rec = node->data;
866                 }
867         } else if (mod) {
868                 rec = calloc(1, sizeof(*rec));
869                 if (!rec)
870                         return ERR_PTR(-ENOMEM);
871                 rec->ino = ino;
872                 rec->extent_start = (u64)-1;
873                 rec->refs = 1;
874                 INIT_LIST_HEAD(&rec->backrefs);
875                 INIT_LIST_HEAD(&rec->orphan_extents);
876                 rec->holes = RB_ROOT;
877
878                 node = malloc(sizeof(*node));
879                 if (!node) {
880                         free(rec);
881                         return ERR_PTR(-ENOMEM);
882                 }
883                 node->cache.start = ino;
884                 node->cache.size = 1;
885                 node->data = rec;
886
887                 if (ino == BTRFS_FREE_INO_OBJECTID)
888                         rec->found_link = 1;
889
890                 ret = insert_cache_extent(inode_cache, &node->cache);
891                 if (ret)
892                         return ERR_PTR(-EEXIST);
893         }
894         return rec;
895 }
896
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 {
899         struct orphan_data_extent *orphan;
900
901         while (!list_empty(orphan_extents)) {
902                 orphan = list_entry(orphan_extents->next,
903                                     struct orphan_data_extent, list);
904                 list_del(&orphan->list);
905                 free(orphan);
906         }
907 }
908
909 static void free_inode_rec(struct inode_record *rec)
910 {
911         struct inode_backref *backref;
912
913         if (--rec->refs > 0)
914                 return;
915
916         while (!list_empty(&rec->backrefs)) {
917                 backref = to_inode_backref(rec->backrefs.next);
918                 list_del(&backref->list);
919                 free(backref);
920         }
921         free_orphan_data_extents(&rec->orphan_extents);
922         free_file_extent_holes(&rec->holes);
923         free(rec);
924 }
925
926 static int can_free_inode_rec(struct inode_record *rec)
927 {
928         if (!rec->errors && rec->checked && rec->found_inode_item &&
929             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
930                 return 1;
931         return 0;
932 }
933
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935                                  struct inode_record *rec)
936 {
937         struct cache_extent *cache;
938         struct inode_backref *tmp, *backref;
939         struct ptr_node *node;
940         u8 filetype;
941
942         if (!rec->found_inode_item)
943                 return;
944
945         filetype = imode_to_type(rec->imode);
946         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947                 if (backref->found_dir_item && backref->found_dir_index) {
948                         if (backref->filetype != filetype)
949                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950                         if (!backref->errors && backref->found_inode_ref &&
951                             rec->nlink == rec->found_link) {
952                                 list_del(&backref->list);
953                                 free(backref);
954                         }
955                 }
956         }
957
958         if (!rec->checked || rec->merging)
959                 return;
960
961         if (S_ISDIR(rec->imode)) {
962                 if (rec->found_size != rec->isize)
963                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964                 if (rec->found_file_extent)
965                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
966         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967                 if (rec->found_dir_item)
968                         rec->errors |= I_ERR_ODD_DIR_ITEM;
969                 if (rec->found_size != rec->nbytes)
970                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971                 if (rec->nlink > 0 && !no_holes &&
972                     (rec->extent_end < rec->isize ||
973                      first_extent_gap(&rec->holes) < rec->isize))
974                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
975         }
976
977         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978                 if (rec->found_csum_item && rec->nodatasum)
979                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
980                 if (rec->some_csum_missing && !rec->nodatasum)
981                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
982         }
983
984         BUG_ON(rec->refs != 1);
985         if (can_free_inode_rec(rec)) {
986                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987                 node = container_of(cache, struct ptr_node, cache);
988                 BUG_ON(node->data != rec);
989                 remove_cache_extent(inode_cache, &node->cache);
990                 free(node);
991                 free_inode_rec(rec);
992         }
993 }
994
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 {
997         struct btrfs_path path;
998         struct btrfs_key key;
999         int ret;
1000
1001         key.objectid = BTRFS_ORPHAN_OBJECTID;
1002         key.type = BTRFS_ORPHAN_ITEM_KEY;
1003         key.offset = ino;
1004
1005         btrfs_init_path(&path);
1006         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007         btrfs_release_path(&path);
1008         if (ret > 0)
1009                 ret = -ENOENT;
1010         return ret;
1011 }
1012
1013 static int process_inode_item(struct extent_buffer *eb,
1014                               int slot, struct btrfs_key *key,
1015                               struct shared_node *active_node)
1016 {
1017         struct inode_record *rec;
1018         struct btrfs_inode_item *item;
1019
1020         rec = active_node->current;
1021         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022         if (rec->found_inode_item) {
1023                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1024                 return 1;
1025         }
1026         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027         rec->nlink = btrfs_inode_nlink(eb, item);
1028         rec->isize = btrfs_inode_size(eb, item);
1029         rec->nbytes = btrfs_inode_nbytes(eb, item);
1030         rec->imode = btrfs_inode_mode(eb, item);
1031         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032                 rec->nodatasum = 1;
1033         rec->found_inode_item = 1;
1034         if (rec->nlink == 0)
1035                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036         maybe_free_inode_rec(&active_node->inode_cache, rec);
1037         return 0;
1038 }
1039
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041                                                 const char *name,
1042                                                 int namelen, u64 dir)
1043 {
1044         struct inode_backref *backref;
1045
1046         list_for_each_entry(backref, &rec->backrefs, list) {
1047                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048                         break;
1049                 if (backref->dir != dir || backref->namelen != namelen)
1050                         continue;
1051                 if (memcmp(name, backref->name, namelen))
1052                         continue;
1053                 return backref;
1054         }
1055
1056         backref = malloc(sizeof(*backref) + namelen + 1);
1057         if (!backref)
1058                 return NULL;
1059         memset(backref, 0, sizeof(*backref));
1060         backref->dir = dir;
1061         backref->namelen = namelen;
1062         memcpy(backref->name, name, namelen);
1063         backref->name[namelen] = '\0';
1064         list_add_tail(&backref->list, &rec->backrefs);
1065         return backref;
1066 }
1067
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069                              u64 ino, u64 dir, u64 index,
1070                              const char *name, int namelen,
1071                              u8 filetype, u8 itemtype, int errors)
1072 {
1073         struct inode_record *rec;
1074         struct inode_backref *backref;
1075
1076         rec = get_inode_rec(inode_cache, ino, 1);
1077         BUG_ON(IS_ERR(rec));
1078         backref = get_inode_backref(rec, name, namelen, dir);
1079         BUG_ON(!backref);
1080         if (errors)
1081                 backref->errors |= errors;
1082         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083                 if (backref->found_dir_index)
1084                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085                 if (backref->found_inode_ref && backref->index != index)
1086                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1087                 if (backref->found_dir_item && backref->filetype != filetype)
1088                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089
1090                 backref->index = index;
1091                 backref->filetype = filetype;
1092                 backref->found_dir_index = 1;
1093         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094                 rec->found_link++;
1095                 if (backref->found_dir_item)
1096                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097                 if (backref->found_dir_index && backref->filetype != filetype)
1098                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099
1100                 backref->filetype = filetype;
1101                 backref->found_dir_item = 1;
1102         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104                 if (backref->found_inode_ref)
1105                         backref->errors |= REF_ERR_DUP_INODE_REF;
1106                 if (backref->found_dir_index && backref->index != index)
1107                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1108                 else
1109                         backref->index = index;
1110
1111                 backref->ref_type = itemtype;
1112                 backref->found_inode_ref = 1;
1113         } else {
1114                 BUG_ON(1);
1115         }
1116
1117         maybe_free_inode_rec(inode_cache, rec);
1118         return 0;
1119 }
1120
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122                             struct cache_tree *dst_cache)
1123 {
1124         struct inode_backref *backref;
1125         u32 dir_count = 0;
1126         int ret = 0;
1127
1128         dst->merging = 1;
1129         list_for_each_entry(backref, &src->backrefs, list) {
1130                 if (backref->found_dir_index) {
1131                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1132                                         backref->index, backref->name,
1133                                         backref->namelen, backref->filetype,
1134                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1135                 }
1136                 if (backref->found_dir_item) {
1137                         dir_count++;
1138                         add_inode_backref(dst_cache, dst->ino,
1139                                         backref->dir, 0, backref->name,
1140                                         backref->namelen, backref->filetype,
1141                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1142                 }
1143                 if (backref->found_inode_ref) {
1144                         add_inode_backref(dst_cache, dst->ino,
1145                                         backref->dir, backref->index,
1146                                         backref->name, backref->namelen, 0,
1147                                         backref->ref_type, backref->errors);
1148                 }
1149         }
1150
1151         if (src->found_dir_item)
1152                 dst->found_dir_item = 1;
1153         if (src->found_file_extent)
1154                 dst->found_file_extent = 1;
1155         if (src->found_csum_item)
1156                 dst->found_csum_item = 1;
1157         if (src->some_csum_missing)
1158                 dst->some_csum_missing = 1;
1159         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1161                 if (ret < 0)
1162                         return ret;
1163         }
1164
1165         BUG_ON(src->found_link < dir_count);
1166         dst->found_link += src->found_link - dir_count;
1167         dst->found_size += src->found_size;
1168         if (src->extent_start != (u64)-1) {
1169                 if (dst->extent_start == (u64)-1) {
1170                         dst->extent_start = src->extent_start;
1171                         dst->extent_end = src->extent_end;
1172                 } else {
1173                         if (dst->extent_end > src->extent_start)
1174                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175                         else if (dst->extent_end < src->extent_start) {
1176                                 ret = add_file_extent_hole(&dst->holes,
1177                                         dst->extent_end,
1178                                         src->extent_start - dst->extent_end);
1179                         }
1180                         if (dst->extent_end < src->extent_end)
1181                                 dst->extent_end = src->extent_end;
1182                 }
1183         }
1184
1185         dst->errors |= src->errors;
1186         if (src->found_inode_item) {
1187                 if (!dst->found_inode_item) {
1188                         dst->nlink = src->nlink;
1189                         dst->isize = src->isize;
1190                         dst->nbytes = src->nbytes;
1191                         dst->imode = src->imode;
1192                         dst->nodatasum = src->nodatasum;
1193                         dst->found_inode_item = 1;
1194                 } else {
1195                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1196                 }
1197         }
1198         dst->merging = 0;
1199
1200         return 0;
1201 }
1202
1203 static int splice_shared_node(struct shared_node *src_node,
1204                               struct shared_node *dst_node)
1205 {
1206         struct cache_extent *cache;
1207         struct ptr_node *node, *ins;
1208         struct cache_tree *src, *dst;
1209         struct inode_record *rec, *conflict;
1210         u64 current_ino = 0;
1211         int splice = 0;
1212         int ret;
1213
1214         if (--src_node->refs == 0)
1215                 splice = 1;
1216         if (src_node->current)
1217                 current_ino = src_node->current->ino;
1218
1219         src = &src_node->root_cache;
1220         dst = &dst_node->root_cache;
1221 again:
1222         cache = search_cache_extent(src, 0);
1223         while (cache) {
1224                 node = container_of(cache, struct ptr_node, cache);
1225                 rec = node->data;
1226                 cache = next_cache_extent(cache);
1227
1228                 if (splice) {
1229                         remove_cache_extent(src, &node->cache);
1230                         ins = node;
1231                 } else {
1232                         ins = malloc(sizeof(*ins));
1233                         BUG_ON(!ins);
1234                         ins->cache.start = node->cache.start;
1235                         ins->cache.size = node->cache.size;
1236                         ins->data = rec;
1237                         rec->refs++;
1238                 }
1239                 ret = insert_cache_extent(dst, &ins->cache);
1240                 if (ret == -EEXIST) {
1241                         conflict = get_inode_rec(dst, rec->ino, 1);
1242                         BUG_ON(IS_ERR(conflict));
1243                         merge_inode_recs(rec, conflict, dst);
1244                         if (rec->checked) {
1245                                 conflict->checked = 1;
1246                                 if (dst_node->current == conflict)
1247                                         dst_node->current = NULL;
1248                         }
1249                         maybe_free_inode_rec(dst, conflict);
1250                         free_inode_rec(rec);
1251                         free(ins);
1252                 } else {
1253                         BUG_ON(ret);
1254                 }
1255         }
1256
1257         if (src == &src_node->root_cache) {
1258                 src = &src_node->inode_cache;
1259                 dst = &dst_node->inode_cache;
1260                 goto again;
1261         }
1262
1263         if (current_ino > 0 && (!dst_node->current ||
1264             current_ino > dst_node->current->ino)) {
1265                 if (dst_node->current) {
1266                         dst_node->current->checked = 1;
1267                         maybe_free_inode_rec(dst, dst_node->current);
1268                 }
1269                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270                 BUG_ON(IS_ERR(dst_node->current));
1271         }
1272         return 0;
1273 }
1274
1275 static void free_inode_ptr(struct cache_extent *cache)
1276 {
1277         struct ptr_node *node;
1278         struct inode_record *rec;
1279
1280         node = container_of(cache, struct ptr_node, cache);
1281         rec = node->data;
1282         free_inode_rec(rec);
1283         free(node);
1284 }
1285
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1289                                             u64 bytenr)
1290 {
1291         struct cache_extent *cache;
1292         struct shared_node *node;
1293
1294         cache = lookup_cache_extent(shared, bytenr, 1);
1295         if (cache) {
1296                 node = container_of(cache, struct shared_node, cache);
1297                 return node;
1298         }
1299         return NULL;
1300 }
1301
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1303 {
1304         int ret;
1305         struct shared_node *node;
1306
1307         node = calloc(1, sizeof(*node));
1308         if (!node)
1309                 return -ENOMEM;
1310         node->cache.start = bytenr;
1311         node->cache.size = 1;
1312         cache_tree_init(&node->root_cache);
1313         cache_tree_init(&node->inode_cache);
1314         node->refs = refs;
1315
1316         ret = insert_cache_extent(shared, &node->cache);
1317
1318         return ret;
1319 }
1320
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322                              struct walk_control *wc, int level)
1323 {
1324         struct shared_node *node;
1325         struct shared_node *dest;
1326         int ret;
1327
1328         if (level == wc->active_node)
1329                 return 0;
1330
1331         BUG_ON(wc->active_node <= level);
1332         node = find_shared_node(&wc->shared, bytenr);
1333         if (!node) {
1334                 ret = add_shared_node(&wc->shared, bytenr, refs);
1335                 BUG_ON(ret);
1336                 node = find_shared_node(&wc->shared, bytenr);
1337                 wc->nodes[level] = node;
1338                 wc->active_node = level;
1339                 return 0;
1340         }
1341
1342         if (wc->root_level == wc->active_node &&
1343             btrfs_root_refs(&root->root_item) == 0) {
1344                 if (--node->refs == 0) {
1345                         free_inode_recs_tree(&node->root_cache);
1346                         free_inode_recs_tree(&node->inode_cache);
1347                         remove_cache_extent(&wc->shared, &node->cache);
1348                         free(node);
1349                 }
1350                 return 1;
1351         }
1352
1353         dest = wc->nodes[wc->active_node];
1354         splice_shared_node(node, dest);
1355         if (node->refs == 0) {
1356                 remove_cache_extent(&wc->shared, &node->cache);
1357                 free(node);
1358         }
1359         return 1;
1360 }
1361
1362 static int leave_shared_node(struct btrfs_root *root,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int i;
1368
1369         if (level == wc->root_level)
1370                 return 0;
1371
1372         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1373                 if (wc->nodes[i])
1374                         break;
1375         }
1376         BUG_ON(i >= BTRFS_MAX_LEVEL);
1377
1378         node = wc->nodes[wc->active_node];
1379         wc->nodes[wc->active_node] = NULL;
1380         wc->active_node = i;
1381
1382         dest = wc->nodes[wc->active_node];
1383         if (wc->active_node < wc->root_level ||
1384             btrfs_root_refs(&root->root_item) > 0) {
1385                 BUG_ON(node->refs <= 1);
1386                 splice_shared_node(node, dest);
1387         } else {
1388                 BUG_ON(node->refs < 2);
1389                 node->refs--;
1390         }
1391         return 0;
1392 }
1393
1394 /*
1395  * Returns:
1396  * < 0 - on error
1397  * 1   - if the root with id child_root_id is a child of root parent_root_id
1398  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1399  *       has other root(s) as parent(s)
1400  * 2   - if the root child_root_id doesn't have any parent roots
1401  */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1403                          u64 child_root_id)
1404 {
1405         struct btrfs_path path;
1406         struct btrfs_key key;
1407         struct extent_buffer *leaf;
1408         int has_parent = 0;
1409         int ret;
1410
1411         btrfs_init_path(&path);
1412
1413         key.objectid = parent_root_id;
1414         key.type = BTRFS_ROOT_REF_KEY;
1415         key.offset = child_root_id;
1416         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1417                                 0, 0);
1418         if (ret < 0)
1419                 return ret;
1420         btrfs_release_path(&path);
1421         if (!ret)
1422                 return 1;
1423
1424         key.objectid = child_root_id;
1425         key.type = BTRFS_ROOT_BACKREF_KEY;
1426         key.offset = 0;
1427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1428                                 0, 0);
1429         if (ret < 0)
1430                 goto out;
1431
1432         while (1) {
1433                 leaf = path.nodes[0];
1434                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1436                         if (ret)
1437                                 break;
1438                         leaf = path.nodes[0];
1439                 }
1440
1441                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1442                 if (key.objectid != child_root_id ||
1443                     key.type != BTRFS_ROOT_BACKREF_KEY)
1444                         break;
1445
1446                 has_parent = 1;
1447
1448                 if (key.offset == parent_root_id) {
1449                         btrfs_release_path(&path);
1450                         return 1;
1451                 }
1452
1453                 path.slots[0]++;
1454         }
1455 out:
1456         btrfs_release_path(&path);
1457         if (ret < 0)
1458                 return ret;
1459         return has_parent ? 0 : 2;
1460 }
1461
1462 static int process_dir_item(struct btrfs_root *root,
1463                             struct extent_buffer *eb,
1464                             int slot, struct btrfs_key *key,
1465                             struct shared_node *active_node)
1466 {
1467         u32 total;
1468         u32 cur = 0;
1469         u32 len;
1470         u32 name_len;
1471         u32 data_len;
1472         int error;
1473         int nritems = 0;
1474         u8 filetype;
1475         struct btrfs_dir_item *di;
1476         struct inode_record *rec;
1477         struct cache_tree *root_cache;
1478         struct cache_tree *inode_cache;
1479         struct btrfs_key location;
1480         char namebuf[BTRFS_NAME_LEN];
1481
1482         root_cache = &active_node->root_cache;
1483         inode_cache = &active_node->inode_cache;
1484         rec = active_node->current;
1485         rec->found_dir_item = 1;
1486
1487         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 nritems++;
1491                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492                 name_len = btrfs_dir_name_len(eb, di);
1493                 data_len = btrfs_dir_data_len(eb, di);
1494                 filetype = btrfs_dir_type(eb, di);
1495
1496                 rec->found_size += name_len;
1497                 if (name_len <= BTRFS_NAME_LEN) {
1498                         len = name_len;
1499                         error = 0;
1500                 } else {
1501                         len = BTRFS_NAME_LEN;
1502                         error = REF_ERR_NAME_TOO_LONG;
1503                 }
1504                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505
1506                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507                         add_inode_backref(inode_cache, location.objectid,
1508                                           key->objectid, key->offset, namebuf,
1509                                           len, filetype, key->type, error);
1510                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511                         add_inode_backref(root_cache, location.objectid,
1512                                           key->objectid, key->offset,
1513                                           namebuf, len, filetype,
1514                                           key->type, error);
1515                 } else {
1516                         fprintf(stderr, "invalid location in dir item %u\n",
1517                                 location.type);
1518                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519                                           key->objectid, key->offset, namebuf,
1520                                           len, filetype, key->type, error);
1521                 }
1522
1523                 len = sizeof(*di) + name_len + data_len;
1524                 di = (struct btrfs_dir_item *)((char *)di + len);
1525                 cur += len;
1526         }
1527         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1529
1530         return 0;
1531 }
1532
1533 static int process_inode_ref(struct extent_buffer *eb,
1534                              int slot, struct btrfs_key *key,
1535                              struct shared_node *active_node)
1536 {
1537         u32 total;
1538         u32 cur = 0;
1539         u32 len;
1540         u32 name_len;
1541         u64 index;
1542         int error;
1543         struct cache_tree *inode_cache;
1544         struct btrfs_inode_ref *ref;
1545         char namebuf[BTRFS_NAME_LEN];
1546
1547         inode_cache = &active_node->inode_cache;
1548
1549         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550         total = btrfs_item_size_nr(eb, slot);
1551         while (cur < total) {
1552                 name_len = btrfs_inode_ref_name_len(eb, ref);
1553                 index = btrfs_inode_ref_index(eb, ref);
1554                 if (name_len <= BTRFS_NAME_LEN) {
1555                         len = name_len;
1556                         error = 0;
1557                 } else {
1558                         len = BTRFS_NAME_LEN;
1559                         error = REF_ERR_NAME_TOO_LONG;
1560                 }
1561                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562                 add_inode_backref(inode_cache, key->objectid, key->offset,
1563                                   index, namebuf, len, 0, key->type, error);
1564
1565                 len = sizeof(*ref) + name_len;
1566                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1567                 cur += len;
1568         }
1569         return 0;
1570 }
1571
1572 static int process_inode_extref(struct extent_buffer *eb,
1573                                 int slot, struct btrfs_key *key,
1574                                 struct shared_node *active_node)
1575 {
1576         u32 total;
1577         u32 cur = 0;
1578         u32 len;
1579         u32 name_len;
1580         u64 index;
1581         u64 parent;
1582         int error;
1583         struct cache_tree *inode_cache;
1584         struct btrfs_inode_extref *extref;
1585         char namebuf[BTRFS_NAME_LEN];
1586
1587         inode_cache = &active_node->inode_cache;
1588
1589         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590         total = btrfs_item_size_nr(eb, slot);
1591         while (cur < total) {
1592                 name_len = btrfs_inode_extref_name_len(eb, extref);
1593                 index = btrfs_inode_extref_index(eb, extref);
1594                 parent = btrfs_inode_extref_parent(eb, extref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf,
1603                                    (unsigned long)(extref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, parent,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*extref) + name_len;
1608                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612
1613 }
1614
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616                             u64 len, u64 *found)
1617 {
1618         struct btrfs_key key;
1619         struct btrfs_path path;
1620         struct extent_buffer *leaf;
1621         int ret;
1622         size_t size;
1623         *found = 0;
1624         u64 csum_end;
1625         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626
1627         btrfs_init_path(&path);
1628
1629         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630         key.offset = start;
1631         key.type = BTRFS_EXTENT_CSUM_KEY;
1632
1633         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1634                                 &key, &path, 0, 0);
1635         if (ret < 0)
1636                 goto out;
1637         if (ret > 0 && path.slots[0] > 0) {
1638                 leaf = path.nodes[0];
1639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641                     key.type == BTRFS_EXTENT_CSUM_KEY)
1642                         path.slots[0]--;
1643         }
1644
1645         while (len > 0) {
1646                 leaf = path.nodes[0];
1647                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1649                         if (ret > 0)
1650                                 break;
1651                         else if (ret < 0)
1652                                 goto out;
1653                         leaf = path.nodes[0];
1654                 }
1655
1656                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658                     key.type != BTRFS_EXTENT_CSUM_KEY)
1659                         break;
1660
1661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1662                 if (key.offset >= start + len)
1663                         break;
1664
1665                 if (key.offset > start)
1666                         start = key.offset;
1667
1668                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670                 if (csum_end > start) {
1671                         size = min(csum_end - start, len);
1672                         len -= size;
1673                         start += size;
1674                         *found += size;
1675                 }
1676
1677                 path.slots[0]++;
1678         }
1679 out:
1680         btrfs_release_path(&path);
1681         if (ret < 0)
1682                 return ret;
1683         return 0;
1684 }
1685
1686 static int process_file_extent(struct btrfs_root *root,
1687                                 struct extent_buffer *eb,
1688                                 int slot, struct btrfs_key *key,
1689                                 struct shared_node *active_node)
1690 {
1691         struct inode_record *rec;
1692         struct btrfs_file_extent_item *fi;
1693         u64 num_bytes = 0;
1694         u64 disk_bytenr = 0;
1695         u64 extent_offset = 0;
1696         u64 mask = root->sectorsize - 1;
1697         int extent_type;
1698         int ret;
1699
1700         rec = active_node->current;
1701         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702         rec->found_file_extent = 1;
1703
1704         if (rec->extent_start == (u64)-1) {
1705                 rec->extent_start = key->offset;
1706                 rec->extent_end = key->offset;
1707         }
1708
1709         if (rec->extent_end > key->offset)
1710                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711         else if (rec->extent_end < key->offset) {
1712                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713                                            key->offset - rec->extent_end);
1714                 if (ret < 0)
1715                         return ret;
1716         }
1717
1718         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719         extent_type = btrfs_file_extent_type(eb, fi);
1720
1721         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723                 if (num_bytes == 0)
1724                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725                 rec->found_size += num_bytes;
1726                 num_bytes = (num_bytes + mask) & ~mask;
1727         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731                 extent_offset = btrfs_file_extent_offset(eb, fi);
1732                 if (num_bytes == 0 || (num_bytes & mask))
1733                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734                 if (num_bytes + extent_offset >
1735                     btrfs_file_extent_ram_bytes(eb, fi))
1736                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738                     (btrfs_file_extent_compression(eb, fi) ||
1739                      btrfs_file_extent_encryption(eb, fi) ||
1740                      btrfs_file_extent_other_encoding(eb, fi)))
1741                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742                 if (disk_bytenr > 0)
1743                         rec->found_size += num_bytes;
1744         } else {
1745                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746         }
1747         rec->extent_end = key->offset + num_bytes;
1748
1749         /*
1750          * The data reloc tree will copy full extents into its inode and then
1751          * copy the corresponding csums.  Because the extent it copied could be
1752          * a preallocated extent that hasn't been written to yet there may be no
1753          * csums to copy, ergo we won't have csums for our file extent.  This is
1754          * ok so just don't bother checking csums if the inode belongs to the
1755          * data reloc tree.
1756          */
1757         if (disk_bytenr > 0 &&
1758             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759                 u64 found;
1760                 if (btrfs_file_extent_compression(eb, fi))
1761                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762                 else
1763                         disk_bytenr += extent_offset;
1764
1765                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1766                 if (ret < 0)
1767                         return ret;
1768                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769                         if (found > 0)
1770                                 rec->found_csum_item = 1;
1771                         if (found < num_bytes)
1772                                 rec->some_csum_missing = 1;
1773                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774                         if (found > 0)
1775                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782                             struct walk_control *wc)
1783 {
1784         struct btrfs_key key;
1785         u32 nritems;
1786         int i;
1787         int ret = 0;
1788         struct cache_tree *inode_cache;
1789         struct shared_node *active_node;
1790
1791         if (wc->root_level == wc->active_node &&
1792             btrfs_root_refs(&root->root_item) == 0)
1793                 return 0;
1794
1795         active_node = wc->nodes[wc->active_node];
1796         inode_cache = &active_node->inode_cache;
1797         nritems = btrfs_header_nritems(eb);
1798         for (i = 0; i < nritems; i++) {
1799                 btrfs_item_key_to_cpu(eb, &key, i);
1800
1801                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802                         continue;
1803                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1804                         continue;
1805
1806                 if (active_node->current == NULL ||
1807                     active_node->current->ino < key.objectid) {
1808                         if (active_node->current) {
1809                                 active_node->current->checked = 1;
1810                                 maybe_free_inode_rec(inode_cache,
1811                                                      active_node->current);
1812                         }
1813                         active_node->current = get_inode_rec(inode_cache,
1814                                                              key.objectid, 1);
1815                         BUG_ON(IS_ERR(active_node->current));
1816                 }
1817                 switch (key.type) {
1818                 case BTRFS_DIR_ITEM_KEY:
1819                 case BTRFS_DIR_INDEX_KEY:
1820                         ret = process_dir_item(root, eb, i, &key, active_node);
1821                         break;
1822                 case BTRFS_INODE_REF_KEY:
1823                         ret = process_inode_ref(eb, i, &key, active_node);
1824                         break;
1825                 case BTRFS_INODE_EXTREF_KEY:
1826                         ret = process_inode_extref(eb, i, &key, active_node);
1827                         break;
1828                 case BTRFS_INODE_ITEM_KEY:
1829                         ret = process_inode_item(eb, i, &key, active_node);
1830                         break;
1831                 case BTRFS_EXTENT_DATA_KEY:
1832                         ret = process_file_extent(root, eb, i, &key,
1833                                                   active_node);
1834                         break;
1835                 default:
1836                         break;
1837                 };
1838         }
1839         return ret;
1840 }
1841
1842 static void reada_walk_down(struct btrfs_root *root,
1843                             struct extent_buffer *node, int slot)
1844 {
1845         u64 bytenr;
1846         u64 ptr_gen;
1847         u32 nritems;
1848         u32 blocksize;
1849         int i;
1850         int level;
1851
1852         level = btrfs_header_level(node);
1853         if (level != 1)
1854                 return;
1855
1856         nritems = btrfs_header_nritems(node);
1857         blocksize = root->nodesize;
1858         for (i = slot; i < nritems; i++) {
1859                 bytenr = btrfs_node_blockptr(node, i);
1860                 ptr_gen = btrfs_node_ptr_generation(node, i);
1861                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1862         }
1863 }
1864
1865 /*
1866  * Check the child node/leaf by the following condition:
1867  * 1. the first item key of the node/leaf should be the same with the one
1868  *    in parent.
1869  * 2. block in parent node should match the child node/leaf.
1870  * 3. generation of parent node and child's header should be consistent.
1871  *
1872  * Or the child node/leaf pointed by the key in parent is not valid.
1873  *
1874  * We hope to check leaf owner too, but since subvol may share leaves,
1875  * which makes leaf owner check not so strong, key check should be
1876  * sufficient enough for that case.
1877  */
1878 static int check_child_node(struct btrfs_root *root,
1879                             struct extent_buffer *parent, int slot,
1880                             struct extent_buffer *child)
1881 {
1882         struct btrfs_key parent_key;
1883         struct btrfs_key child_key;
1884         int ret = 0;
1885
1886         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887         if (btrfs_header_level(child) == 0)
1888                 btrfs_item_key_to_cpu(child, &child_key, 0);
1889         else
1890                 btrfs_node_key_to_cpu(child, &child_key, 0);
1891
1892         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1893                 ret = -EINVAL;
1894                 fprintf(stderr,
1895                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896                         parent_key.objectid, parent_key.type, parent_key.offset,
1897                         child_key.objectid, child_key.type, child_key.offset);
1898         }
1899         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900                 ret = -EINVAL;
1901                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902                         btrfs_node_blockptr(parent, slot),
1903                         btrfs_header_bytenr(child));
1904         }
1905         if (btrfs_node_ptr_generation(parent, slot) !=
1906             btrfs_header_generation(child)) {
1907                 ret = -EINVAL;
1908                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909                         btrfs_header_generation(child),
1910                         btrfs_node_ptr_generation(parent, slot));
1911         }
1912         return ret;
1913 }
1914
1915 struct node_refs {
1916         u64 bytenr[BTRFS_MAX_LEVEL];
1917         u64 refs[BTRFS_MAX_LEVEL];
1918 };
1919
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                           struct walk_control *wc, int *level,
1922                           struct node_refs *nrefs)
1923 {
1924         enum btrfs_tree_block_status status;
1925         u64 bytenr;
1926         u64 ptr_gen;
1927         struct extent_buffer *next;
1928         struct extent_buffer *cur;
1929         u32 blocksize;
1930         int ret, err = 0;
1931         u64 refs;
1932
1933         WARN_ON(*level < 0);
1934         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935
1936         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937                 refs = nrefs->refs[*level];
1938                 ret = 0;
1939         } else {
1940                 ret = btrfs_lookup_extent_info(NULL, root,
1941                                        path->nodes[*level]->start,
1942                                        *level, 1, &refs, NULL);
1943                 if (ret < 0) {
1944                         err = ret;
1945                         goto out;
1946                 }
1947                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948                 nrefs->refs[*level] = refs;
1949         }
1950
1951         if (refs > 1) {
1952                 ret = enter_shared_node(root, path->nodes[*level]->start,
1953                                         refs, wc, *level);
1954                 if (ret > 0) {
1955                         err = ret;
1956                         goto out;
1957                 }
1958         }
1959
1960         while (*level >= 0) {
1961                 WARN_ON(*level < 0);
1962                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963                 cur = path->nodes[*level];
1964
1965                 if (btrfs_header_level(cur) != *level)
1966                         WARN_ON(1);
1967
1968                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1969                         break;
1970                 if (*level == 0) {
1971                         ret = process_one_leaf(root, cur, wc);
1972                         if (ret < 0)
1973                                 err = ret;
1974                         break;
1975                 }
1976                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978                 blocksize = root->nodesize;
1979
1980                 if (bytenr == nrefs->bytenr[*level - 1]) {
1981                         refs = nrefs->refs[*level - 1];
1982                 } else {
1983                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984                                         *level - 1, 1, &refs, NULL);
1985                         if (ret < 0) {
1986                                 refs = 0;
1987                         } else {
1988                                 nrefs->bytenr[*level - 1] = bytenr;
1989                                 nrefs->refs[*level - 1] = refs;
1990                         }
1991                 }
1992
1993                 if (refs > 1) {
1994                         ret = enter_shared_node(root, bytenr, refs,
1995                                                 wc, *level - 1);
1996                         if (ret > 0) {
1997                                 path->slots[*level]++;
1998                                 continue;
1999                         }
2000                 }
2001
2002                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004                         free_extent_buffer(next);
2005                         reada_walk_down(root, cur, path->slots[*level]);
2006                         next = read_tree_block(root, bytenr, blocksize,
2007                                                ptr_gen);
2008                         if (!extent_buffer_uptodate(next)) {
2009                                 struct btrfs_key node_key;
2010
2011                                 btrfs_node_key_to_cpu(path->nodes[*level],
2012                                                       &node_key,
2013                                                       path->slots[*level]);
2014                                 btrfs_add_corrupt_extent_record(root->fs_info,
2015                                                 &node_key,
2016                                                 path->nodes[*level]->start,
2017                                                 root->nodesize, *level);
2018                                 err = -EIO;
2019                                 goto out;
2020                         }
2021                 }
2022
2023                 ret = check_child_node(root, cur, path->slots[*level], next);
2024                 if (ret) {
2025                         err = ret;
2026                         goto out;
2027                 }
2028
2029                 if (btrfs_is_leaf(next))
2030                         status = btrfs_check_leaf(root, NULL, next);
2031                 else
2032                         status = btrfs_check_node(root, NULL, next);
2033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034                         free_extent_buffer(next);
2035                         err = -EIO;
2036                         goto out;
2037                 }
2038
2039                 *level = *level - 1;
2040                 free_extent_buffer(path->nodes[*level]);
2041                 path->nodes[*level] = next;
2042                 path->slots[*level] = 0;
2043         }
2044 out:
2045         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2046         return err;
2047 }
2048
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050                         struct walk_control *wc, int *level)
2051 {
2052         int i;
2053         struct extent_buffer *leaf;
2054
2055         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056                 leaf = path->nodes[i];
2057                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2058                         path->slots[i]++;
2059                         *level = i;
2060                         return 0;
2061                 } else {
2062                         free_extent_buffer(path->nodes[*level]);
2063                         path->nodes[*level] = NULL;
2064                         BUG_ON(*level > wc->active_node);
2065                         if (*level == wc->active_node)
2066                                 leave_shared_node(root, wc, *level);
2067                         *level = i + 1;
2068                 }
2069         }
2070         return 1;
2071 }
2072
2073 static int check_root_dir(struct inode_record *rec)
2074 {
2075         struct inode_backref *backref;
2076         int ret = -1;
2077
2078         if (!rec->found_inode_item || rec->errors)
2079                 goto out;
2080         if (rec->nlink != 1 || rec->found_link != 0)
2081                 goto out;
2082         if (list_empty(&rec->backrefs))
2083                 goto out;
2084         backref = to_inode_backref(rec->backrefs.next);
2085         if (!backref->found_inode_ref)
2086                 goto out;
2087         if (backref->index != 0 || backref->namelen != 2 ||
2088             memcmp(backref->name, "..", 2))
2089                 goto out;
2090         if (backref->found_dir_index || backref->found_dir_item)
2091                 goto out;
2092         ret = 0;
2093 out:
2094         return ret;
2095 }
2096
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098                               struct btrfs_root *root, struct btrfs_path *path,
2099                               struct inode_record *rec)
2100 {
2101         struct btrfs_inode_item *ei;
2102         struct btrfs_key key;
2103         int ret;
2104
2105         key.objectid = rec->ino;
2106         key.type = BTRFS_INODE_ITEM_KEY;
2107         key.offset = (u64)-1;
2108
2109         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2110         if (ret < 0)
2111                 goto out;
2112         if (ret) {
2113                 if (!path->slots[0]) {
2114                         ret = -ENOENT;
2115                         goto out;
2116                 }
2117                 path->slots[0]--;
2118                 ret = 0;
2119         }
2120         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121         if (key.objectid != rec->ino) {
2122                 ret = -ENOENT;
2123                 goto out;
2124         }
2125
2126         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127                             struct btrfs_inode_item);
2128         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129         btrfs_mark_buffer_dirty(path->nodes[0]);
2130         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132                root->root_key.objectid);
2133 out:
2134         btrfs_release_path(path);
2135         return ret;
2136 }
2137
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139                                     struct btrfs_root *root,
2140                                     struct btrfs_path *path,
2141                                     struct inode_record *rec)
2142 {
2143         int ret;
2144
2145         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146         btrfs_release_path(path);
2147         if (!ret)
2148                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2149         return ret;
2150 }
2151
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153                                struct btrfs_root *root,
2154                                struct btrfs_path *path,
2155                                struct inode_record *rec)
2156 {
2157         struct btrfs_inode_item *ei;
2158         struct btrfs_key key;
2159         int ret = 0;
2160
2161         key.objectid = rec->ino;
2162         key.type = BTRFS_INODE_ITEM_KEY;
2163         key.offset = 0;
2164
2165         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2166         if (ret) {
2167                 if (ret > 0)
2168                         ret = -ENOENT;
2169                 goto out;
2170         }
2171
2172         /* Since ret == 0, no need to check anything */
2173         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174                             struct btrfs_inode_item);
2175         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176         btrfs_mark_buffer_dirty(path->nodes[0]);
2177         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178         printf("reset nbytes for ino %llu root %llu\n",
2179                rec->ino, root->root_key.objectid);
2180 out:
2181         btrfs_release_path(path);
2182         return ret;
2183 }
2184
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186                                  struct cache_tree *inode_cache,
2187                                  struct inode_record *rec,
2188                                  struct inode_backref *backref)
2189 {
2190         struct btrfs_path path;
2191         struct btrfs_trans_handle *trans;
2192         struct btrfs_dir_item *dir_item;
2193         struct extent_buffer *leaf;
2194         struct btrfs_key key;
2195         struct btrfs_disk_key disk_key;
2196         struct inode_record *dir_rec;
2197         unsigned long name_ptr;
2198         u32 data_size = sizeof(*dir_item) + backref->namelen;
2199         int ret;
2200
2201         trans = btrfs_start_transaction(root, 1);
2202         if (IS_ERR(trans))
2203                 return PTR_ERR(trans);
2204
2205         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206                 (unsigned long long)rec->ino);
2207
2208         btrfs_init_path(&path);
2209         key.objectid = backref->dir;
2210         key.type = BTRFS_DIR_INDEX_KEY;
2211         key.offset = backref->index;
2212         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2213         BUG_ON(ret);
2214
2215         leaf = path.nodes[0];
2216         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217
2218         disk_key.objectid = cpu_to_le64(rec->ino);
2219         disk_key.type = BTRFS_INODE_ITEM_KEY;
2220         disk_key.offset = 0;
2221
2222         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224         btrfs_set_dir_data_len(leaf, dir_item, 0);
2225         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226         name_ptr = (unsigned long)(dir_item + 1);
2227         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228         btrfs_mark_buffer_dirty(leaf);
2229         btrfs_release_path(&path);
2230         btrfs_commit_transaction(trans, root);
2231
2232         backref->found_dir_index = 1;
2233         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234         BUG_ON(IS_ERR(dir_rec));
2235         if (!dir_rec)
2236                 return 0;
2237         dir_rec->found_size += backref->namelen;
2238         if (dir_rec->found_size == dir_rec->isize &&
2239             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241         if (dir_rec->found_size != dir_rec->isize)
2242                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2243
2244         return 0;
2245 }
2246
2247 static int delete_dir_index(struct btrfs_root *root,
2248                             struct cache_tree *inode_cache,
2249                             struct inode_record *rec,
2250                             struct inode_backref *backref)
2251 {
2252         struct btrfs_trans_handle *trans;
2253         struct btrfs_dir_item *di;
2254         struct btrfs_path path;
2255         int ret = 0;
2256
2257         trans = btrfs_start_transaction(root, 1);
2258         if (IS_ERR(trans))
2259                 return PTR_ERR(trans);
2260
2261         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262                 (unsigned long long)backref->dir,
2263                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264                 (unsigned long long)root->objectid);
2265
2266         btrfs_init_path(&path);
2267         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268                                     backref->name, backref->namelen,
2269                                     backref->index, -1);
2270         if (IS_ERR(di)) {
2271                 ret = PTR_ERR(di);
2272                 btrfs_release_path(&path);
2273                 btrfs_commit_transaction(trans, root);
2274                 if (ret == -ENOENT)
2275                         return 0;
2276                 return ret;
2277         }
2278
2279         if (!di)
2280                 ret = btrfs_del_item(trans, root, &path);
2281         else
2282                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283         BUG_ON(ret);
2284         btrfs_release_path(&path);
2285         btrfs_commit_transaction(trans, root);
2286         return ret;
2287 }
2288
2289 static int create_inode_item(struct btrfs_root *root,
2290                              struct inode_record *rec,
2291                              struct inode_backref *backref, int root_dir)
2292 {
2293         struct btrfs_trans_handle *trans;
2294         struct btrfs_inode_item inode_item;
2295         time_t now = time(NULL);
2296         int ret;
2297
2298         trans = btrfs_start_transaction(root, 1);
2299         if (IS_ERR(trans)) {
2300                 ret = PTR_ERR(trans);
2301                 return ret;
2302         }
2303
2304         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305                 "be incomplete, please check permissions and content after "
2306                 "the fsck completes.\n", (unsigned long long)root->objectid,
2307                 (unsigned long long)rec->ino);
2308
2309         memset(&inode_item, 0, sizeof(inode_item));
2310         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311         if (root_dir)
2312                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313         else
2314                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316         if (rec->found_dir_item) {
2317                 if (rec->found_file_extent)
2318                         fprintf(stderr, "root %llu inode %llu has both a dir "
2319                                 "item and extents, unsure if it is a dir or a "
2320                                 "regular file so setting it as a directory\n",
2321                                 (unsigned long long)root->objectid,
2322                                 (unsigned long long)rec->ino);
2323                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325         } else if (!rec->found_dir_item) {
2326                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328         }
2329         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337
2338         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339         BUG_ON(ret);
2340         btrfs_commit_transaction(trans, root);
2341         return 0;
2342 }
2343
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345                                  struct inode_record *rec,
2346                                  struct cache_tree *inode_cache,
2347                                  int delete)
2348 {
2349         struct inode_backref *tmp, *backref;
2350         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2351         int ret = 0;
2352         int repaired = 0;
2353
2354         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355                 if (!delete && rec->ino == root_dirid) {
2356                         if (!rec->found_inode_item) {
2357                                 ret = create_inode_item(root, rec, backref, 1);
2358                                 if (ret)
2359                                         break;
2360                                 repaired++;
2361                         }
2362                 }
2363
2364                 /* Index 0 for root dir's are special, don't mess with it */
2365                 if (rec->ino == root_dirid && backref->index == 0)
2366                         continue;
2367
2368                 if (delete &&
2369                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2370                      (backref->found_dir_index && backref->found_inode_ref &&
2371                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372                         ret = delete_dir_index(root, inode_cache, rec, backref);
2373                         if (ret)
2374                                 break;
2375                         repaired++;
2376                         list_del(&backref->list);
2377                         free(backref);
2378                 }
2379
2380                 if (!delete && !backref->found_dir_index &&
2381                     backref->found_dir_item && backref->found_inode_ref) {
2382                         ret = add_missing_dir_index(root, inode_cache, rec,
2383                                                     backref);
2384                         if (ret)
2385                                 break;
2386                         repaired++;
2387                         if (backref->found_dir_item &&
2388                             backref->found_dir_index &&
2389                             backref->found_dir_index) {
2390                                 if (!backref->errors &&
2391                                     backref->found_inode_ref) {
2392                                         list_del(&backref->list);
2393                                         free(backref);
2394                                 }
2395                         }
2396                 }
2397
2398                 if (!delete && (!backref->found_dir_index &&
2399                                 !backref->found_dir_item &&
2400                                 backref->found_inode_ref)) {
2401                         struct btrfs_trans_handle *trans;
2402                         struct btrfs_key location;
2403
2404                         ret = check_dir_conflict(root, backref->name,
2405                                                  backref->namelen,
2406                                                  backref->dir,
2407                                                  backref->index);
2408                         if (ret) {
2409                                 /*
2410                                  * let nlink fixing routine to handle it,
2411                                  * which can do it better.
2412                                  */
2413                                 ret = 0;
2414                                 break;
2415                         }
2416                         location.objectid = rec->ino;
2417                         location.type = BTRFS_INODE_ITEM_KEY;
2418                         location.offset = 0;
2419
2420                         trans = btrfs_start_transaction(root, 1);
2421                         if (IS_ERR(trans)) {
2422                                 ret = PTR_ERR(trans);
2423                                 break;
2424                         }
2425                         fprintf(stderr, "adding missing dir index/item pair "
2426                                 "for inode %llu\n",
2427                                 (unsigned long long)rec->ino);
2428                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2429                                                     backref->namelen,
2430                                                     backref->dir, &location,
2431                                                     imode_to_type(rec->imode),
2432                                                     backref->index);
2433                         BUG_ON(ret);
2434                         btrfs_commit_transaction(trans, root);
2435                         repaired++;
2436                 }
2437
2438                 if (!delete && (backref->found_inode_ref &&
2439                                 backref->found_dir_index &&
2440                                 backref->found_dir_item &&
2441                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442                                 !rec->found_inode_item)) {
2443                         ret = create_inode_item(root, rec, backref, 0);
2444                         if (ret)
2445                                 break;
2446                         repaired++;
2447                 }
2448
2449         }
2450         return ret ? ret : repaired;
2451 }
2452
2453 /*
2454  * To determine the file type for nlink/inode_item repair
2455  *
2456  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457  * Return -ENOENT if file type is not found.
2458  */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2460 {
2461         struct inode_backref *backref;
2462
2463         /* For inode item recovered case */
2464         if (rec->found_inode_item) {
2465                 *type = imode_to_type(rec->imode);
2466                 return 0;
2467         }
2468
2469         list_for_each_entry(backref, &rec->backrefs, list) {
2470                 if (backref->found_dir_index || backref->found_dir_item) {
2471                         *type = backref->filetype;
2472                         return 0;
2473                 }
2474         }
2475         return -ENOENT;
2476 }
2477
2478 /*
2479  * To determine the file name for nlink repair
2480  *
2481  * Return 0 if file name is found, set name and namelen.
2482  * Return -ENOENT if file name is not found.
2483  */
2484 static int find_file_name(struct inode_record *rec,
2485                           char *name, int *namelen)
2486 {
2487         struct inode_backref *backref;
2488
2489         list_for_each_entry(backref, &rec->backrefs, list) {
2490                 if (backref->found_dir_index || backref->found_dir_item ||
2491                     backref->found_inode_ref) {
2492                         memcpy(name, backref->name, backref->namelen);
2493                         *namelen = backref->namelen;
2494                         return 0;
2495                 }
2496         }
2497         return -ENOENT;
2498 }
2499
2500 /* Reset the nlink of the inode to the correct one */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502                        struct btrfs_root *root,
2503                        struct btrfs_path *path,
2504                        struct inode_record *rec)
2505 {
2506         struct inode_backref *backref;
2507         struct inode_backref *tmp;
2508         struct btrfs_key key;
2509         struct btrfs_inode_item *inode_item;
2510         int ret = 0;
2511
2512         /* We don't believe this either, reset it and iterate backref */
2513         rec->found_link = 0;
2514
2515         /* Remove all backref including the valid ones */
2516         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518                                    backref->index, backref->name,
2519                                    backref->namelen, 0);
2520                 if (ret < 0)
2521                         goto out;
2522
2523                 /* remove invalid backref, so it won't be added back */
2524                 if (!(backref->found_dir_index &&
2525                       backref->found_dir_item &&
2526                       backref->found_inode_ref)) {
2527                         list_del(&backref->list);
2528                         free(backref);
2529                 } else {
2530                         rec->found_link++;
2531                 }
2532         }
2533
2534         /* Set nlink to 0 */
2535         key.objectid = rec->ino;
2536         key.type = BTRFS_INODE_ITEM_KEY;
2537         key.offset = 0;
2538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2539         if (ret < 0)
2540                 goto out;
2541         if (ret > 0) {
2542                 ret = -ENOENT;
2543                 goto out;
2544         }
2545         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546                                     struct btrfs_inode_item);
2547         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548         btrfs_mark_buffer_dirty(path->nodes[0]);
2549         btrfs_release_path(path);
2550
2551         /*
2552          * Add back valid inode_ref/dir_item/dir_index,
2553          * add_link() will handle the nlink inc, so new nlink must be correct
2554          */
2555         list_for_each_entry(backref, &rec->backrefs, list) {
2556                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557                                      backref->name, backref->namelen,
2558                                      backref->filetype, &backref->index, 1);
2559                 if (ret < 0)
2560                         goto out;
2561         }
2562 out:
2563         btrfs_release_path(path);
2564         return ret;
2565 }
2566
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568                                struct btrfs_root *root,
2569                                struct btrfs_path *path,
2570                                struct inode_record *rec)
2571 {
2572         char *dir_name = "lost+found";
2573         char namebuf[BTRFS_NAME_LEN] = {0};
2574         u64 lost_found_ino;
2575         u32 mode = 0700;
2576         u8 type = 0;
2577         int namelen = 0;
2578         int name_recovered = 0;
2579         int type_recovered = 0;
2580         int ret = 0;
2581
2582         /*
2583          * Get file name and type first before these invalid inode ref
2584          * are deleted by remove_all_invalid_backref()
2585          */
2586         name_recovered = !find_file_name(rec, namebuf, &namelen);
2587         type_recovered = !find_file_type(rec, &type);
2588
2589         if (!name_recovered) {
2590                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591                        rec->ino, rec->ino);
2592                 namelen = count_digits(rec->ino);
2593                 sprintf(namebuf, "%llu", rec->ino);
2594                 name_recovered = 1;
2595         }
2596         if (!type_recovered) {
2597                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598                        rec->ino);
2599                 type = BTRFS_FT_REG_FILE;
2600                 type_recovered = 1;
2601         }
2602
2603         ret = reset_nlink(trans, root, path, rec);
2604         if (ret < 0) {
2605                 fprintf(stderr,
2606                         "Failed to reset nlink for inode %llu: %s\n",
2607                         rec->ino, strerror(-ret));
2608                 goto out;
2609         }
2610
2611         if (rec->found_link == 0) {
2612                 lost_found_ino = root->highest_inode;
2613                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2614                         ret = -EOVERFLOW;
2615                         goto out;
2616                 }
2617                 lost_found_ino++;
2618                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2620                                   mode);
2621                 if (ret < 0) {
2622                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623                                 dir_name, strerror(-ret));
2624                         goto out;
2625                 }
2626                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627                                      namebuf, namelen, type, NULL, 1);
2628                 /*
2629                  * Add ".INO" suffix several times to handle case where
2630                  * "FILENAME.INO" is already taken by another file.
2631                  */
2632                 while (ret == -EEXIST) {
2633                         /*
2634                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635                          */
2636                         if (namelen + count_digits(rec->ino) + 1 >
2637                             BTRFS_NAME_LEN) {
2638                                 ret = -EFBIG;
2639                                 goto out;
2640                         }
2641                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642                                  ".%llu", rec->ino);
2643                         namelen += count_digits(rec->ino) + 1;
2644                         ret = btrfs_add_link(trans, root, rec->ino,
2645                                              lost_found_ino, namebuf,
2646                                              namelen, type, NULL, 1);
2647                 }
2648                 if (ret < 0) {
2649                         fprintf(stderr,
2650                                 "Failed to link the inode %llu to %s dir: %s\n",
2651                                 rec->ino, dir_name, strerror(-ret));
2652                         goto out;
2653                 }
2654                 /*
2655                  * Just increase the found_link, don't actually add the
2656                  * backref. This will make things easier and this inode
2657                  * record will be freed after the repair is done.
2658                  * So fsck will not report problem about this inode.
2659                  */
2660                 rec->found_link++;
2661                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662                        namelen, namebuf, dir_name);
2663         }
2664         printf("Fixed the nlink of inode %llu\n", rec->ino);
2665 out:
2666         /*
2667          * Clear the flag anyway, or we will loop forever for the same inode
2668          * as it will not be removed from the bad inode list and the dead loop
2669          * happens.
2670          */
2671         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672         btrfs_release_path(path);
2673         return ret;
2674 }
2675
2676 /*
2677  * Check if there is any normal(reg or prealloc) file extent for given
2678  * ino.
2679  * This is used to determine the file type when neither its dir_index/item or
2680  * inode_item exists.
2681  *
2682  * This will *NOT* report error, if any error happens, just consider it does
2683  * not have any normal file extent.
2684  */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 {
2687         struct btrfs_path path;
2688         struct btrfs_key key;
2689         struct btrfs_key found_key;
2690         struct btrfs_file_extent_item *fi;
2691         u8 type;
2692         int ret = 0;
2693
2694         btrfs_init_path(&path);
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705                 ret = btrfs_next_leaf(root, &path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713                                       path.slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path.nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_release_path(&path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         /*
2930          * For nlink repair, it may create a dir and add link, so
2931          * 2 for parent(256)'s dir_index and dir_item
2932          * 2 for lost+found dir's inode_item and inode_ref
2933          * 1 for the new inode_ref of the file
2934          * 2 for lost+found dir's dir_index and dir_item for the file
2935          */
2936         trans = btrfs_start_transaction(root, 7);
2937         if (IS_ERR(trans))
2938                 return PTR_ERR(trans);
2939
2940         btrfs_init_path(&path);
2941         if (rec->errors & I_ERR_NO_INODE_ITEM)
2942                 ret = repair_inode_no_item(trans, root, &path, rec);
2943         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948                 ret = repair_inode_isize(trans, root, &path, rec);
2949         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952                 ret = repair_inode_nlinks(trans, root, &path, rec);
2953         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954                 ret = repair_inode_nbytes(trans, root, &path, rec);
2955         btrfs_commit_transaction(trans, root);
2956         btrfs_release_path(&path);
2957         return ret;
2958 }
2959
2960 static int check_inode_recs(struct btrfs_root *root,
2961                             struct cache_tree *inode_cache)
2962 {
2963         struct cache_extent *cache;
2964         struct ptr_node *node;
2965         struct inode_record *rec;
2966         struct inode_backref *backref;
2967         int stage = 0;
2968         int ret = 0;
2969         int err = 0;
2970         u64 error = 0;
2971         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2972
2973         if (btrfs_root_refs(&root->root_item) == 0) {
2974                 if (!cache_tree_empty(inode_cache))
2975                         fprintf(stderr, "warning line %d\n", __LINE__);
2976                 return 0;
2977         }
2978
2979         /*
2980          * We need to record the highest inode number for later 'lost+found'
2981          * dir creation.
2982          * We must select an ino not used/referred by any existing inode, or
2983          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984          * this may cause 'lost+found' dir has wrong nlinks.
2985          */
2986         cache = last_cache_extent(inode_cache);
2987         if (cache) {
2988                 node = container_of(cache, struct ptr_node, cache);
2989                 rec = node->data;
2990                 if (rec->ino > root->highest_inode)
2991                         root->highest_inode = rec->ino;
2992         }
2993
2994         /*
2995          * We need to repair backrefs first because we could change some of the
2996          * errors in the inode recs.
2997          *
2998          * We also need to go through and delete invalid backrefs first and then
2999          * add the correct ones second.  We do this because we may get EEXIST
3000          * when adding back the correct index because we hadn't yet deleted the
3001          * invalid index.
3002          *
3003          * For example, if we were missing a dir index then the directories
3004          * isize would be wrong, so if we fixed the isize to what we thought it
3005          * would be and then fixed the backref we'd still have a invalid fs, so
3006          * we need to add back the dir index and then check to see if the isize
3007          * is still wrong.
3008          */
3009         while (stage < 3) {
3010                 stage++;
3011                 if (stage == 3 && !err)
3012                         break;
3013
3014                 cache = search_cache_extent(inode_cache, 0);
3015                 while (repair && cache) {
3016                         node = container_of(cache, struct ptr_node, cache);
3017                         rec = node->data;
3018                         cache = next_cache_extent(cache);
3019
3020                         /* Need to free everything up and rescan */
3021                         if (stage == 3) {
3022                                 remove_cache_extent(inode_cache, &node->cache);
3023                                 free(node);
3024                                 free_inode_rec(rec);
3025                                 continue;
3026                         }
3027
3028                         if (list_empty(&rec->backrefs))
3029                                 continue;
3030
3031                         ret = repair_inode_backrefs(root, rec, inode_cache,
3032                                                     stage == 1);
3033                         if (ret < 0) {
3034                                 err = ret;
3035                                 stage = 2;
3036                                 break;
3037                         } if (ret > 0) {
3038                                 err = -EAGAIN;
3039                         }
3040                 }
3041         }
3042         if (err)
3043                 return err;
3044
3045         rec = get_inode_rec(inode_cache, root_dirid, 0);
3046         BUG_ON(IS_ERR(rec));
3047         if (rec) {
3048                 ret = check_root_dir(rec);
3049                 if (ret) {
3050                         fprintf(stderr, "root %llu root dir %llu error\n",
3051                                 (unsigned long long)root->root_key.objectid,
3052                                 (unsigned long long)root_dirid);
3053                         print_inode_error(root, rec);
3054                         error++;
3055                 }
3056         } else {
3057                 if (repair) {
3058                         struct btrfs_trans_handle *trans;
3059
3060                         trans = btrfs_start_transaction(root, 1);
3061                         if (IS_ERR(trans)) {
3062                                 err = PTR_ERR(trans);
3063                                 return err;
3064                         }
3065
3066                         fprintf(stderr,
3067                                 "root %llu missing its root dir, recreating\n",
3068                                 (unsigned long long)root->objectid);
3069
3070                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3071                         BUG_ON(ret);
3072
3073                         btrfs_commit_transaction(trans, root);
3074                         return -EAGAIN;
3075                 }
3076
3077                 fprintf(stderr, "root %llu root dir %llu not found\n",
3078                         (unsigned long long)root->root_key.objectid,
3079                         (unsigned long long)root_dirid);
3080         }
3081
3082         while (1) {
3083                 cache = search_cache_extent(inode_cache, 0);
3084                 if (!cache)
3085                         break;
3086                 node = container_of(cache, struct ptr_node, cache);
3087                 rec = node->data;
3088                 remove_cache_extent(inode_cache, &node->cache);
3089                 free(node);
3090                 if (rec->ino == root_dirid ||
3091                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092                         free_inode_rec(rec);
3093                         continue;
3094                 }
3095
3096                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097                         ret = check_orphan_item(root, rec->ino);
3098                         if (ret == 0)
3099                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100                         if (can_free_inode_rec(rec)) {
3101                                 free_inode_rec(rec);
3102                                 continue;
3103                         }
3104                 }
3105
3106                 if (!rec->found_inode_item)
3107                         rec->errors |= I_ERR_NO_INODE_ITEM;
3108                 if (rec->found_link != rec->nlink)
3109                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110                 if (repair) {
3111                         ret = try_repair_inode(root, rec);
3112                         if (ret == 0 && can_free_inode_rec(rec)) {
3113                                 free_inode_rec(rec);
3114                                 continue;
3115                         }
3116                         ret = 0;
3117                 }
3118
3119                 if (!(repair && ret == 0))
3120                         error++;
3121                 print_inode_error(root, rec);
3122                 list_for_each_entry(backref, &rec->backrefs, list) {
3123                         if (!backref->found_dir_item)
3124                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125                         if (!backref->found_dir_index)
3126                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127                         if (!backref->found_inode_ref)
3128                                 backref->errors |= REF_ERR_NO_INODE_REF;
3129                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130                                 " namelen %u name %s filetype %d errors %x",
3131                                 (unsigned long long)backref->dir,
3132                                 (unsigned long long)backref->index,
3133                                 backref->namelen, backref->name,
3134                                 backref->filetype, backref->errors);
3135                         print_ref_error(backref->errors);
3136                 }
3137                 free_inode_rec(rec);
3138         }
3139         return (error > 0) ? -1 : 0;
3140 }
3141
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3143                                         u64 objectid)
3144 {
3145         struct cache_extent *cache;
3146         struct root_record *rec = NULL;
3147         int ret;
3148
3149         cache = lookup_cache_extent(root_cache, objectid, 1);
3150         if (cache) {
3151                 rec = container_of(cache, struct root_record, cache);
3152         } else {
3153                 rec = calloc(1, sizeof(*rec));
3154                 if (!rec)
3155                         return ERR_PTR(-ENOMEM);
3156                 rec->objectid = objectid;
3157                 INIT_LIST_HEAD(&rec->backrefs);
3158                 rec->cache.start = objectid;
3159                 rec->cache.size = 1;
3160
3161                 ret = insert_cache_extent(root_cache, &rec->cache);
3162                 if (ret)
3163                         return ERR_PTR(-EEXIST);
3164         }
3165         return rec;
3166 }
3167
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169                                              u64 ref_root, u64 dir, u64 index,
3170                                              const char *name, int namelen)
3171 {
3172         struct root_backref *backref;
3173
3174         list_for_each_entry(backref, &rec->backrefs, list) {
3175                 if (backref->ref_root != ref_root || backref->dir != dir ||
3176                     backref->namelen != namelen)
3177                         continue;
3178                 if (memcmp(name, backref->name, namelen))
3179                         continue;
3180                 return backref;
3181         }
3182
3183         backref = calloc(1, sizeof(*backref) + namelen + 1);
3184         if (!backref)
3185                 return NULL;
3186         backref->ref_root = ref_root;
3187         backref->dir = dir;
3188         backref->index = index;
3189         backref->namelen = namelen;
3190         memcpy(backref->name, name, namelen);
3191         backref->name[namelen] = '\0';
3192         list_add_tail(&backref->list, &rec->backrefs);
3193         return backref;
3194 }
3195
3196 static void free_root_record(struct cache_extent *cache)
3197 {
3198         struct root_record *rec;
3199         struct root_backref *backref;
3200
3201         rec = container_of(cache, struct root_record, cache);
3202         while (!list_empty(&rec->backrefs)) {
3203                 backref = to_root_backref(rec->backrefs.next);
3204                 list_del(&backref->list);
3205                 free(backref);
3206         }
3207
3208         free(rec);
3209 }
3210
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3212
3213 static int add_root_backref(struct cache_tree *root_cache,
3214                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3215                             const char *name, int namelen,
3216                             int item_type, int errors)
3217 {
3218         struct root_record *rec;
3219         struct root_backref *backref;
3220
3221         rec = get_root_rec(root_cache, root_id);
3222         BUG_ON(IS_ERR(rec));
3223         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3224         BUG_ON(!backref);
3225
3226         backref->errors |= errors;
3227
3228         if (item_type != BTRFS_DIR_ITEM_KEY) {
3229                 if (backref->found_dir_index || backref->found_back_ref ||
3230                     backref->found_forward_ref) {
3231                         if (backref->index != index)
3232                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233                 } else {
3234                         backref->index = index;
3235                 }
3236         }
3237
3238         if (item_type == BTRFS_DIR_ITEM_KEY) {
3239                 if (backref->found_forward_ref)
3240                         rec->found_ref++;
3241                 backref->found_dir_item = 1;
3242         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243                 backref->found_dir_index = 1;
3244         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245                 if (backref->found_forward_ref)
3246                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3247                 else if (backref->found_dir_item)
3248                         rec->found_ref++;
3249                 backref->found_forward_ref = 1;
3250         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251                 if (backref->found_back_ref)
3252                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253                 backref->found_back_ref = 1;
3254         } else {
3255                 BUG_ON(1);
3256         }
3257
3258         if (backref->found_forward_ref && backref->found_dir_item)
3259                 backref->reachable = 1;
3260         return 0;
3261 }
3262
3263 static int merge_root_recs(struct btrfs_root *root,
3264                            struct cache_tree *src_cache,
3265                            struct cache_tree *dst_cache)
3266 {
3267         struct cache_extent *cache;
3268         struct ptr_node *node;
3269         struct inode_record *rec;
3270         struct inode_backref *backref;
3271         int ret = 0;
3272
3273         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274                 free_inode_recs_tree(src_cache);
3275                 return 0;
3276         }
3277
3278         while (1) {
3279                 cache = search_cache_extent(src_cache, 0);
3280                 if (!cache)
3281                         break;
3282                 node = container_of(cache, struct ptr_node, cache);
3283                 rec = node->data;
3284                 remove_cache_extent(src_cache, &node->cache);
3285                 free(node);
3286
3287                 ret = is_child_root(root, root->objectid, rec->ino);
3288                 if (ret < 0)
3289                         break;
3290                 else if (ret == 0)
3291                         goto skip;
3292
3293                 list_for_each_entry(backref, &rec->backrefs, list) {
3294                         BUG_ON(backref->found_inode_ref);
3295                         if (backref->found_dir_item)
3296                                 add_root_backref(dst_cache, rec->ino,
3297                                         root->root_key.objectid, backref->dir,
3298                                         backref->index, backref->name,
3299                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3300                                         backref->errors);
3301                         if (backref->found_dir_index)
3302                                 add_root_backref(dst_cache, rec->ino,
3303                                         root->root_key.objectid, backref->dir,
3304                                         backref->index, backref->name,
3305                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3306                                         backref->errors);
3307                 }
3308 skip:
3309                 free_inode_rec(rec);
3310         }
3311         if (ret < 0)
3312                 return ret;
3313         return 0;
3314 }
3315
3316 static int check_root_refs(struct btrfs_root *root,
3317                            struct cache_tree *root_cache)
3318 {
3319         struct root_record *rec;
3320         struct root_record *ref_root;
3321         struct root_backref *backref;
3322         struct cache_extent *cache;
3323         int loop = 1;
3324         int ret;
3325         int error;
3326         int errors = 0;
3327
3328         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329         BUG_ON(IS_ERR(rec));
3330         rec->found_ref = 1;
3331
3332         /* fixme: this can not detect circular references */
3333         while (loop) {
3334                 loop = 0;
3335                 cache = search_cache_extent(root_cache, 0);
3336                 while (1) {
3337                         if (!cache)
3338                                 break;
3339                         rec = container_of(cache, struct root_record, cache);
3340                         cache = next_cache_extent(cache);
3341
3342                         if (rec->found_ref == 0)
3343                                 continue;
3344
3345                         list_for_each_entry(backref, &rec->backrefs, list) {
3346                                 if (!backref->reachable)
3347                                         continue;
3348
3349                                 ref_root = get_root_rec(root_cache,
3350                                                         backref->ref_root);
3351                                 BUG_ON(IS_ERR(ref_root));
3352                                 if (ref_root->found_ref > 0)
3353                                         continue;
3354
3355                                 backref->reachable = 0;
3356                                 rec->found_ref--;
3357                                 if (rec->found_ref == 0)
3358                                         loop = 1;
3359                         }
3360                 }
3361         }
3362
3363         cache = search_cache_extent(root_cache, 0);
3364         while (1) {
3365                 if (!cache)
3366                         break;
3367                 rec = container_of(cache, struct root_record, cache);
3368                 cache = next_cache_extent(cache);
3369
3370                 if (rec->found_ref == 0 &&
3371                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373                         ret = check_orphan_item(root->fs_info->tree_root,
3374                                                 rec->objectid);
3375                         if (ret == 0)
3376                                 continue;
3377
3378                         /*
3379                          * If we don't have a root item then we likely just have
3380                          * a dir item in a snapshot for this root but no actual
3381                          * ref key or anything so it's meaningless.
3382                          */
3383                         if (!rec->found_root_item)
3384                                 continue;
3385                         errors++;
3386                         fprintf(stderr, "fs tree %llu not referenced\n",
3387                                 (unsigned long long)rec->objectid);
3388                 }
3389
3390                 error = 0;
3391                 if (rec->found_ref > 0 && !rec->found_root_item)
3392                         error = 1;
3393                 list_for_each_entry(backref, &rec->backrefs, list) {
3394                         if (!backref->found_dir_item)
3395                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396                         if (!backref->found_dir_index)
3397                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398                         if (!backref->found_back_ref)
3399                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400                         if (!backref->found_forward_ref)
3401                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3402                         if (backref->reachable && backref->errors)
3403                                 error = 1;
3404                 }
3405                 if (!error)
3406                         continue;
3407
3408                 errors++;
3409                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410                         (unsigned long long)rec->objectid, rec->found_ref,
3411                          rec->found_root_item ? "" : "not found");
3412
3413                 list_for_each_entry(backref, &rec->backrefs, list) {
3414                         if (!backref->reachable)
3415                                 continue;
3416                         if (!backref->errors && rec->found_root_item)
3417                                 continue;
3418                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419                                 " index %llu namelen %u name %s errors %x\n",
3420                                 (unsigned long long)backref->ref_root,
3421                                 (unsigned long long)backref->dir,
3422                                 (unsigned long long)backref->index,
3423                                 backref->namelen, backref->name,
3424                                 backref->errors);
3425                         print_ref_error(backref->errors);
3426                 }
3427         }
3428         return errors > 0 ? 1 : 0;
3429 }
3430
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432                             struct btrfs_key *key,
3433                             struct cache_tree *root_cache)
3434 {
3435         u64 dirid;
3436         u64 index;
3437         u32 len;
3438         u32 name_len;
3439         struct btrfs_root_ref *ref;
3440         char namebuf[BTRFS_NAME_LEN];
3441         int error;
3442
3443         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444
3445         dirid = btrfs_root_ref_dirid(eb, ref);
3446         index = btrfs_root_ref_sequence(eb, ref);
3447         name_len = btrfs_root_ref_name_len(eb, ref);
3448
3449         if (name_len <= BTRFS_NAME_LEN) {
3450                 len = name_len;
3451                 error = 0;
3452         } else {
3453                 len = BTRFS_NAME_LEN;
3454                 error = REF_ERR_NAME_TOO_LONG;
3455         }
3456         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457
3458         if (key->type == BTRFS_ROOT_REF_KEY) {
3459                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460                                  index, namebuf, len, key->type, error);
3461         } else {
3462                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463                                  index, namebuf, len, key->type, error);
3464         }
3465         return 0;
3466 }
3467
3468 static void free_corrupt_block(struct cache_extent *cache)
3469 {
3470         struct btrfs_corrupt_block *corrupt;
3471
3472         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3473         free(corrupt);
3474 }
3475
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3477
3478 /*
3479  * Repair the btree of the given root.
3480  *
3481  * The fix is to remove the node key in corrupt_blocks cache_tree.
3482  * and rebalance the tree.
3483  * After the fix, the btree should be writeable.
3484  */
3485 static int repair_btree(struct btrfs_root *root,
3486                         struct cache_tree *corrupt_blocks)
3487 {
3488         struct btrfs_trans_handle *trans;
3489         struct btrfs_path path;
3490         struct btrfs_corrupt_block *corrupt;
3491         struct cache_extent *cache;
3492         struct btrfs_key key;
3493         u64 offset;
3494         int level;
3495         int ret = 0;
3496
3497         if (cache_tree_empty(corrupt_blocks))
3498                 return 0;
3499
3500         trans = btrfs_start_transaction(root, 1);
3501         if (IS_ERR(trans)) {
3502                 ret = PTR_ERR(trans);
3503                 fprintf(stderr, "Error starting transaction: %s\n",
3504                         strerror(-ret));
3505                 return ret;
3506         }
3507         btrfs_init_path(&path);
3508         cache = first_cache_extent(corrupt_blocks);
3509         while (cache) {
3510                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511                                        cache);
3512                 level = corrupt->level;
3513                 path.lowest_level = level;
3514                 key.objectid = corrupt->key.objectid;
3515                 key.type = corrupt->key.type;
3516                 key.offset = corrupt->key.offset;
3517
3518                 /*
3519                  * Here we don't want to do any tree balance, since it may
3520                  * cause a balance with corrupted brother leaf/node,
3521                  * so ins_len set to 0 here.
3522                  * Balance will be done after all corrupt node/leaf is deleted.
3523                  */
3524                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3525                 if (ret < 0)
3526                         goto out;
3527                 offset = btrfs_node_blockptr(path.nodes[level],
3528                                              path.slots[level]);
3529
3530                 /* Remove the ptr */
3531                 ret = btrfs_del_ptr(trans, root, &path, level,
3532                                     path.slots[level]);
3533                 if (ret < 0)
3534                         goto out;
3535                 /*
3536                  * Remove the corresponding extent
3537                  * return value is not concerned.
3538                  */
3539                 btrfs_release_path(&path);
3540                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541                                         0, root->root_key.objectid,
3542                                         level - 1, 0);
3543                 cache = next_cache_extent(cache);
3544         }
3545
3546         /* Balance the btree using btrfs_search_slot() */
3547         cache = first_cache_extent(corrupt_blocks);
3548         while (cache) {
3549                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550                                        cache);
3551                 memcpy(&key, &corrupt->key, sizeof(key));
3552                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3553                 if (ret < 0)
3554                         goto out;
3555                 /* return will always >0 since it won't find the item */
3556                 ret = 0;
3557                 btrfs_release_path(&path);
3558                 cache = next_cache_extent(cache);
3559         }
3560 out:
3561         btrfs_commit_transaction(trans, root);
3562         btrfs_release_path(&path);
3563         return ret;
3564 }
3565
3566 static int check_fs_root(struct btrfs_root *root,
3567                          struct cache_tree *root_cache,
3568                          struct walk_control *wc)
3569 {
3570         int ret = 0;
3571         int err = 0;
3572         int wret;
3573         int level;
3574         struct btrfs_path path;
3575         struct shared_node root_node;
3576         struct root_record *rec;
3577         struct btrfs_root_item *root_item = &root->root_item;
3578         struct cache_tree corrupt_blocks;
3579         struct orphan_data_extent *orphan;
3580         struct orphan_data_extent *tmp;
3581         enum btrfs_tree_block_status status;
3582         struct node_refs nrefs;
3583
3584         /*
3585          * Reuse the corrupt_block cache tree to record corrupted tree block
3586          *
3587          * Unlike the usage in extent tree check, here we do it in a per
3588          * fs/subvol tree base.
3589          */
3590         cache_tree_init(&corrupt_blocks);
3591         root->fs_info->corrupt_blocks = &corrupt_blocks;
3592
3593         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594                 rec = get_root_rec(root_cache, root->root_key.objectid);
3595                 BUG_ON(IS_ERR(rec));
3596                 if (btrfs_root_refs(root_item) > 0)
3597                         rec->found_root_item = 1;
3598         }
3599
3600         btrfs_init_path(&path);
3601         memset(&root_node, 0, sizeof(root_node));
3602         cache_tree_init(&root_node.root_cache);
3603         cache_tree_init(&root_node.inode_cache);
3604         memset(&nrefs, 0, sizeof(nrefs));
3605
3606         /* Move the orphan extent record to corresponding inode_record */
3607         list_for_each_entry_safe(orphan, tmp,
3608                                  &root->orphan_data_extents, list) {
3609                 struct inode_record *inode;
3610
3611                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612                                       1);
3613                 BUG_ON(IS_ERR(inode));
3614                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615                 list_move(&orphan->list, &inode->orphan_extents);
3616         }
3617
3618         level = btrfs_header_level(root->node);
3619         memset(wc->nodes, 0, sizeof(wc->nodes));
3620         wc->nodes[level] = &root_node;
3621         wc->active_node = level;
3622         wc->root_level = level;
3623
3624         /* We may not have checked the root block, lets do that now */
3625         if (btrfs_is_leaf(root->node))
3626                 status = btrfs_check_leaf(root, NULL, root->node);
3627         else
3628                 status = btrfs_check_node(root, NULL, root->node);
3629         if (status != BTRFS_TREE_BLOCK_CLEAN)
3630                 return -EIO;
3631
3632         if (btrfs_root_refs(root_item) > 0 ||
3633             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634                 path.nodes[level] = root->node;
3635                 extent_buffer_get(root->node);
3636                 path.slots[level] = 0;
3637         } else {
3638                 struct btrfs_key key;
3639                 struct btrfs_disk_key found_key;
3640
3641                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642                 level = root_item->drop_level;
3643                 path.lowest_level = level;
3644                 if (level > btrfs_header_level(root->node) ||
3645                     level >= BTRFS_MAX_LEVEL) {
3646                         error("ignoring invalid drop level: %u", level);
3647                         goto skip_walking;
3648                 }
3649                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3650                 if (wret < 0)
3651                         goto skip_walking;
3652                 btrfs_node_key(path.nodes[level], &found_key,
3653                                 path.slots[level]);
3654                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655                                         sizeof(found_key)));
3656         }
3657
3658         while (1) {
3659                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3660                 if (wret < 0)
3661                         ret = wret;
3662                 if (wret != 0)
3663                         break;
3664
3665                 wret = walk_up_tree(root, &path, wc, &level);
3666                 if (wret < 0)
3667                         ret = wret;
3668                 if (wret != 0)
3669                         break;
3670         }
3671 skip_walking:
3672         btrfs_release_path(&path);
3673
3674         if (!cache_tree_empty(&corrupt_blocks)) {
3675                 struct cache_extent *cache;
3676                 struct btrfs_corrupt_block *corrupt;
3677
3678                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679                        root->root_key.objectid);
3680                 cache = first_cache_extent(&corrupt_blocks);
3681                 while (cache) {
3682                         corrupt = container_of(cache,
3683                                                struct btrfs_corrupt_block,
3684                                                cache);
3685                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686                                cache->start, corrupt->level,
3687                                corrupt->key.objectid, corrupt->key.type,
3688                                corrupt->key.offset);
3689                         cache = next_cache_extent(cache);
3690                 }
3691                 if (repair) {
3692                         printf("Try to repair the btree for root %llu\n",
3693                                root->root_key.objectid);
3694                         ret = repair_btree(root, &corrupt_blocks);
3695                         if (ret < 0)
3696                                 fprintf(stderr, "Failed to repair btree: %s\n",
3697                                         strerror(-ret));
3698                         if (!ret)
3699                                 printf("Btree for root %llu is fixed\n",
3700                                        root->root_key.objectid);
3701                 }
3702         }
3703
3704         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3705         if (err < 0)
3706                 ret = err;
3707
3708         if (root_node.current) {
3709                 root_node.current->checked = 1;
3710                 maybe_free_inode_rec(&root_node.inode_cache,
3711                                 root_node.current);
3712         }
3713
3714         err = check_inode_recs(root, &root_node.inode_cache);
3715         if (!ret)
3716                 ret = err;
3717
3718         free_corrupt_blocks_tree(&corrupt_blocks);
3719         root->fs_info->corrupt_blocks = NULL;
3720         free_orphan_data_extents(&root->orphan_data_extents);
3721         return ret;
3722 }
3723
3724 static int fs_root_objectid(u64 objectid)
3725 {
3726         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728                 return 1;
3729         return is_fstree(objectid);
3730 }
3731
3732 static int check_fs_roots(struct btrfs_root *root,
3733                           struct cache_tree *root_cache)
3734 {
3735         struct btrfs_path path;
3736         struct btrfs_key key;
3737         struct walk_control wc;
3738         struct extent_buffer *leaf, *tree_node;
3739         struct btrfs_root *tmp_root;
3740         struct btrfs_root *tree_root = root->fs_info->tree_root;
3741         int ret;
3742         int err = 0;
3743
3744         if (ctx.progress_enabled) {
3745                 ctx.tp = TASK_FS_ROOTS;
3746                 task_start(ctx.info);
3747         }
3748
3749         /*
3750          * Just in case we made any changes to the extent tree that weren't
3751          * reflected into the free space cache yet.
3752          */
3753         if (repair)
3754                 reset_cached_block_groups(root->fs_info);
3755         memset(&wc, 0, sizeof(wc));
3756         cache_tree_init(&wc.shared);
3757         btrfs_init_path(&path);
3758
3759 again:
3760         key.offset = 0;
3761         key.objectid = 0;
3762         key.type = BTRFS_ROOT_ITEM_KEY;
3763         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3764         if (ret < 0) {
3765                 err = 1;
3766                 goto out;
3767         }
3768         tree_node = tree_root->node;
3769         while (1) {
3770                 if (tree_node != tree_root->node) {
3771                         free_root_recs_tree(root_cache);
3772                         btrfs_release_path(&path);
3773                         goto again;
3774                 }
3775                 leaf = path.nodes[0];
3776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777                         ret = btrfs_next_leaf(tree_root, &path);
3778                         if (ret) {
3779                                 if (ret < 0)
3780                                         err = 1;
3781                                 break;
3782                         }
3783                         leaf = path.nodes[0];
3784                 }
3785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787                     fs_root_objectid(key.objectid)) {
3788                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789                                 tmp_root = btrfs_read_fs_root_no_cache(
3790                                                 root->fs_info, &key);
3791                         } else {
3792                                 key.offset = (u64)-1;
3793                                 tmp_root = btrfs_read_fs_root(
3794                                                 root->fs_info, &key);
3795                         }
3796                         if (IS_ERR(tmp_root)) {
3797                                 err = 1;
3798                                 goto next;
3799                         }
3800                         ret = check_fs_root(tmp_root, root_cache, &wc);
3801                         if (ret == -EAGAIN) {
3802                                 free_root_recs_tree(root_cache);
3803                                 btrfs_release_path(&path);
3804                                 goto again;
3805                         }
3806                         if (ret)
3807                                 err = 1;
3808                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809                                 btrfs_free_fs_root(tmp_root);
3810                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3812                         process_root_ref(leaf, path.slots[0], &key,
3813                                          root_cache);
3814                 }
3815 next:
3816                 path.slots[0]++;
3817         }
3818 out:
3819         btrfs_release_path(&path);
3820         if (err)
3821                 free_extent_cache_tree(&wc.shared);
3822         if (!cache_tree_empty(&wc.shared))
3823                 fprintf(stderr, "warning line %d\n", __LINE__);
3824
3825         task_stop(ctx.info);
3826
3827         return err;
3828 }
3829
/*
 * Error bits reported by the checks that follow (for example
 * find_dir_item() returns ROOT_DIR_ERROR, DIR_ITEM_MISSING or
 * DIR_ITEM_MISMATCH).  Each value is a distinct bit so several can be
 * OR-ed together.
 */
#define ROOT_DIR_ERROR          (1 << 1)        /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1 << 2)        /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1 << 3)        /* DIR_ITEM found but does not match */
#define INODE_REF_MISSING       (1 << 4)        /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1 << 5)        /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1 << 6)        /* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR       (1 << 7)        /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1 << 8)        /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1 << 9)        /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1 << 10)       /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1 << 11)       /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1 << 12)       /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1 << 13)       /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1 << 14)       /* no inode_item */
#define LAST_ITEM               (1 << 15)       /* Complete this tree traversal */
3845
3846 /*
3847  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3848  * INODE_REF/INODE_EXTREF match.
3849  *
3850  * @root:       the root of the fs/file tree
3851  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
3852  * @key:        the key of the DIR_ITEM/DIR_INDEX
 * @index:      the index in the INODE_REF/INODE_EXTREF, used to
 *              distinguish root_dir from a normal dir/file
3855  * @name:       the name in the INODE_REF/INODE_EXTREF
3856  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
3857  * @mode:       the st_mode of INODE_ITEM
3858  *
3859  * Return 0 if no error occurred.
3860  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3861  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3862  * dir/file.
3863  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3864  * not match for normal dir/file.
3865  */
3866 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3867                          struct btrfs_key *key, u64 index, char *name,
3868                          u32 namelen, u32 mode)
3869 {
3870         struct btrfs_path path;
3871         struct extent_buffer *node;
3872         struct btrfs_dir_item *di;
3873         struct btrfs_key location;
3874         char namebuf[BTRFS_NAME_LEN] = {0};
3875         u32 total;
3876         u32 cur = 0;
3877         u32 len;
3878         u32 name_len;
3879         u32 data_len;
3880         u8 filetype;
3881         int slot;
3882         int ret;
3883
3884         btrfs_init_path(&path);
3885         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3886         if (ret < 0) {
3887                 ret = DIR_ITEM_MISSING;
3888                 goto out;
3889         }
3890
3891         /* Process root dir and goto out*/
3892         if (index == 0) {
3893                 if (ret == 0) {
3894                         ret = ROOT_DIR_ERROR;
3895                         error(
3896                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3897                                 root->objectid,
3898                                 ref_key->type == BTRFS_INODE_REF_KEY ?
3899                                         "REF" : "EXTREF",
3900                                 ref_key->objectid, ref_key->offset,
3901                                 key->type == BTRFS_DIR_ITEM_KEY ?
3902                                         "DIR_ITEM" : "DIR_INDEX");
3903                 } else {
3904                         ret = 0;
3905                 }
3906
3907                 goto out;
3908         }
3909
3910         /* Process normal file/dir */
3911         if (ret > 0) {
3912                 ret = DIR_ITEM_MISSING;
3913                 error(
3914                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3915                         root->objectid,
3916                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3917                         ref_key->objectid, ref_key->offset,
3918                         key->type == BTRFS_DIR_ITEM_KEY ?
3919                                 "DIR_ITEM" : "DIR_INDEX",
3920                         key->objectid, key->offset, namelen, name,
3921                         imode_to_type(mode));
3922                 goto out;
3923         }
3924
3925         /* Check whether inode_id/filetype/name match */
3926         node = path.nodes[0];
3927         slot = path.slots[0];
3928         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3929         total = btrfs_item_size_nr(node, slot);
3930         while (cur < total) {
3931                 ret = DIR_ITEM_MISMATCH;
3932                 name_len = btrfs_dir_name_len(node, di);
3933                 data_len = btrfs_dir_data_len(node, di);
3934
3935                 btrfs_dir_item_key_to_cpu(node, di, &location);
3936                 if (location.objectid != ref_key->objectid ||
3937                     location.type !=  BTRFS_INODE_ITEM_KEY ||
3938                     location.offset != 0)
3939                         goto next;
3940
3941                 filetype = btrfs_dir_type(node, di);
3942                 if (imode_to_type(mode) != filetype)
3943                         goto next;
3944
3945                 if (name_len <= BTRFS_NAME_LEN) {
3946                         len = name_len;
3947                 } else {
3948                         len = BTRFS_NAME_LEN;
3949                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3950                         root->objectid,
3951                         key->type == BTRFS_DIR_ITEM_KEY ?
3952                         "DIR_ITEM" : "DIR_INDEX",
3953                         key->objectid, key->offset, name_len);
3954                 }
3955                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3956                 if (len != namelen || strncmp(namebuf, name, len))
3957                         goto next;
3958
3959                 ret = 0;
3960                 goto out;
3961 next:
3962                 len = sizeof(*di) + name_len + data_len;
3963                 di = (struct btrfs_dir_item *)((char *)di + len);
3964                 cur += len;
3965         }
3966         if (ret == DIR_ITEM_MISMATCH)
3967                 error(
3968                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3969                         root->objectid,
3970                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3971                         ref_key->objectid, ref_key->offset,
3972                         key->type == BTRFS_DIR_ITEM_KEY ?
3973                                 "DIR_ITEM" : "DIR_INDEX",
3974                         key->objectid, key->offset, namelen, name,
3975                         imode_to_type(mode));
3976 out:
3977         btrfs_release_path(&path);
3978         return ret;
3979 }
3980
3981 /*
3982  * Traverse the given INODE_REF and call find_dir_item() to find related
3983  * DIR_ITEM/DIR_INDEX.
3984  *
3985  * @root:       the root of the fs/file tree
3986  * @ref_key:    the key of the INODE_REF
3987  * @refs:       the count of INODE_REF
3988  * @mode:       the st_mode of INODE_ITEM
3989  *
3990  * Return 0 if no error occurred.
3991  */
3992 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3993                            struct extent_buffer *node, int slot, u64 *refs,
3994                            int mode)
3995 {
3996         struct btrfs_key key;
3997         struct btrfs_inode_ref *ref;
3998         char namebuf[BTRFS_NAME_LEN] = {0};
3999         u32 total;
4000         u32 cur = 0;
4001         u32 len;
4002         u32 name_len;
4003         u64 index;
4004         int ret, err = 0;
4005
4006         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4007         total = btrfs_item_size_nr(node, slot);
4008
4009 next:
4010         /* Update inode ref count */
4011         (*refs)++;
4012
4013         index = btrfs_inode_ref_index(node, ref);
4014         name_len = btrfs_inode_ref_name_len(node, ref);
4015         if (name_len <= BTRFS_NAME_LEN) {
4016                 len = name_len;
4017         } else {
4018                 len = BTRFS_NAME_LEN;
4019                 warning("root %llu INODE_REF[%llu %llu] name too long",
4020                         root->objectid, ref_key->objectid, ref_key->offset);
4021         }
4022
4023         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4024
4025         /* Check root dir ref name */
4026         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4027                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4028                       root->objectid, ref_key->objectid, ref_key->offset,
4029                       namebuf);
4030                 err |= ROOT_DIR_ERROR;
4031         }
4032
4033         /* Find related DIR_INDEX */
4034         key.objectid = ref_key->offset;
4035         key.type = BTRFS_DIR_INDEX_KEY;
4036         key.offset = index;
4037         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4038         err |= ret;
4039
4040         /* Find related dir_item */
4041         key.objectid = ref_key->offset;
4042         key.type = BTRFS_DIR_ITEM_KEY;
4043         key.offset = btrfs_name_hash(namebuf, len);
4044         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4045         err |= ret;
4046
4047         len = sizeof(*ref) + name_len;
4048         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4049         cur += len;
4050         if (cur < total)
4051                 goto next;
4052
4053         return err;
4054 }
4055
4056 /*
4057  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4058  * DIR_ITEM/DIR_INDEX.
4059  *
4060  * @root:       the root of the fs/file tree
4061  * @ref_key:    the key of the INODE_EXTREF
4062  * @refs:       the count of INODE_EXTREF
4063  * @mode:       the st_mode of INODE_ITEM
4064  *
4065  * Return 0 if no error occurred.
4066  */
4067 static int check_inode_extref(struct btrfs_root *root,
4068                               struct btrfs_key *ref_key,
4069                               struct extent_buffer *node, int slot, u64 *refs,
4070                               int mode)
4071 {
4072         struct btrfs_key key;
4073         struct btrfs_inode_extref *extref;
4074         char namebuf[BTRFS_NAME_LEN] = {0};
4075         u32 total;
4076         u32 cur = 0;
4077         u32 len;
4078         u32 name_len;
4079         u64 index;
4080         u64 parent;
4081         int ret;
4082         int err = 0;
4083
4084         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4085         total = btrfs_item_size_nr(node, slot);
4086
4087 next:
4088         /* update inode ref count */
4089         (*refs)++;
4090         name_len = btrfs_inode_extref_name_len(node, extref);
4091         index = btrfs_inode_extref_index(node, extref);
4092         parent = btrfs_inode_extref_parent(node, extref);
4093         if (name_len <= BTRFS_NAME_LEN) {
4094                 len = name_len;
4095         } else {
4096                 len = BTRFS_NAME_LEN;
4097                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4098                         root->objectid, ref_key->objectid, ref_key->offset);
4099         }
4100         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4101
4102         /* Check root dir ref name */
4103         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4104                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4105                       root->objectid, ref_key->objectid, ref_key->offset,
4106                       namebuf);
4107                 err |= ROOT_DIR_ERROR;
4108         }
4109
4110         /* find related dir_index */
4111         key.objectid = parent;
4112         key.type = BTRFS_DIR_INDEX_KEY;
4113         key.offset = index;
4114         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4115         err |= ret;
4116
4117         /* find related dir_item */
4118         key.objectid = parent;
4119         key.type = BTRFS_DIR_ITEM_KEY;
4120         key.offset = btrfs_name_hash(namebuf, len);
4121         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4122         err |= ret;
4123
4124         len = sizeof(*extref) + name_len;
4125         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4126         cur += len;
4127
4128         if (cur < total)
4129                 goto next;
4130
4131         return err;
4132 }
4133
4134 /*
4135  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4136  * DIR_ITEM/DIR_INDEX match.
4137  *
4138  * @root:       the root of the fs/file tree
4139  * @key:        the key of the INODE_REF/INODE_EXTREF
4140  * @name:       the name in the INODE_REF/INODE_EXTREF
4141  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4142  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4143  * to (u64)-1
4144  * @ext_ref:    the EXTENDED_IREF feature
4145  *
4146  * Return 0 if no error occurred.
4147  * Return >0 for error bitmap
4148  */
4149 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4150                           char *name, int namelen, u64 index,
4151                           unsigned int ext_ref)
4152 {
4153         struct btrfs_path path;
4154         struct btrfs_inode_ref *ref;
4155         struct btrfs_inode_extref *extref;
4156         struct extent_buffer *node;
4157         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4158         u32 total;
4159         u32 cur = 0;
4160         u32 len;
4161         u32 ref_namelen;
4162         u64 ref_index;
4163         u64 parent;
4164         u64 dir_id;
4165         int slot;
4166         int ret;
4167
4168         btrfs_init_path(&path);
4169         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4170         if (ret) {
4171                 ret = INODE_REF_MISSING;
4172                 goto extref;
4173         }
4174
4175         node = path.nodes[0];
4176         slot = path.slots[0];
4177
4178         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4179         total = btrfs_item_size_nr(node, slot);
4180
4181         /* Iterate all entry of INODE_REF */
4182         while (cur < total) {
4183                 ret = INODE_REF_MISSING;
4184
4185                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4186                 ref_index = btrfs_inode_ref_index(node, ref);
4187                 if (index != (u64)-1 && index != ref_index)
4188                         goto next_ref;
4189
4190                 if (ref_namelen <= BTRFS_NAME_LEN) {
4191                         len = ref_namelen;
4192                 } else {
4193                         len = BTRFS_NAME_LEN;
4194                         warning("root %llu INODE %s[%llu %llu] name too long",
4195                                 root->objectid,
4196                                 key->type == BTRFS_INODE_REF_KEY ?
4197                                         "REF" : "EXTREF",
4198                                 key->objectid, key->offset);
4199                 }
4200                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4201                                    len);
4202
4203                 if (len != namelen || strncmp(ref_namebuf, name, len))
4204                         goto next_ref;
4205
4206                 ret = 0;
4207                 goto out;
4208 next_ref:
4209                 len = sizeof(*ref) + ref_namelen;
4210                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4211                 cur += len;
4212         }
4213
4214 extref:
4215         /* Skip if not support EXTENDED_IREF feature */
4216         if (!ext_ref)
4217                 goto out;
4218
4219         btrfs_release_path(&path);
4220         btrfs_init_path(&path);
4221
4222         dir_id = key->offset;
4223         key->type = BTRFS_INODE_EXTREF_KEY;
4224         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4225
4226         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4227         if (ret) {
4228                 ret = INODE_REF_MISSING;
4229                 goto out;
4230         }
4231
4232         node = path.nodes[0];
4233         slot = path.slots[0];
4234
4235         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4236         cur = 0;
4237         total = btrfs_item_size_nr(node, slot);
4238
4239         /* Iterate all entry of INODE_EXTREF */
4240         while (cur < total) {
4241                 ret = INODE_REF_MISSING;
4242
4243                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4244                 ref_index = btrfs_inode_extref_index(node, extref);
4245                 parent = btrfs_inode_extref_parent(node, extref);
4246                 if (index != (u64)-1 && index != ref_index)
4247                         goto next_extref;
4248
4249                 if (parent != dir_id)
4250                         goto next_extref;
4251
4252                 if (ref_namelen <= BTRFS_NAME_LEN) {
4253                         len = ref_namelen;
4254                 } else {
4255                         len = BTRFS_NAME_LEN;
4256                         warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4257                                 root->objectid,
4258                                 key->type == BTRFS_INODE_REF_KEY ?
4259                                         "REF" : "EXTREF",
4260                                 key->objectid, key->offset);
4261                 }
4262                 read_extent_buffer(node, ref_namebuf,
4263                                    (unsigned long)(extref + 1), len);
4264
4265                 if (len != namelen || strncmp(ref_namebuf, name, len))
4266                         goto next_extref;
4267
4268                 ret = 0;
4269                 goto out;
4270
4271 next_extref:
4272                 len = sizeof(*extref) + ref_namelen;
4273                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4274                 cur += len;
4275
4276         }
4277 out:
4278         btrfs_release_path(&path);
4279         return ret;
4280 }
4281
4282 /*
4283  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4284  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4285  *
4286  * @root:       the root of the fs/file tree
4287  * @key:        the key of the INODE_REF/INODE_EXTREF
4288  * @size:       the st_size of the INODE_ITEM
4289  * @ext_ref:    the EXTENDED_IREF feature
4290  *
4291  * Return 0 if no error occurred.
4292  */
4293 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4294                           struct extent_buffer *node, int slot, u64 *size,
4295                           unsigned int ext_ref)
4296 {
4297         struct btrfs_dir_item *di;
4298         struct btrfs_inode_item *ii;
4299         struct btrfs_path path;
4300         struct btrfs_key location;
4301         char namebuf[BTRFS_NAME_LEN] = {0};
4302         u32 total;
4303         u32 cur = 0;
4304         u32 len;
4305         u32 name_len;
4306         u32 data_len;
4307         u8 filetype;
4308         u32 mode;
4309         u64 index;
4310         int ret;
4311         int err = 0;
4312
4313         /*
4314          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4315          * ignore index check.
4316          */
4317         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4318
4319         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4320         total = btrfs_item_size_nr(node, slot);
4321
4322         while (cur < total) {
4323                 data_len = btrfs_dir_data_len(node, di);
4324                 if (data_len)
4325                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4326                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4327                               "DIR_ITEM" : "DIR_INDEX",
4328                               key->objectid, key->offset, data_len);
4329
4330                 name_len = btrfs_dir_name_len(node, di);
4331                 if (name_len <= BTRFS_NAME_LEN) {
4332                         len = name_len;
4333                 } else {
4334                         len = BTRFS_NAME_LEN;
4335                         warning("root %llu %s[%llu %llu] name too long",
4336                                 root->objectid,
4337                                 key->type == BTRFS_DIR_ITEM_KEY ?
4338                                 "DIR_ITEM" : "DIR_INDEX",
4339                                 key->objectid, key->offset);
4340                 }
4341                 (*size) += name_len;
4342
4343                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4344                 filetype = btrfs_dir_type(node, di);
4345
4346                 btrfs_init_path(&path);
4347                 btrfs_dir_item_key_to_cpu(node, di, &location);
4348
4349                 /* Ignore related ROOT_ITEM check */
4350                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4351                         goto next;
4352
4353                 /* Check relative INODE_ITEM(existence/filetype) */
4354                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4355                 if (ret) {
4356                         err |= INODE_ITEM_MISSING;
4357                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4358                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4359                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4360                               key->offset, location.objectid, name_len,
4361                               namebuf, filetype);
4362                         goto next;
4363                 }
4364
4365                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4366                                     struct btrfs_inode_item);
4367                 mode = btrfs_inode_mode(path.nodes[0], ii);
4368
4369                 if (imode_to_type(mode) != filetype) {
4370                         err |= INODE_ITEM_MISMATCH;
4371                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4372                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4373                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4374                               key->offset, name_len, namebuf, filetype);
4375                 }
4376
4377                 /* Check relative INODE_REF/INODE_EXTREF */
4378                 location.type = BTRFS_INODE_REF_KEY;
4379                 location.offset = key->objectid;
4380                 ret = find_inode_ref(root, &location, namebuf, len,
4381                                        index, ext_ref);
4382                 err |= ret;
4383                 if (ret & INODE_REF_MISSING)
4384                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4385                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4386                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4387                               key->offset, name_len, namebuf, filetype);
4388
4389 next:
4390                 btrfs_release_path(&path);
4391                 len = sizeof(*di) + name_len + data_len;
4392                 di = (struct btrfs_dir_item *)((char *)di + len);
4393                 cur += len;
4394
4395                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4396                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4397                               root->objectid, key->objectid, key->offset);
4398                         break;
4399                 }
4400         }
4401
4402         return err;
4403 }
4404
4405 /*
4406  * Check file extent datasum/hole, update the size of the file extents,
4407  * check and update the last offset of the file extent.
4408  *
4409  * @root:       the root of fs/file tree.
4410  * @fkey:       the key of the file extent.
4411  * @nodatasum:  INODE_NODATASUM feature.
4412  * @size:       the sum of all EXTENT_DATA items size for this inode.
4413  * @end:        the offset of the last extent.
4414  *
4415  * Return 0 if no error occurred.
4416  */
4417 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4418                              struct extent_buffer *node, int slot,
4419                              unsigned int nodatasum, u64 *size, u64 *end)
4420 {
4421         struct btrfs_file_extent_item *fi;
4422         u64 disk_bytenr;
4423         u64 disk_num_bytes;
4424         u64 extent_num_bytes;
4425         u64 found;
4426         unsigned int extent_type;
4427         unsigned int is_hole;
4428         int ret;
4429         int err = 0;
4430
4431         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4432
4433         extent_type = btrfs_file_extent_type(node, fi);
4434         /* Skip if file extent is inline */
4435         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4436                 struct btrfs_item *e = btrfs_item_nr(slot);
4437                 u32 item_inline_len;
4438
4439                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4440                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4441                 if (extent_num_bytes == 0 ||
4442                     extent_num_bytes != item_inline_len)
4443                         err |= FILE_EXTENT_ERROR;
4444                 *size += extent_num_bytes;
4445                 return err;
4446         }
4447
4448         /* Check extent type */
4449         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4450                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4451                 err |= FILE_EXTENT_ERROR;
4452                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4453                       root->objectid, fkey->objectid, fkey->offset);
4454                 return err;
4455         }
4456
4457         /* Check REG_EXTENT/PREALLOC_EXTENT */
4458         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4459         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4460         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4461         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4462
4463         /* Check EXTENT_DATA datasum */
4464         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4465         if (found > 0 && nodatasum) {
4466                 err |= ODD_CSUM_ITEM;
4467                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4468                       root->objectid, fkey->objectid, fkey->offset);
4469         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4470                    !is_hole &&
4471                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4472                 err |= CSUM_ITEM_MISSING;
4473                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4474                       root->objectid, fkey->objectid, fkey->offset);
4475         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4476                 err |= ODD_CSUM_ITEM;
4477                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4478                       root->objectid, fkey->objectid, fkey->offset);
4479         }
4480
4481         /* Check EXTENT_DATA hole */
4482         if (no_holes && is_hole) {
4483                 err |= FILE_EXTENT_ERROR;
4484                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4485                       root->objectid, fkey->objectid, fkey->offset);
4486         } else if (!no_holes && *end != fkey->offset) {
4487                 err |= FILE_EXTENT_ERROR;
4488                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4489                       root->objectid, fkey->objectid, fkey->offset);
4490         }
4491
4492         *end += extent_num_bytes;
4493         if (!is_hole)
4494                 *size += extent_num_bytes;
4495
4496         return err;
4497 }
4498
4499 /*
4500  * Check INODE_ITEM and related ITEMs (the same inode number)
4501  * 1. check link count
4502  * 2. check inode ref/extref
4503  * 3. check dir item/index
4504  *
4505  * @ext_ref:    the EXTENDED_IREF feature
4506  *
4507  * Return 0 if no error occurred.
4508  * Return >0 for error or hit the traversal is done(by error bitmap)
4509  */
4510 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4511                             unsigned int ext_ref)
4512 {
4513         struct extent_buffer *node;
4514         struct btrfs_inode_item *ii;
4515         struct btrfs_key key;
4516         u64 inode_id;
4517         u32 mode;
4518         u64 nlink;
4519         u64 nbytes;
4520         u64 isize;
4521         u64 size = 0;
4522         u64 refs = 0;
4523         u64 extent_end = 0;
4524         u64 extent_size = 0;
4525         unsigned int dir;
4526         unsigned int nodatasum;
4527         int slot;
4528         int ret;
4529         int err = 0;
4530
4531         node = path->nodes[0];
4532         slot = path->slots[0];
4533
4534         btrfs_item_key_to_cpu(node, &key, slot);
4535         inode_id = key.objectid;
4536
4537         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4538                 ret = btrfs_next_item(root, path);
4539                 if (ret > 0)
4540                         err |= LAST_ITEM;
4541                 return err;
4542         }
4543
4544         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4545         isize = btrfs_inode_size(node, ii);
4546         nbytes = btrfs_inode_nbytes(node, ii);
4547         mode = btrfs_inode_mode(node, ii);
4548         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4549         nlink = btrfs_inode_nlink(node, ii);
4550         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4551
4552         while (1) {
4553                 ret = btrfs_next_item(root, path);
4554                 if (ret < 0) {
4555                         /* out will fill 'err' rusing current statistics */
4556                         goto out;
4557                 } else if (ret > 0) {
4558                         err |= LAST_ITEM;
4559                         goto out;
4560                 }
4561
4562                 node = path->nodes[0];
4563                 slot = path->slots[0];
4564                 btrfs_item_key_to_cpu(node, &key, slot);
4565                 if (key.objectid != inode_id)
4566                         goto out;
4567
4568                 switch (key.type) {
4569                 case BTRFS_INODE_REF_KEY:
4570                         ret = check_inode_ref(root, &key, node, slot, &refs,
4571                                               mode);
4572                         err |= ret;
4573                         break;
4574                 case BTRFS_INODE_EXTREF_KEY:
4575                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4576                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4577                                         root->objectid, key.objectid,
4578                                         key.offset);
4579                         ret = check_inode_extref(root, &key, node, slot, &refs,
4580                                                  mode);
4581                         err |= ret;
4582                         break;
4583                 case BTRFS_DIR_ITEM_KEY:
4584                 case BTRFS_DIR_INDEX_KEY:
4585                         if (!dir) {
4586                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4587                                         root->objectid, inode_id,
4588                                         imode_to_type(mode), key.objectid,
4589                                         key.offset);
4590                         }
4591                         ret = check_dir_item(root, &key, node, slot, &size,
4592                                              ext_ref);
4593                         err |= ret;
4594                         break;
4595                 case BTRFS_EXTENT_DATA_KEY:
4596                         if (dir) {
4597                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4598                                         root->objectid, inode_id, key.objectid,
4599                                         key.offset);
4600                         }
4601                         ret = check_file_extent(root, &key, node, slot,
4602                                                 nodatasum, &extent_size,
4603                                                 &extent_end);
4604                         err |= ret;
4605                         break;
4606                 case BTRFS_XATTR_ITEM_KEY:
4607                         break;
4608                 default:
4609                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4610                               key.objectid, key.type, key.offset);
4611                 }
4612         }
4613
4614 out:
4615         /* verify INODE_ITEM nlink/isize/nbytes */
4616         if (dir) {
4617                 if (nlink != 1) {
4618                         err |= LINK_COUNT_ERROR;
4619                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4620                               root->objectid, inode_id, nlink);
4621                 }
4622
4623                 /*
4624                  * Just a warning, as dir inode nbytes is just an
4625                  * instructive value.
4626                  */
4627                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4628                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4629                                 root->objectid, inode_id, root->nodesize);
4630                 }
4631
4632                 if (isize != size) {
4633                         err |= ISIZE_ERROR;
4634                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4635                               root->objectid, inode_id, isize, size);
4636                 }
4637         } else {
4638                 if (nlink != refs) {
4639                         err |= LINK_COUNT_ERROR;
4640                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4641                               root->objectid, inode_id, nlink, refs);
4642                 } else if (!nlink) {
4643                         err |= ORPHAN_ITEM;
4644                 }
4645
4646                 if (!nbytes && !no_holes && extent_end < isize) {
4647                         err |= NBYTES_ERROR;
4648                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4649                               root->objectid, inode_id, isize);
4650                 }
4651
4652                 if (nbytes != extent_size) {
4653                         err |= NBYTES_ERROR;
4654                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4655                               root->objectid, inode_id, nbytes, extent_size);
4656                 }
4657         }
4658
4659         return err;
4660 }
4661
4662 /*
4663  * Iterate all item on the tree and call check_inode_item() to check.
4664  *
4665  * @root:       the root of the tree to be checked.
4666  * @ext_ref:    the EXTENDED_IREF feature
4667  *
4668  * Return 0 if no error found.
4669  * Return <0 for error.
4670  * All internal error bitmap will be converted to -EIO, to avoid
4671  * mixing negative and postive return value.
4672  */
4673 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4674 {
4675         struct btrfs_path *path;
4676         struct btrfs_key key;
4677         u64 inode_id;
4678         int ret, err = 0;
4679
4680         path = btrfs_alloc_path();
4681         if (!path)
4682                 return -ENOMEM;
4683
4684         key.objectid = 0;
4685         key.type = 0;
4686         key.offset = 0;
4687
4688         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4689         if (ret < 0)
4690                 goto out;
4691
4692         while (1) {
4693                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4694
4695                 /*
4696                  * All check must start with inode item, skip if not
4697                  */
4698                 if (key.type == BTRFS_INODE_ITEM_KEY) {
4699                         ret = check_inode_item(root, path, ext_ref);
4700                         err |= ret;
4701                         if (err & LAST_ITEM)
4702                                 goto out;
4703                         continue;
4704                 }
4705                 error("root %llu ITEM[%llu %u %llu] isn't INODE_ITEM, skip to next inode",
4706                       root->objectid, key.objectid, key.type,
4707                       key.offset);
4708
4709                 err |= NO_INODE_ITEM;
4710                 inode_id = key.objectid;
4711
4712                 /*
4713                  * skip to next inode
4714                  * TODO: Maybe search_slot() will be faster?
4715                  */
4716                 do {
4717                         ret = btrfs_next_item(root, path);
4718                         if (ret > 0) {
4719                                 goto out;
4720                         } else if (ret < 0) {
4721                                 err = ret;
4722                                 goto out;
4723                         }
4724                         btrfs_item_key_to_cpu(path->nodes[0], &key,
4725                                               path->slots[0]);
4726                 } while (inode_id == key.objectid);
4727         }
4728
4729 out:
4730         err &= ~LAST_ITEM;
4731         if (err && !ret)
4732                 ret = -EIO;
4733         btrfs_free_path(path);
4734         return ret;
4735 }
4736
4737 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4738 {
4739         struct list_head *cur = rec->backrefs.next;
4740         struct extent_backref *back;
4741         struct tree_backref *tback;
4742         struct data_backref *dback;
4743         u64 found = 0;
4744         int err = 0;
4745
4746         while(cur != &rec->backrefs) {
4747                 back = to_extent_backref(cur);
4748                 cur = cur->next;
4749                 if (!back->found_extent_tree) {
4750                         err = 1;
4751                         if (!print_errs)
4752                                 goto out;
4753                         if (back->is_data) {
4754                                 dback = to_data_backref(back);
4755                                 fprintf(stderr, "Backref %llu %s %llu"
4756                                         " owner %llu offset %llu num_refs %lu"
4757                                         " not found in extent tree\n",
4758                                         (unsigned long long)rec->start,
4759                                         back->full_backref ?
4760                                         "parent" : "root",
4761                                         back->full_backref ?
4762                                         (unsigned long long)dback->parent:
4763                                         (unsigned long long)dback->root,
4764                                         (unsigned long long)dback->owner,
4765                                         (unsigned long long)dback->offset,
4766                                         (unsigned long)dback->num_refs);
4767                         } else {
4768                                 tback = to_tree_backref(back);
4769                                 fprintf(stderr, "Backref %llu parent %llu"
4770                                         " root %llu not found in extent tree\n",
4771                                         (unsigned long long)rec->start,
4772                                         (unsigned long long)tback->parent,
4773                                         (unsigned long long)tback->root);
4774                         }
4775                 }
4776                 if (!back->is_data && !back->found_ref) {
4777                         err = 1;
4778                         if (!print_errs)
4779                                 goto out;
4780                         tback = to_tree_backref(back);
4781                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4782                                 (unsigned long long)rec->start,
4783                                 back->full_backref ? "parent" : "root",
4784                                 back->full_backref ?
4785                                 (unsigned long long)tback->parent :
4786                                 (unsigned long long)tback->root, back);
4787                 }
4788                 if (back->is_data) {
4789                         dback = to_data_backref(back);
4790                         if (dback->found_ref != dback->num_refs) {
4791                                 err = 1;
4792                                 if (!print_errs)
4793                                         goto out;
4794                                 fprintf(stderr, "Incorrect local backref count"
4795                                         " on %llu %s %llu owner %llu"
4796                                         " offset %llu found %u wanted %u back %p\n",
4797                                         (unsigned long long)rec->start,
4798                                         back->full_backref ?
4799                                         "parent" : "root",
4800                                         back->full_backref ?
4801                                         (unsigned long long)dback->parent:
4802                                         (unsigned long long)dback->root,
4803                                         (unsigned long long)dback->owner,
4804                                         (unsigned long long)dback->offset,
4805                                         dback->found_ref, dback->num_refs, back);
4806                         }
4807                         if (dback->disk_bytenr != rec->start) {
4808                                 err = 1;
4809                                 if (!print_errs)
4810                                         goto out;
4811                                 fprintf(stderr, "Backref disk bytenr does not"
4812                                         " match extent record, bytenr=%llu, "
4813                                         "ref bytenr=%llu\n",
4814                                         (unsigned long long)rec->start,
4815                                         (unsigned long long)dback->disk_bytenr);
4816                         }
4817
4818                         if (dback->bytes != rec->nr) {
4819                                 err = 1;
4820                                 if (!print_errs)
4821                                         goto out;
4822                                 fprintf(stderr, "Backref bytes do not match "
4823                                         "extent backref, bytenr=%llu, ref "
4824                                         "bytes=%llu, backref bytes=%llu\n",
4825                                         (unsigned long long)rec->start,
4826                                         (unsigned long long)rec->nr,
4827                                         (unsigned long long)dback->bytes);
4828                         }
4829                 }
4830                 if (!back->is_data) {
4831                         found += 1;
4832                 } else {
4833                         dback = to_data_backref(back);
4834                         found += dback->found_ref;
4835                 }
4836         }
4837         if (found != rec->refs) {
4838                 err = 1;
4839                 if (!print_errs)
4840                         goto out;
4841                 fprintf(stderr, "Incorrect global backref count "
4842                         "on %llu found %llu wanted %llu\n",
4843                         (unsigned long long)rec->start,
4844                         (unsigned long long)found,
4845                         (unsigned long long)rec->refs);
4846         }
4847 out:
4848         return err;
4849 }
4850
4851 static int free_all_extent_backrefs(struct extent_record *rec)
4852 {
4853         struct extent_backref *back;
4854         struct list_head *cur;
4855         while (!list_empty(&rec->backrefs)) {
4856                 cur = rec->backrefs.next;
4857                 back = to_extent_backref(cur);
4858                 list_del(cur);
4859                 free(back);
4860         }
4861         return 0;
4862 }
4863
4864 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4865                                      struct cache_tree *extent_cache)
4866 {
4867         struct cache_extent *cache;
4868         struct extent_record *rec;
4869
4870         while (1) {
4871                 cache = first_cache_extent(extent_cache);
4872                 if (!cache)
4873                         break;
4874                 rec = container_of(cache, struct extent_record, cache);
4875                 remove_cache_extent(extent_cache, cache);
4876                 free_all_extent_backrefs(rec);
4877                 free(rec);
4878         }
4879 }
4880
4881 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4882                                  struct extent_record *rec)
4883 {
4884         if (rec->content_checked && rec->owner_ref_checked &&
4885             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4886             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4887             !rec->bad_full_backref && !rec->crossing_stripes &&
4888             !rec->wrong_chunk_type) {
4889                 remove_cache_extent(extent_cache, &rec->cache);
4890                 free_all_extent_backrefs(rec);
4891                 list_del_init(&rec->list);
4892                 free(rec);
4893         }
4894         return 0;
4895 }
4896
4897 static int check_owner_ref(struct btrfs_root *root,
4898                             struct extent_record *rec,
4899                             struct extent_buffer *buf)
4900 {
4901         struct extent_backref *node;
4902         struct tree_backref *back;
4903         struct btrfs_root *ref_root;
4904         struct btrfs_key key;
4905         struct btrfs_path path;
4906         struct extent_buffer *parent;
4907         int level;
4908         int found = 0;
4909         int ret;
4910
4911         list_for_each_entry(node, &rec->backrefs, list) {
4912                 if (node->is_data)
4913                         continue;
4914                 if (!node->found_ref)
4915                         continue;
4916                 if (node->full_backref)
4917                         continue;
4918                 back = to_tree_backref(node);
4919                 if (btrfs_header_owner(buf) == back->root)
4920                         return 0;
4921         }
4922         BUG_ON(rec->is_root);
4923
4924         /* try to find the block by search corresponding fs tree */
4925         key.objectid = btrfs_header_owner(buf);
4926         key.type = BTRFS_ROOT_ITEM_KEY;
4927         key.offset = (u64)-1;
4928
4929         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4930         if (IS_ERR(ref_root))
4931                 return 1;
4932
4933         level = btrfs_header_level(buf);
4934         if (level == 0)
4935                 btrfs_item_key_to_cpu(buf, &key, 0);
4936         else
4937                 btrfs_node_key_to_cpu(buf, &key, 0);
4938
4939         btrfs_init_path(&path);
4940         path.lowest_level = level + 1;
4941         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4942         if (ret < 0)
4943                 return 0;
4944
4945         parent = path.nodes[level + 1];
4946         if (parent && buf->start == btrfs_node_blockptr(parent,
4947                                                         path.slots[level + 1]))
4948                 found = 1;
4949
4950         btrfs_release_path(&path);
4951         return found ? 0 : 1;
4952 }
4953
4954 static int is_extent_tree_record(struct extent_record *rec)
4955 {
4956         struct list_head *cur = rec->backrefs.next;
4957         struct extent_backref *node;
4958         struct tree_backref *back;
4959         int is_extent = 0;
4960
4961         while(cur != &rec->backrefs) {
4962                 node = to_extent_backref(cur);
4963                 cur = cur->next;
4964                 if (node->is_data)
4965                         return 0;
4966                 back = to_tree_backref(node);
4967                 if (node->full_backref)
4968                         return 0;
4969                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4970                         is_extent = 1;
4971         }
4972         return is_extent;
4973 }
4974
4975
4976 static int record_bad_block_io(struct btrfs_fs_info *info,
4977                                struct cache_tree *extent_cache,
4978                                u64 start, u64 len)
4979 {
4980         struct extent_record *rec;
4981         struct cache_extent *cache;
4982         struct btrfs_key key;
4983
4984         cache = lookup_cache_extent(extent_cache, start, len);
4985         if (!cache)
4986                 return 0;
4987
4988         rec = container_of(cache, struct extent_record, cache);
4989         if (!is_extent_tree_record(rec))
4990                 return 0;
4991
4992         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4993         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4994 }
4995
4996 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4997                        struct extent_buffer *buf, int slot)
4998 {
4999         if (btrfs_header_level(buf)) {
5000                 struct btrfs_key_ptr ptr1, ptr2;
5001
5002                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5003                                    sizeof(struct btrfs_key_ptr));
5004                 read_extent_buffer(buf, &ptr2,
5005                                    btrfs_node_key_ptr_offset(slot + 1),
5006                                    sizeof(struct btrfs_key_ptr));
5007                 write_extent_buffer(buf, &ptr1,
5008                                     btrfs_node_key_ptr_offset(slot + 1),
5009                                     sizeof(struct btrfs_key_ptr));
5010                 write_extent_buffer(buf, &ptr2,
5011                                     btrfs_node_key_ptr_offset(slot),
5012                                     sizeof(struct btrfs_key_ptr));
5013                 if (slot == 0) {
5014                         struct btrfs_disk_key key;
5015                         btrfs_node_key(buf, &key, 0);
5016                         btrfs_fixup_low_keys(root, path, &key,
5017                                              btrfs_header_level(buf) + 1);
5018                 }
5019         } else {
5020                 struct btrfs_item *item1, *item2;
5021                 struct btrfs_key k1, k2;
5022                 char *item1_data, *item2_data;
5023                 u32 item1_offset, item2_offset, item1_size, item2_size;
5024
5025                 item1 = btrfs_item_nr(slot);
5026                 item2 = btrfs_item_nr(slot + 1);
5027                 btrfs_item_key_to_cpu(buf, &k1, slot);
5028                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5029                 item1_offset = btrfs_item_offset(buf, item1);
5030                 item2_offset = btrfs_item_offset(buf, item2);
5031                 item1_size = btrfs_item_size(buf, item1);
5032                 item2_size = btrfs_item_size(buf, item2);
5033
5034                 item1_data = malloc(item1_size);
5035                 if (!item1_data)
5036                         return -ENOMEM;
5037                 item2_data = malloc(item2_size);
5038                 if (!item2_data) {
5039                         free(item1_data);
5040                         return -ENOMEM;
5041                 }
5042
5043                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5044                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5045
5046                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5047                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5048                 free(item1_data);
5049                 free(item2_data);
5050
5051                 btrfs_set_item_offset(buf, item1, item2_offset);
5052                 btrfs_set_item_offset(buf, item2, item1_offset);
5053                 btrfs_set_item_size(buf, item1, item2_size);
5054                 btrfs_set_item_size(buf, item2, item1_size);
5055
5056                 path->slots[0] = slot;
5057                 btrfs_set_item_key_unsafe(root, path, &k2);
5058                 path->slots[0] = slot + 1;
5059                 btrfs_set_item_key_unsafe(root, path, &k1);
5060         }
5061         return 0;
5062 }
5063
5064 static int fix_key_order(struct btrfs_trans_handle *trans,
5065                          struct btrfs_root *root,
5066                          struct btrfs_path *path)
5067 {
5068         struct extent_buffer *buf;
5069         struct btrfs_key k1, k2;
5070         int i;
5071         int level = path->lowest_level;
5072         int ret = -EIO;
5073
5074         buf = path->nodes[level];
5075         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5076                 if (level) {
5077                         btrfs_node_key_to_cpu(buf, &k1, i);
5078                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5079                 } else {
5080                         btrfs_item_key_to_cpu(buf, &k1, i);
5081                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5082                 }
5083                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5084                         continue;
5085                 ret = swap_values(root, path, buf, i);
5086                 if (ret)
5087                         break;
5088                 btrfs_mark_buffer_dirty(buf);
5089                 i = 0;
5090         }
5091         return ret;
5092 }
5093
5094 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5095                              struct btrfs_root *root,
5096                              struct btrfs_path *path,
5097                              struct extent_buffer *buf, int slot)
5098 {
5099         struct btrfs_key key;
5100         int nritems = btrfs_header_nritems(buf);
5101
5102         btrfs_item_key_to_cpu(buf, &key, slot);
5103
5104         /* These are all the keys we can deal with missing. */
5105         if (key.type != BTRFS_DIR_INDEX_KEY &&
5106             key.type != BTRFS_EXTENT_ITEM_KEY &&
5107             key.type != BTRFS_METADATA_ITEM_KEY &&
5108             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5109             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5110                 return -1;
5111
5112         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5113                (unsigned long long)key.objectid, key.type,
5114                (unsigned long long)key.offset, slot, buf->start);
5115         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5116                               btrfs_item_nr_offset(slot + 1),
5117                               sizeof(struct btrfs_item) *
5118                               (nritems - slot - 1));
5119         btrfs_set_header_nritems(buf, nritems - 1);
5120         if (slot == 0) {
5121                 struct btrfs_disk_key disk_key;
5122
5123                 btrfs_item_key(buf, &disk_key, 0);
5124                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5125         }
5126         btrfs_mark_buffer_dirty(buf);
5127         return 0;
5128 }
5129
5130 static int fix_item_offset(struct btrfs_trans_handle *trans,
5131                            struct btrfs_root *root,
5132                            struct btrfs_path *path)
5133 {
5134         struct extent_buffer *buf;
5135         int i;
5136         int ret = 0;
5137
5138         /* We should only get this for leaves */
5139         BUG_ON(path->lowest_level);
5140         buf = path->nodes[0];
5141 again:
5142         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5143                 unsigned int shift = 0, offset;
5144
5145                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5146                     BTRFS_LEAF_DATA_SIZE(root)) {
5147                         if (btrfs_item_end_nr(buf, i) >
5148                             BTRFS_LEAF_DATA_SIZE(root)) {
5149                                 ret = delete_bogus_item(trans, root, path,
5150                                                         buf, i);
5151                                 if (!ret)
5152                                         goto again;
5153                                 fprintf(stderr, "item is off the end of the "
5154                                         "leaf, can't fix\n");
5155                                 ret = -EIO;
5156                                 break;
5157                         }
5158                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5159                                 btrfs_item_end_nr(buf, i);
5160                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5161                            btrfs_item_offset_nr(buf, i - 1)) {
5162                         if (btrfs_item_end_nr(buf, i) >
5163                             btrfs_item_offset_nr(buf, i - 1)) {
5164                                 ret = delete_bogus_item(trans, root, path,
5165                                                         buf, i);
5166                                 if (!ret)
5167                                         goto again;
5168                                 fprintf(stderr, "items overlap, can't fix\n");
5169                                 ret = -EIO;
5170                                 break;
5171                         }
5172                         shift = btrfs_item_offset_nr(buf, i - 1) -
5173                                 btrfs_item_end_nr(buf, i);
5174                 }
5175                 if (!shift)
5176                         continue;
5177
5178                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5179                        i, shift, (unsigned long long)buf->start);
5180                 offset = btrfs_item_offset_nr(buf, i);
5181                 memmove_extent_buffer(buf,
5182                                       btrfs_leaf_data(buf) + offset + shift,
5183                                       btrfs_leaf_data(buf) + offset,
5184                                       btrfs_item_size_nr(buf, i));
5185                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5186                                       offset + shift);
5187                 btrfs_mark_buffer_dirty(buf);
5188         }
5189
5190         /*
5191          * We may have moved things, in which case we want to exit so we don't
5192          * write those changes out.  Once we have proper abort functionality in
5193          * progs this can be changed to something nicer.
5194          */
5195         BUG_ON(ret);
5196         return ret;
5197 }
5198
5199 /*
5200  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5201  * then just return -EIO.
5202  */
5203 static int try_to_fix_bad_block(struct btrfs_root *root,
5204                                 struct extent_buffer *buf,
5205                                 enum btrfs_tree_block_status status)
5206 {
5207         struct btrfs_trans_handle *trans;
5208         struct ulist *roots;
5209         struct ulist_node *node;
5210         struct btrfs_root *search_root;
5211         struct btrfs_path path;
5212         struct ulist_iterator iter;
5213         struct btrfs_key root_key, key;
5214         int ret;
5215
5216         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5217             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5218                 return -EIO;
5219
5220         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5221         if (ret)
5222                 return -EIO;
5223
5224         btrfs_init_path(&path);
5225         ULIST_ITER_INIT(&iter);
5226         while ((node = ulist_next(roots, &iter))) {
5227                 root_key.objectid = node->val;
5228                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5229                 root_key.offset = (u64)-1;
5230
5231                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5232                 if (IS_ERR(root)) {
5233                         ret = -EIO;
5234                         break;
5235                 }
5236
5237
5238                 trans = btrfs_start_transaction(search_root, 0);
5239                 if (IS_ERR(trans)) {
5240                         ret = PTR_ERR(trans);
5241                         break;
5242                 }
5243
5244                 path.lowest_level = btrfs_header_level(buf);
5245                 path.skip_check_block = 1;
5246                 if (path.lowest_level)
5247                         btrfs_node_key_to_cpu(buf, &key, 0);
5248                 else
5249                         btrfs_item_key_to_cpu(buf, &key, 0);
5250                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5251                 if (ret) {
5252                         ret = -EIO;
5253                         btrfs_commit_transaction(trans, search_root);
5254                         break;
5255                 }
5256                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5257                         ret = fix_key_order(trans, search_root, &path);
5258                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5259                         ret = fix_item_offset(trans, search_root, &path);
5260                 if (ret) {
5261                         btrfs_commit_transaction(trans, search_root);
5262                         break;
5263                 }
5264                 btrfs_release_path(&path);
5265                 btrfs_commit_transaction(trans, search_root);
5266         }
5267         ulist_free(roots);
5268         btrfs_release_path(&path);
5269         return ret;
5270 }
5271
5272 static int check_block(struct btrfs_root *root,
5273                        struct cache_tree *extent_cache,
5274                        struct extent_buffer *buf, u64 flags)
5275 {
5276         struct extent_record *rec;
5277         struct cache_extent *cache;
5278         struct btrfs_key key;
5279         enum btrfs_tree_block_status status;
5280         int ret = 0;
5281         int level;
5282
5283         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5284         if (!cache)
5285                 return 1;
5286         rec = container_of(cache, struct extent_record, cache);
5287         rec->generation = btrfs_header_generation(buf);
5288
5289         level = btrfs_header_level(buf);
5290         if (btrfs_header_nritems(buf) > 0) {
5291
5292                 if (level == 0)
5293                         btrfs_item_key_to_cpu(buf, &key, 0);
5294                 else
5295                         btrfs_node_key_to_cpu(buf, &key, 0);
5296
5297                 rec->info_objectid = key.objectid;
5298         }
5299         rec->info_level = level;
5300
5301         if (btrfs_is_leaf(buf))
5302                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5303         else
5304                 status = btrfs_check_node(root, &rec->parent_key, buf);
5305
5306         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5307                 if (repair)
5308                         status = try_to_fix_bad_block(root, buf, status);
5309                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5310                         ret = -EIO;
5311                         fprintf(stderr, "bad block %llu\n",
5312                                 (unsigned long long)buf->start);
5313                 } else {
5314                         /*
5315                          * Signal to callers we need to start the scan over
5316                          * again since we'll have cowed blocks.
5317                          */
5318                         ret = -EAGAIN;
5319                 }
5320         } else {
5321                 rec->content_checked = 1;
5322                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5323                         rec->owner_ref_checked = 1;
5324                 else {
5325                         ret = check_owner_ref(root, rec, buf);
5326                         if (!ret)
5327                                 rec->owner_ref_checked = 1;
5328                 }
5329         }
5330         if (!ret)
5331                 maybe_free_extent_rec(extent_cache, rec);
5332         return ret;
5333 }
5334
5335 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5336                                                 u64 parent, u64 root)
5337 {
5338         struct list_head *cur = rec->backrefs.next;
5339         struct extent_backref *node;
5340         struct tree_backref *back;
5341
5342         while(cur != &rec->backrefs) {
5343                 node = to_extent_backref(cur);
5344                 cur = cur->next;
5345                 if (node->is_data)
5346                         continue;
5347                 back = to_tree_backref(node);
5348                 if (parent > 0) {
5349                         if (!node->full_backref)
5350                                 continue;
5351                         if (parent == back->parent)
5352                                 return back;
5353                 } else {
5354                         if (node->full_backref)
5355                                 continue;
5356                         if (back->root == root)
5357                                 return back;
5358                 }
5359         }
5360         return NULL;
5361 }
5362
5363 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5364                                                 u64 parent, u64 root)
5365 {
5366         struct tree_backref *ref = malloc(sizeof(*ref));
5367
5368         if (!ref)
5369                 return NULL;
5370         memset(&ref->node, 0, sizeof(ref->node));
5371         if (parent > 0) {
5372                 ref->parent = parent;
5373                 ref->node.full_backref = 1;
5374         } else {
5375                 ref->root = root;
5376                 ref->node.full_backref = 0;
5377         }
5378         list_add_tail(&ref->node.list, &rec->backrefs);
5379
5380         return ref;
5381 }
5382
5383 static struct data_backref *find_data_backref(struct extent_record *rec,
5384                                                 u64 parent, u64 root,
5385                                                 u64 owner, u64 offset,
5386                                                 int found_ref,
5387                                                 u64 disk_bytenr, u64 bytes)
5388 {
5389         struct list_head *cur = rec->backrefs.next;
5390         struct extent_backref *node;
5391         struct data_backref *back;
5392
5393         while(cur != &rec->backrefs) {
5394                 node = to_extent_backref(cur);
5395                 cur = cur->next;
5396                 if (!node->is_data)
5397                         continue;
5398                 back = to_data_backref(node);
5399                 if (parent > 0) {
5400                         if (!node->full_backref)
5401                                 continue;
5402                         if (parent == back->parent)
5403                                 return back;
5404                 } else {
5405                         if (node->full_backref)
5406                                 continue;
5407                         if (back->root == root && back->owner == owner &&
5408                             back->offset == offset) {
5409                                 if (found_ref && node->found_ref &&
5410                                     (back->bytes != bytes ||
5411                                     back->disk_bytenr != disk_bytenr))
5412                                         continue;
5413                                 return back;
5414                         }
5415                 }
5416         }
5417         return NULL;
5418 }
5419
5420 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5421                                                 u64 parent, u64 root,
5422                                                 u64 owner, u64 offset,
5423                                                 u64 max_size)
5424 {
5425         struct data_backref *ref = malloc(sizeof(*ref));
5426
5427         if (!ref)
5428                 return NULL;
5429         memset(&ref->node, 0, sizeof(ref->node));
5430         ref->node.is_data = 1;
5431
5432         if (parent > 0) {
5433                 ref->parent = parent;
5434                 ref->owner = 0;
5435                 ref->offset = 0;
5436                 ref->node.full_backref = 1;
5437         } else {
5438                 ref->root = root;
5439                 ref->owner = owner;
5440                 ref->offset = offset;
5441                 ref->node.full_backref = 0;
5442         }
5443         ref->bytes = max_size;
5444         ref->found_ref = 0;
5445         ref->num_refs = 0;
5446         list_add_tail(&ref->node.list, &rec->backrefs);
5447         if (max_size > rec->max_size)
5448                 rec->max_size = max_size;
5449         return ref;
5450 }
5451
5452 /* Check if the type of extent matches with its chunk */
5453 static void check_extent_type(struct extent_record *rec)
5454 {
5455         struct btrfs_block_group_cache *bg_cache;
5456
5457         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5458         if (!bg_cache)
5459                 return;
5460
5461         /* data extent, check chunk directly*/
5462         if (!rec->metadata) {
5463                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5464                         rec->wrong_chunk_type = 1;
5465                 return;
5466         }
5467
5468         /* metadata extent, check the obvious case first */
5469         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5470                                  BTRFS_BLOCK_GROUP_METADATA))) {
5471                 rec->wrong_chunk_type = 1;
5472                 return;
5473         }
5474
5475         /*
5476          * Check SYSTEM extent, as it's also marked as metadata, we can only
5477          * make sure it's a SYSTEM extent by its backref
5478          */
5479         if (!list_empty(&rec->backrefs)) {
5480                 struct extent_backref *node;
5481                 struct tree_backref *tback;
5482                 u64 bg_type;
5483
5484                 node = to_extent_backref(rec->backrefs.next);
5485                 if (node->is_data) {
5486                         /* tree block shouldn't have data backref */
5487                         rec->wrong_chunk_type = 1;
5488                         return;
5489                 }
5490                 tback = container_of(node, struct tree_backref, node);
5491
5492                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5493                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5494                 else
5495                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5496                 if (!(bg_cache->flags & bg_type))
5497                         rec->wrong_chunk_type = 1;
5498         }
5499 }
5500
5501 /*
5502  * Allocate a new extent record, fill default values from @tmpl and insert int
5503  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5504  * the cache, otherwise it fails.
5505  */
5506 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5507                 struct extent_record *tmpl)
5508 {
5509         struct extent_record *rec;
5510         int ret = 0;
5511
5512         rec = malloc(sizeof(*rec));
5513         if (!rec)
5514                 return -ENOMEM;
5515         rec->start = tmpl->start;
5516         rec->max_size = tmpl->max_size;
5517         rec->nr = max(tmpl->nr, tmpl->max_size);
5518         rec->found_rec = tmpl->found_rec;
5519         rec->content_checked = tmpl->content_checked;
5520         rec->owner_ref_checked = tmpl->owner_ref_checked;
5521         rec->num_duplicates = 0;
5522         rec->metadata = tmpl->metadata;
5523         rec->flag_block_full_backref = FLAG_UNSET;
5524         rec->bad_full_backref = 0;
5525         rec->crossing_stripes = 0;
5526         rec->wrong_chunk_type = 0;
5527         rec->is_root = tmpl->is_root;
5528         rec->refs = tmpl->refs;
5529         rec->extent_item_refs = tmpl->extent_item_refs;
5530         rec->parent_generation = tmpl->parent_generation;
5531         INIT_LIST_HEAD(&rec->backrefs);
5532         INIT_LIST_HEAD(&rec->dups);
5533         INIT_LIST_HEAD(&rec->list);
5534         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5535         rec->cache.start = tmpl->start;
5536         rec->cache.size = tmpl->nr;
5537         ret = insert_cache_extent(extent_cache, &rec->cache);
5538         if (ret) {
5539                 free(rec);
5540                 return ret;
5541         }
5542         bytes_used += rec->nr;
5543
5544         if (tmpl->metadata)
5545                 rec->crossing_stripes = check_crossing_stripes(global_info,
5546                                 rec->start, global_info->tree_root->nodesize);
5547         check_extent_type(rec);
5548         return ret;
5549 }
5550
5551 /*
5552  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5553  * some are hints:
5554  * - refs              - if found, increase refs
5555  * - is_root           - if found, set
5556  * - content_checked   - if found, set
5557  * - owner_ref_checked - if found, set
5558  *
5559  * If not found, create a new one, initialize and insert.
5560  */
5561 static int add_extent_rec(struct cache_tree *extent_cache,
5562                 struct extent_record *tmpl)
5563 {
5564         struct extent_record *rec;
5565         struct cache_extent *cache;
5566         int ret = 0;
5567         int dup = 0;
5568
5569         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5570         if (cache) {
5571                 rec = container_of(cache, struct extent_record, cache);
5572                 if (tmpl->refs)
5573                         rec->refs++;
5574                 if (rec->nr == 1)
5575                         rec->nr = max(tmpl->nr, tmpl->max_size);
5576
5577                 /*
5578                  * We need to make sure to reset nr to whatever the extent
5579                  * record says was the real size, this way we can compare it to
5580                  * the backrefs.
5581                  */
5582                 if (tmpl->found_rec) {
5583                         if (tmpl->start != rec->start || rec->found_rec) {
5584                                 struct extent_record *tmp;
5585
5586                                 dup = 1;
5587                                 if (list_empty(&rec->list))
5588                                         list_add_tail(&rec->list,
5589                                                       &duplicate_extents);
5590
5591                                 /*
5592                                  * We have to do this song and dance in case we
5593                                  * find an extent record that falls inside of
5594                                  * our current extent record but does not have
5595                                  * the same objectid.
5596                                  */
5597                                 tmp = malloc(sizeof(*tmp));
5598                                 if (!tmp)
5599                                         return -ENOMEM;
5600                                 tmp->start = tmpl->start;
5601                                 tmp->max_size = tmpl->max_size;
5602                                 tmp->nr = tmpl->nr;
5603                                 tmp->found_rec = 1;
5604                                 tmp->metadata = tmpl->metadata;
5605                                 tmp->extent_item_refs = tmpl->extent_item_refs;
5606                                 INIT_LIST_HEAD(&tmp->list);
5607                                 list_add_tail(&tmp->list, &rec->dups);
5608                                 rec->num_duplicates++;
5609                         } else {
5610                                 rec->nr = tmpl->nr;
5611                                 rec->found_rec = 1;
5612                         }
5613                 }
5614
5615                 if (tmpl->extent_item_refs && !dup) {
5616                         if (rec->extent_item_refs) {
5617                                 fprintf(stderr, "block %llu rec "
5618                                         "extent_item_refs %llu, passed %llu\n",
5619                                         (unsigned long long)tmpl->start,
5620                                         (unsigned long long)
5621                                                         rec->extent_item_refs,
5622                                         (unsigned long long)tmpl->extent_item_refs);
5623                         }
5624                         rec->extent_item_refs = tmpl->extent_item_refs;
5625                 }
5626                 if (tmpl->is_root)
5627                         rec->is_root = 1;
5628                 if (tmpl->content_checked)
5629                         rec->content_checked = 1;
5630                 if (tmpl->owner_ref_checked)
5631                         rec->owner_ref_checked = 1;
5632                 memcpy(&rec->parent_key, &tmpl->parent_key,
5633                                 sizeof(tmpl->parent_key));
5634                 if (tmpl->parent_generation)
5635                         rec->parent_generation = tmpl->parent_generation;
5636                 if (rec->max_size < tmpl->max_size)
5637                         rec->max_size = tmpl->max_size;
5638
5639                 /*
5640                  * A metadata extent can't cross stripe_len boundary, otherwise
5641                  * kernel scrub won't be able to handle it.
5642                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5643                  * it.
5644                  */
5645                 if (tmpl->metadata)
5646                         rec->crossing_stripes = check_crossing_stripes(
5647                                         global_info, rec->start,
5648                                         global_info->tree_root->nodesize);
5649                 check_extent_type(rec);
5650                 maybe_free_extent_rec(extent_cache, rec);
5651                 return ret;
5652         }
5653
5654         ret = add_extent_rec_nolookup(extent_cache, tmpl);
5655
5656         return ret;
5657 }
5658
5659 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5660                             u64 parent, u64 root, int found_ref)
5661 {
5662         struct extent_record *rec;
5663         struct tree_backref *back;
5664         struct cache_extent *cache;
5665         int ret;
5666
5667         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5668         if (!cache) {
5669                 struct extent_record tmpl;
5670
5671                 memset(&tmpl, 0, sizeof(tmpl));
5672                 tmpl.start = bytenr;
5673                 tmpl.nr = 1;
5674                 tmpl.metadata = 1;
5675
5676                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5677                 if (ret)
5678                         return ret;
5679
5680                 /* really a bug in cache_extent implement now */
5681                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5682                 if (!cache)
5683                         return -ENOENT;
5684         }
5685
5686         rec = container_of(cache, struct extent_record, cache);
5687         if (rec->start != bytenr) {
5688                 /*
5689                  * Several cause, from unaligned bytenr to over lapping extents
5690                  */
5691                 return -EEXIST;
5692         }
5693
5694         back = find_tree_backref(rec, parent, root);
5695         if (!back) {
5696                 back = alloc_tree_backref(rec, parent, root);
5697                 if (!back)
5698                         return -ENOMEM;
5699         }
5700
5701         if (found_ref) {
5702                 if (back->node.found_ref) {
5703                         fprintf(stderr, "Extent back ref already exists "
5704                                 "for %llu parent %llu root %llu \n",
5705                                 (unsigned long long)bytenr,
5706                                 (unsigned long long)parent,
5707                                 (unsigned long long)root);
5708                 }
5709                 back->node.found_ref = 1;
5710         } else {
5711                 if (back->node.found_extent_tree) {
5712                         fprintf(stderr, "Extent back ref already exists "
5713                                 "for %llu parent %llu root %llu \n",
5714                                 (unsigned long long)bytenr,
5715                                 (unsigned long long)parent,
5716                                 (unsigned long long)root);
5717                 }
5718                 back->node.found_extent_tree = 1;
5719         }
5720         check_extent_type(rec);
5721         maybe_free_extent_rec(extent_cache, rec);
5722         return 0;
5723 }
5724
5725 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5726                             u64 parent, u64 root, u64 owner, u64 offset,
5727                             u32 num_refs, int found_ref, u64 max_size)
5728 {
5729         struct extent_record *rec;
5730         struct data_backref *back;
5731         struct cache_extent *cache;
5732         int ret;
5733
5734         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5735         if (!cache) {
5736                 struct extent_record tmpl;
5737
5738                 memset(&tmpl, 0, sizeof(tmpl));
5739                 tmpl.start = bytenr;
5740                 tmpl.nr = 1;
5741                 tmpl.max_size = max_size;
5742
5743                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5744                 if (ret)
5745                         return ret;
5746
5747                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5748                 if (!cache)
5749                         abort();
5750         }
5751
5752         rec = container_of(cache, struct extent_record, cache);
5753         if (rec->max_size < max_size)
5754                 rec->max_size = max_size;
5755
5756         /*
5757          * If found_ref is set then max_size is the real size and must match the
5758          * existing refs.  So if we have already found a ref then we need to
5759          * make sure that this ref matches the existing one, otherwise we need
5760          * to add a new backref so we can notice that the backrefs don't match
5761          * and we need to figure out who is telling the truth.  This is to
5762          * account for that awful fsync bug I introduced where we'd end up with
5763          * a btrfs_file_extent_item that would have its length include multiple
5764          * prealloc extents or point inside of a prealloc extent.
5765          */
5766         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5767                                  bytenr, max_size);
5768         if (!back) {
5769                 back = alloc_data_backref(rec, parent, root, owner, offset,
5770                                           max_size);
5771                 BUG_ON(!back);
5772         }
5773
5774         if (found_ref) {
5775                 BUG_ON(num_refs != 1);
5776                 if (back->node.found_ref)
5777                         BUG_ON(back->bytes != max_size);
5778                 back->node.found_ref = 1;
5779                 back->found_ref += 1;
5780                 back->bytes = max_size;
5781                 back->disk_bytenr = bytenr;
5782                 rec->refs += 1;
5783                 rec->content_checked = 1;
5784                 rec->owner_ref_checked = 1;
5785         } else {
5786                 if (back->node.found_extent_tree) {
5787                         fprintf(stderr, "Extent back ref already exists "
5788                                 "for %llu parent %llu root %llu "
5789                                 "owner %llu offset %llu num_refs %lu\n",
5790                                 (unsigned long long)bytenr,
5791                                 (unsigned long long)parent,
5792                                 (unsigned long long)root,
5793                                 (unsigned long long)owner,
5794                                 (unsigned long long)offset,
5795                                 (unsigned long)num_refs);
5796                 }
5797                 back->num_refs = num_refs;
5798                 back->node.found_extent_tree = 1;
5799         }
5800         maybe_free_extent_rec(extent_cache, rec);
5801         return 0;
5802 }
5803
5804 static int add_pending(struct cache_tree *pending,
5805                        struct cache_tree *seen, u64 bytenr, u32 size)
5806 {
5807         int ret;
5808         ret = add_cache_extent(seen, bytenr, size);
5809         if (ret)
5810                 return ret;
5811         add_cache_extent(pending, bytenr, size);
5812         return 0;
5813 }
5814
5815 static int pick_next_pending(struct cache_tree *pending,
5816                         struct cache_tree *reada,
5817                         struct cache_tree *nodes,
5818                         u64 last, struct block_info *bits, int bits_nr,
5819                         int *reada_bits)
5820 {
5821         unsigned long node_start = last;
5822         struct cache_extent *cache;
5823         int ret;
5824
5825         cache = search_cache_extent(reada, 0);
5826         if (cache) {
5827                 bits[0].start = cache->start;
5828                 bits[0].size = cache->size;
5829                 *reada_bits = 1;
5830                 return 1;
5831         }
5832         *reada_bits = 0;
5833         if (node_start > 32768)
5834                 node_start -= 32768;
5835
5836         cache = search_cache_extent(nodes, node_start);
5837         if (!cache)
5838                 cache = search_cache_extent(nodes, 0);
5839
5840         if (!cache) {
5841                  cache = search_cache_extent(pending, 0);
5842                  if (!cache)
5843                          return 0;
5844                  ret = 0;
5845                  do {
5846                          bits[ret].start = cache->start;
5847                          bits[ret].size = cache->size;
5848                          cache = next_cache_extent(cache);
5849                          ret++;
5850                  } while (cache && ret < bits_nr);
5851                  return ret;
5852         }
5853
5854         ret = 0;
5855         do {
5856                 bits[ret].start = cache->start;
5857                 bits[ret].size = cache->size;
5858                 cache = next_cache_extent(cache);
5859                 ret++;
5860         } while (cache && ret < bits_nr);
5861
5862         if (bits_nr - ret > 8) {
5863                 u64 lookup = bits[0].start + bits[0].size;
5864                 struct cache_extent *next;
5865                 next = search_cache_extent(pending, lookup);
5866                 while(next) {
5867                         if (next->start - lookup > 32768)
5868                                 break;
5869                         bits[ret].start = next->start;
5870                         bits[ret].size = next->size;
5871                         lookup = next->start + next->size;
5872                         ret++;
5873                         if (ret == bits_nr)
5874                                 break;
5875                         next = next_cache_extent(next);
5876                         if (!next)
5877                                 break;
5878                 }
5879         }
5880         return ret;
5881 }
5882
5883 static void free_chunk_record(struct cache_extent *cache)
5884 {
5885         struct chunk_record *rec;
5886
5887         rec = container_of(cache, struct chunk_record, cache);
5888         list_del_init(&rec->list);
5889         list_del_init(&rec->dextents);
5890         free(rec);
5891 }
5892
/*
 * Free every extent in @chunk_cache, passing free_chunk_record() to
 * cache_tree_free_extents() as the per-entry destructor.
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
	cache_tree_free_extents(chunk_cache, free_chunk_record);
}
5897
5898 static void free_device_record(struct rb_node *node)
5899 {
5900         struct device_record *rec;
5901
5902         rec = container_of(node, struct device_record, node);
5903         free(rec);
5904 }
5905
/*
 * Instantiate FREE_RB_BASED_TREE for "device_cache" with free_device_record()
 * as its second argument.
 * NOTE(review): the macro definition is not visible here -- presumably it
 * generates the function that tears down the device cache rb-tree; confirm
 * the generated name and signature against the macro's header.
 */
FREE_RB_BASED_TREE(device_cache, free_device_record);
5907
5908 int insert_block_group_record(struct block_group_tree *tree,
5909                               struct block_group_record *bg_rec)
5910 {
5911         int ret;
5912
5913         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5914         if (ret)
5915                 return ret;
5916
5917         list_add_tail(&bg_rec->list, &tree->block_groups);
5918         return 0;
5919 }
5920
5921 static void free_block_group_record(struct cache_extent *cache)
5922 {
5923         struct block_group_record *rec;
5924
5925         rec = container_of(cache, struct block_group_record, cache);
5926         list_del_init(&rec->list);
5927         free(rec);
5928 }
5929
/*
 * Free every extent in the cache tree of @tree, passing
 * free_block_group_record() to cache_tree_free_extents() as the per-entry
 * destructor.
 */
void free_block_group_tree(struct block_group_tree *tree)
{
	cache_tree_free_extents(&tree->tree, free_block_group_record);
}
5934
5935 int insert_device_extent_record(struct device_extent_tree *tree,
5936                                 struct device_extent_record *de_rec)
5937 {
5938         int ret;
5939
5940         /*
5941          * Device extent is a bit different from the other extents, because
5942          * the extents which belong to the different devices may have the
5943          * same start and size, so we need use the special extent cache
5944          * search/insert functions.
5945          */
5946         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5947         if (ret)
5948                 return ret;
5949
5950         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5951         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5952         return 0;
5953 }
5954
5955 static void free_device_extent_record(struct cache_extent *cache)
5956 {
5957         struct device_extent_record *rec;
5958
5959         rec = container_of(cache, struct device_extent_record, cache);
5960         if (!list_empty(&rec->chunk_list))
5961                 list_del_init(&rec->chunk_list);
5962         if (!list_empty(&rec->device_list))
5963                 list_del_init(&rec->device_list);
5964         free(rec);
5965 }
5966
/*
 * Free every extent in the cache tree of @tree, passing
 * free_device_extent_record() to cache_tree_free_extents() as the per-entry
 * destructor.
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
	cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
5971
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref found in @slot of @leaf into a backref.
 *
 * key.objectid is passed on as the extent bytenr and key.offset as the
 * parent. A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID becomes a
 * tree backref; anything else becomes a data backref carrying the v0 ref
 * count. Both are recorded as not found (found_ref == 0).
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
				key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
			0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
5992
5993 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5994                                             struct btrfs_key *key,
5995                                             int slot)
5996 {
5997         struct btrfs_chunk *ptr;
5998         struct chunk_record *rec;
5999         int num_stripes, i;
6000
6001         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6002         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6003
6004         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6005         if (!rec) {
6006                 fprintf(stderr, "memory allocation failed\n");
6007                 exit(-1);
6008         }
6009
6010         INIT_LIST_HEAD(&rec->list);
6011         INIT_LIST_HEAD(&rec->dextents);
6012         rec->bg_rec = NULL;
6013
6014         rec->cache.start = key->offset;
6015         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6016
6017         rec->generation = btrfs_header_generation(leaf);
6018
6019         rec->objectid = key->objectid;
6020         rec->type = key->type;
6021         rec->offset = key->offset;
6022
6023         rec->length = rec->cache.size;
6024         rec->owner = btrfs_chunk_owner(leaf, ptr);
6025         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6026         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6027         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6028         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6029         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6030         rec->num_stripes = num_stripes;
6031         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6032
6033         for (i = 0; i < rec->num_stripes; ++i) {
6034                 rec->stripes[i].devid =
6035                         btrfs_stripe_devid_nr(leaf, ptr, i);
6036                 rec->stripes[i].offset =
6037                         btrfs_stripe_offset_nr(leaf, ptr, i);
6038                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6039                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6040                                 BTRFS_UUID_SIZE);
6041         }
6042
6043         return rec;
6044 }
6045
6046 static int process_chunk_item(struct cache_tree *chunk_cache,
6047                               struct btrfs_key *key, struct extent_buffer *eb,
6048                               int slot)
6049 {
6050         struct chunk_record *rec;
6051         struct btrfs_chunk *chunk;
6052         int ret = 0;
6053
6054         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6055         /*
6056          * Do extra check for this chunk item,
6057          *
6058          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6059          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6060          * and owner<->key_type check.
6061          */
6062         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6063                                       key->offset);
6064         if (ret < 0) {
6065                 error("chunk(%llu, %llu) is not valid, ignore it",
6066                       key->offset, btrfs_chunk_length(eb, chunk));
6067                 return 0;
6068         }
6069         rec = btrfs_new_chunk_record(eb, key, slot);
6070         ret = insert_cache_extent(chunk_cache, &rec->cache);
6071         if (ret) {
6072                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6073                         rec->offset, rec->length);
6074                 free(rec);
6075         }
6076
6077         return ret;
6078 }
6079
6080 static int process_device_item(struct rb_root *dev_cache,
6081                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6082 {
6083         struct btrfs_dev_item *ptr;
6084         struct device_record *rec;
6085         int ret = 0;
6086
6087         ptr = btrfs_item_ptr(eb,
6088                 slot, struct btrfs_dev_item);
6089
6090         rec = malloc(sizeof(*rec));
6091         if (!rec) {
6092                 fprintf(stderr, "memory allocation failed\n");
6093                 return -ENOMEM;
6094         }
6095
6096         rec->devid = key->offset;
6097         rec->generation = btrfs_header_generation(eb);
6098
6099         rec->objectid = key->objectid;
6100         rec->type = key->type;
6101         rec->offset = key->offset;
6102
6103         rec->devid = btrfs_device_id(eb, ptr);
6104         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6105         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6106
6107         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6108         if (ret) {
6109                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6110                 free(rec);
6111         }
6112
6113         return ret;
6114 }
6115
6116 struct block_group_record *
6117 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6118                              int slot)
6119 {
6120         struct btrfs_block_group_item *ptr;
6121         struct block_group_record *rec;
6122
6123         rec = calloc(1, sizeof(*rec));
6124         if (!rec) {
6125                 fprintf(stderr, "memory allocation failed\n");
6126                 exit(-1);
6127         }
6128
6129         rec->cache.start = key->objectid;
6130         rec->cache.size = key->offset;
6131
6132         rec->generation = btrfs_header_generation(leaf);
6133
6134         rec->objectid = key->objectid;
6135         rec->type = key->type;
6136         rec->offset = key->offset;
6137
6138         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6139         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6140
6141         INIT_LIST_HEAD(&rec->list);
6142
6143         return rec;
6144 }
6145
6146 static int process_block_group_item(struct block_group_tree *block_group_cache,
6147                                     struct btrfs_key *key,
6148                                     struct extent_buffer *eb, int slot)
6149 {
6150         struct block_group_record *rec;
6151         int ret = 0;
6152
6153         rec = btrfs_new_block_group_record(eb, key, slot);
6154         ret = insert_block_group_record(block_group_cache, rec);
6155         if (ret) {
6156                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6157                         rec->objectid, rec->offset);
6158                 free(rec);
6159         }
6160
6161         return ret;
6162 }
6163
6164 struct device_extent_record *
6165 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6166                                struct btrfs_key *key, int slot)
6167 {
6168         struct device_extent_record *rec;
6169         struct btrfs_dev_extent *ptr;
6170
6171         rec = calloc(1, sizeof(*rec));
6172         if (!rec) {
6173                 fprintf(stderr, "memory allocation failed\n");
6174                 exit(-1);
6175         }
6176
6177         rec->cache.objectid = key->objectid;
6178         rec->cache.start = key->offset;
6179
6180         rec->generation = btrfs_header_generation(leaf);
6181
6182         rec->objectid = key->objectid;
6183         rec->type = key->type;
6184         rec->offset = key->offset;
6185
6186         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6187         rec->chunk_objecteid =
6188                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6189         rec->chunk_offset =
6190                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6191         rec->length = btrfs_dev_extent_length(leaf, ptr);
6192         rec->cache.size = rec->length;
6193
6194         INIT_LIST_HEAD(&rec->chunk_list);
6195         INIT_LIST_HEAD(&rec->device_list);
6196
6197         return rec;
6198 }
6199
6200 static int
6201 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6202                            struct btrfs_key *key, struct extent_buffer *eb,
6203                            int slot)
6204 {
6205         struct device_extent_record *rec;
6206         int ret;
6207
6208         rec = btrfs_new_device_extent_record(eb, key, slot);
6209         ret = insert_device_extent_record(dev_extent_cache, rec);
6210         if (ret) {
6211                 fprintf(stderr,
6212                         "Device extent[%llu, %llu, %llu] existed.\n",
6213                         rec->objectid, rec->offset, rec->length);
6214                 free(rec);
6215         }
6216
6217         return ret;
6218 }
6219
6220 static int process_extent_item(struct btrfs_root *root,
6221                                struct cache_tree *extent_cache,
6222                                struct extent_buffer *eb, int slot)
6223 {
6224         struct btrfs_extent_item *ei;
6225         struct btrfs_extent_inline_ref *iref;
6226         struct btrfs_extent_data_ref *dref;
6227         struct btrfs_shared_data_ref *sref;
6228         struct btrfs_key key;
6229         struct extent_record tmpl;
6230         unsigned long end;
6231         unsigned long ptr;
6232         int ret;
6233         int type;
6234         u32 item_size = btrfs_item_size_nr(eb, slot);
6235         u64 refs = 0;
6236         u64 offset;
6237         u64 num_bytes;
6238         int metadata = 0;
6239
6240         btrfs_item_key_to_cpu(eb, &key, slot);
6241
6242         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6243                 metadata = 1;
6244                 num_bytes = root->nodesize;
6245         } else {
6246                 num_bytes = key.offset;
6247         }
6248
6249         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6250                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6251                       key.objectid, root->sectorsize);
6252                 return -EIO;
6253         }
6254         if (item_size < sizeof(*ei)) {
6255 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6256                 struct btrfs_extent_item_v0 *ei0;
6257                 BUG_ON(item_size != sizeof(*ei0));
6258                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6259                 refs = btrfs_extent_refs_v0(eb, ei0);
6260 #else
6261                 BUG();
6262 #endif
6263                 memset(&tmpl, 0, sizeof(tmpl));
6264                 tmpl.start = key.objectid;
6265                 tmpl.nr = num_bytes;
6266                 tmpl.extent_item_refs = refs;
6267                 tmpl.metadata = metadata;
6268                 tmpl.found_rec = 1;
6269                 tmpl.max_size = num_bytes;
6270
6271                 return add_extent_rec(extent_cache, &tmpl);
6272         }
6273
6274         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6275         refs = btrfs_extent_refs(eb, ei);
6276         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6277                 metadata = 1;
6278         else
6279                 metadata = 0;
6280         if (metadata && num_bytes != root->nodesize) {
6281                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6282                       num_bytes, root->nodesize);
6283                 return -EIO;
6284         }
6285         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6286                 error("ignore invalid data extent, length %llu is not aligned to %u",
6287                       num_bytes, root->sectorsize);
6288                 return -EIO;
6289         }
6290
6291         memset(&tmpl, 0, sizeof(tmpl));
6292         tmpl.start = key.objectid;
6293         tmpl.nr = num_bytes;
6294         tmpl.extent_item_refs = refs;
6295         tmpl.metadata = metadata;
6296         tmpl.found_rec = 1;
6297         tmpl.max_size = num_bytes;
6298         add_extent_rec(extent_cache, &tmpl);
6299
6300         ptr = (unsigned long)(ei + 1);
6301         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6302             key.type == BTRFS_EXTENT_ITEM_KEY)
6303                 ptr += sizeof(struct btrfs_tree_block_info);
6304
6305         end = (unsigned long)ei + item_size;
6306         while (ptr < end) {
6307                 iref = (struct btrfs_extent_inline_ref *)ptr;
6308                 type = btrfs_extent_inline_ref_type(eb, iref);
6309                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6310                 switch (type) {
6311                 case BTRFS_TREE_BLOCK_REF_KEY:
6312                         ret = add_tree_backref(extent_cache, key.objectid,
6313                                         0, offset, 0);
6314                         if (ret < 0)
6315                                 error("add_tree_backref failed: %s",
6316                                       strerror(-ret));
6317                         break;
6318                 case BTRFS_SHARED_BLOCK_REF_KEY:
6319                         ret = add_tree_backref(extent_cache, key.objectid,
6320                                         offset, 0, 0);
6321                         if (ret < 0)
6322                                 error("add_tree_backref failed: %s",
6323                                       strerror(-ret));
6324                         break;
6325                 case BTRFS_EXTENT_DATA_REF_KEY:
6326                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6327                         add_data_backref(extent_cache, key.objectid, 0,
6328                                         btrfs_extent_data_ref_root(eb, dref),
6329                                         btrfs_extent_data_ref_objectid(eb,
6330                                                                        dref),
6331                                         btrfs_extent_data_ref_offset(eb, dref),
6332                                         btrfs_extent_data_ref_count(eb, dref),
6333                                         0, num_bytes);
6334                         break;
6335                 case BTRFS_SHARED_DATA_REF_KEY:
6336                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6337                         add_data_backref(extent_cache, key.objectid, offset,
6338                                         0, 0, 0,
6339                                         btrfs_shared_data_ref_count(eb, sref),
6340                                         0, num_bytes);
6341                         break;
6342                 default:
6343                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6344                                 key.objectid, key.type, num_bytes);
6345                         goto out;
6346                 }
6347                 ptr += btrfs_extent_inline_ref_size(type);
6348         }
6349         WARN_ON(ptr > end);
6350 out:
6351         return 0;
6352 }
6353
6354 static int check_cache_range(struct btrfs_root *root,
6355                              struct btrfs_block_group_cache *cache,
6356                              u64 offset, u64 bytes)
6357 {
6358         struct btrfs_free_space *entry;
6359         u64 *logical;
6360         u64 bytenr;
6361         int stripe_len;
6362         int i, nr, ret;
6363
6364         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6365                 bytenr = btrfs_sb_offset(i);
6366                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6367                                        cache->key.objectid, bytenr, 0,
6368                                        &logical, &nr, &stripe_len);
6369                 if (ret)
6370                         return ret;
6371
6372                 while (nr--) {
6373                         if (logical[nr] + stripe_len <= offset)
6374                                 continue;
6375                         if (offset + bytes <= logical[nr])
6376                                 continue;
6377                         if (logical[nr] == offset) {
6378                                 if (stripe_len >= bytes) {
6379                                         free(logical);
6380                                         return 0;
6381                                 }
6382                                 bytes -= stripe_len;
6383                                 offset += stripe_len;
6384                         } else if (logical[nr] < offset) {
6385                                 if (logical[nr] + stripe_len >=
6386                                     offset + bytes) {
6387                                         free(logical);
6388                                         return 0;
6389                                 }
6390                                 bytes = (offset + bytes) -
6391                                         (logical[nr] + stripe_len);
6392                                 offset = logical[nr] + stripe_len;
6393                         } else {
6394                                 /*
6395                                  * Could be tricky, the super may land in the
6396                                  * middle of the area we're checking.  First
6397                                  * check the easiest case, it's at the end.
6398                                  */
6399                                 if (logical[nr] + stripe_len >=
6400                                     bytes + offset) {
6401                                         bytes = logical[nr] - offset;
6402                                         continue;
6403                                 }
6404
6405                                 /* Check the left side */
6406                                 ret = check_cache_range(root, cache,
6407                                                         offset,
6408                                                         logical[nr] - offset);
6409                                 if (ret) {
6410                                         free(logical);
6411                                         return ret;
6412                                 }
6413
6414                                 /* Now we continue with the right side */
6415                                 bytes = (offset + bytes) -
6416                                         (logical[nr] + stripe_len);
6417                                 offset = logical[nr] + stripe_len;
6418                         }
6419                 }
6420
6421                 free(logical);
6422         }
6423
6424         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6425         if (!entry) {
6426                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6427                         offset, offset+bytes);
6428                 return -EINVAL;
6429         }
6430
6431         if (entry->offset != offset) {
6432                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6433                         entry->offset);
6434                 return -EINVAL;
6435         }
6436
6437         if (entry->bytes != bytes) {
6438                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6439                         bytes, entry->bytes, offset);
6440                 return -EINVAL;
6441         }
6442
6443         unlink_free_space(cache->free_space_ctl, entry);
6444         free(entry);
6445         return 0;
6446 }
6447
6448 static int verify_space_cache(struct btrfs_root *root,
6449                               struct btrfs_block_group_cache *cache)
6450 {
6451         struct btrfs_path path;
6452         struct extent_buffer *leaf;
6453         struct btrfs_key key;
6454         u64 last;
6455         int ret = 0;
6456
6457         root = root->fs_info->extent_root;
6458
6459         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6460
6461         btrfs_init_path(&path);
6462         key.objectid = last;
6463         key.offset = 0;
6464         key.type = BTRFS_EXTENT_ITEM_KEY;
6465         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6466         if (ret < 0)
6467                 goto out;
6468         ret = 0;
6469         while (1) {
6470                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6471                         ret = btrfs_next_leaf(root, &path);
6472                         if (ret < 0)
6473                                 goto out;
6474                         if (ret > 0) {
6475                                 ret = 0;
6476                                 break;
6477                         }
6478                 }
6479                 leaf = path.nodes[0];
6480                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6481                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6482                         break;
6483                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6484                     key.type != BTRFS_METADATA_ITEM_KEY) {
6485                         path.slots[0]++;
6486                         continue;
6487                 }
6488
6489                 if (last == key.objectid) {
6490                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6491                                 last = key.objectid + key.offset;
6492                         else
6493                                 last = key.objectid + root->nodesize;
6494                         path.slots[0]++;
6495                         continue;
6496                 }
6497
6498                 ret = check_cache_range(root, cache, last,
6499                                         key.objectid - last);
6500                 if (ret)
6501                         break;
6502                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6503                         last = key.objectid + key.offset;
6504                 else
6505                         last = key.objectid + root->nodesize;
6506                 path.slots[0]++;
6507         }
6508
6509         if (last < cache->key.objectid + cache->key.offset)
6510                 ret = check_cache_range(root, cache, last,
6511                                         cache->key.objectid +
6512                                         cache->key.offset - last);
6513
6514 out:
6515         btrfs_release_path(&path);
6516
6517         if (!ret &&
6518             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6519                 fprintf(stderr, "There are still entries left in the space "
6520                         "cache\n");
6521                 ret = -EINVAL;
6522         }
6523
6524         return ret;
6525 }
6526
6527 static int check_space_cache(struct btrfs_root *root)
6528 {
6529         struct btrfs_block_group_cache *cache;
6530         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6531         int ret;
6532         int error = 0;
6533
6534         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6535             btrfs_super_generation(root->fs_info->super_copy) !=
6536             btrfs_super_cache_generation(root->fs_info->super_copy)) {
6537                 printf("cache and super generation don't match, space cache "
6538                        "will be invalidated\n");
6539                 return 0;
6540         }
6541
6542         if (ctx.progress_enabled) {
6543                 ctx.tp = TASK_FREE_SPACE;
6544                 task_start(ctx.info);
6545         }
6546
6547         while (1) {
6548                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6549                 if (!cache)
6550                         break;
6551
6552                 start = cache->key.objectid + cache->key.offset;
6553                 if (!cache->free_space_ctl) {
6554                         if (btrfs_init_free_space_ctl(cache,
6555                                                       root->sectorsize)) {
6556                                 ret = -ENOMEM;
6557                                 break;
6558                         }
6559                 } else {
6560                         btrfs_remove_free_space_cache(cache);
6561                 }
6562
6563                 if (btrfs_fs_compat_ro(root->fs_info,
6564                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6565                         ret = exclude_super_stripes(root, cache);
6566                         if (ret) {
6567                                 fprintf(stderr, "could not exclude super stripes: %s\n",
6568                                         strerror(-ret));
6569                                 error++;
6570                                 continue;
6571                         }
6572                         ret = load_free_space_tree(root->fs_info, cache);
6573                         free_excluded_extents(root, cache);
6574                         if (ret < 0) {
6575                                 fprintf(stderr, "could not load free space tree: %s\n",
6576                                         strerror(-ret));
6577                                 error++;
6578                                 continue;
6579                         }
6580                         error += ret;
6581                 } else {
6582                         ret = load_free_space_cache(root->fs_info, cache);
6583                         if (!ret)
6584                                 continue;
6585                 }
6586
6587                 ret = verify_space_cache(root, cache);
6588                 if (ret) {
6589                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
6590                                 cache->key.objectid);
6591                         error++;
6592                 }
6593         }
6594
6595         task_stop(ctx.info);
6596
6597         return error ? -EINVAL : 0;
6598 }
6599
6600 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6601                         u64 num_bytes, unsigned long leaf_offset,
6602                         struct extent_buffer *eb) {
6603
6604         u64 offset = 0;
6605         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6606         char *data;
6607         unsigned long csum_offset;
6608         u32 csum;
6609         u32 csum_expected;
6610         u64 read_len;
6611         u64 data_checked = 0;
6612         u64 tmp;
6613         int ret = 0;
6614         int mirror;
6615         int num_copies;
6616
6617         if (num_bytes % root->sectorsize)
6618                 return -EINVAL;
6619
6620         data = malloc(num_bytes);
6621         if (!data)
6622                 return -ENOMEM;
6623
6624         while (offset < num_bytes) {
6625                 mirror = 0;
6626 again:
6627                 read_len = num_bytes - offset;
6628                 /* read as much space once a time */
6629                 ret = read_extent_data(root, data + offset,
6630                                 bytenr + offset, &read_len, mirror);
6631                 if (ret)
6632                         goto out;
6633                 data_checked = 0;
6634                 /* verify every 4k data's checksum */
6635                 while (data_checked < read_len) {
6636                         csum = ~(u32)0;
6637                         tmp = offset + data_checked;
6638
6639                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
6640                                                csum, root->sectorsize);
6641                         btrfs_csum_final(csum, (u8 *)&csum);
6642
6643                         csum_offset = leaf_offset +
6644                                  tmp / root->sectorsize * csum_size;
6645                         read_extent_buffer(eb, (char *)&csum_expected,
6646                                            csum_offset, csum_size);
6647                         /* try another mirror */
6648                         if (csum != csum_expected) {
6649                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6650                                                 mirror, bytenr + tmp,
6651                                                 csum, csum_expected);
6652                                 num_copies = btrfs_num_copies(
6653                                                 &root->fs_info->mapping_tree,
6654                                                 bytenr, num_bytes);
6655                                 if (mirror < num_copies - 1) {
6656                                         mirror += 1;
6657                                         goto again;
6658                                 }
6659                         }
6660                         data_checked += root->sectorsize;
6661                 }
6662                 offset += read_len;
6663         }
6664 out:
6665         free(data);
6666         return ret;
6667 }
6668
6669 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6670                                u64 num_bytes)
6671 {
6672         struct btrfs_path path;
6673         struct extent_buffer *leaf;
6674         struct btrfs_key key;
6675         int ret;
6676
6677         btrfs_init_path(&path);
6678         key.objectid = bytenr;
6679         key.type = BTRFS_EXTENT_ITEM_KEY;
6680         key.offset = (u64)-1;
6681
6682 again:
6683         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6684                                 0, 0);
6685         if (ret < 0) {
6686                 fprintf(stderr, "Error looking up extent record %d\n", ret);
6687                 btrfs_release_path(&path);
6688                 return ret;
6689         } else if (ret) {
6690                 if (path.slots[0] > 0) {
6691                         path.slots[0]--;
6692                 } else {
6693                         ret = btrfs_prev_leaf(root, &path);
6694                         if (ret < 0) {
6695                                 goto out;
6696                         } else if (ret > 0) {
6697                                 ret = 0;
6698                                 goto out;
6699                         }
6700                 }
6701         }
6702
6703         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6704
6705         /*
6706          * Block group items come before extent items if they have the same
6707          * bytenr, so walk back one more just in case.  Dear future traveller,
6708          * first congrats on mastering time travel.  Now if it's not too much
6709          * trouble could you go back to 2006 and tell Chris to make the
6710          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6711          * EXTENT_ITEM_KEY please?
6712          */
6713         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6714                 if (path.slots[0] > 0) {
6715                         path.slots[0]--;
6716                 } else {
6717                         ret = btrfs_prev_leaf(root, &path);
6718                         if (ret < 0) {
6719                                 goto out;
6720                         } else if (ret > 0) {
6721                                 ret = 0;
6722                                 goto out;
6723                         }
6724                 }
6725                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6726         }
6727
6728         while (num_bytes) {
6729                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6730                         ret = btrfs_next_leaf(root, &path);
6731                         if (ret < 0) {
6732                                 fprintf(stderr, "Error going to next leaf "
6733                                         "%d\n", ret);
6734                                 btrfs_release_path(&path);
6735                                 return ret;
6736                         } else if (ret) {
6737                                 break;
6738                         }
6739                 }
6740                 leaf = path.nodes[0];
6741                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6742                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6743                         path.slots[0]++;
6744                         continue;
6745                 }
6746                 if (key.objectid + key.offset < bytenr) {
6747                         path.slots[0]++;
6748                         continue;
6749                 }
6750                 if (key.objectid > bytenr + num_bytes)
6751                         break;
6752
6753                 if (key.objectid == bytenr) {
6754                         if (key.offset >= num_bytes) {
6755                                 num_bytes = 0;
6756                                 break;
6757                         }
6758                         num_bytes -= key.offset;
6759                         bytenr += key.offset;
6760                 } else if (key.objectid < bytenr) {
6761                         if (key.objectid + key.offset >= bytenr + num_bytes) {
6762                                 num_bytes = 0;
6763                                 break;
6764                         }
6765                         num_bytes = (bytenr + num_bytes) -
6766                                 (key.objectid + key.offset);
6767                         bytenr = key.objectid + key.offset;
6768                 } else {
6769                         if (key.objectid + key.offset < bytenr + num_bytes) {
6770                                 u64 new_start = key.objectid + key.offset;
6771                                 u64 new_bytes = bytenr + num_bytes - new_start;
6772
6773                                 /*
6774                                  * Weird case, the extent is in the middle of
6775                                  * our range, we'll have to search one side
6776                                  * and then the other.  Not sure if this happens
6777                                  * in real life, but no harm in coding it up
6778                                  * anyway just in case.
6779                                  */
6780                                 btrfs_release_path(&path);
6781                                 ret = check_extent_exists(root, new_start,
6782                                                           new_bytes);
6783                                 if (ret) {
6784                                         fprintf(stderr, "Right section didn't "
6785                                                 "have a record\n");
6786                                         break;
6787                                 }
6788                                 num_bytes = key.objectid - bytenr;
6789                                 goto again;
6790                         }
6791                         num_bytes = key.objectid - bytenr;
6792                 }
6793                 path.slots[0]++;
6794         }
6795         ret = 0;
6796
6797 out:
6798         if (num_bytes && !ret) {
6799                 fprintf(stderr, "There are no extents for csum range "
6800                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6801                 ret = 1;
6802         }
6803
6804         btrfs_release_path(&path);
6805         return ret;
6806 }
6807
6808 static int check_csums(struct btrfs_root *root)
6809 {
6810         struct btrfs_path path;
6811         struct extent_buffer *leaf;
6812         struct btrfs_key key;
6813         u64 offset = 0, num_bytes = 0;
6814         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6815         int errors = 0;
6816         int ret;
6817         u64 data_len;
6818         unsigned long leaf_offset;
6819
6820         root = root->fs_info->csum_root;
6821         if (!extent_buffer_uptodate(root->node)) {
6822                 fprintf(stderr, "No valid csum tree found\n");
6823                 return -ENOENT;
6824         }
6825
6826         btrfs_init_path(&path);
6827         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6828         key.type = BTRFS_EXTENT_CSUM_KEY;
6829         key.offset = 0;
6830         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6831         if (ret < 0) {
6832                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6833                 btrfs_release_path(&path);
6834                 return ret;
6835         }
6836
6837         if (ret > 0 && path.slots[0])
6838                 path.slots[0]--;
6839         ret = 0;
6840
6841         while (1) {
6842                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6843                         ret = btrfs_next_leaf(root, &path);
6844                         if (ret < 0) {
6845                                 fprintf(stderr, "Error going to next leaf "
6846                                         "%d\n", ret);
6847                                 break;
6848                         }
6849                         if (ret)
6850                                 break;
6851                 }
6852                 leaf = path.nodes[0];
6853
6854                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6855                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6856                         path.slots[0]++;
6857                         continue;
6858                 }
6859
6860                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6861                               csum_size) * root->sectorsize;
6862                 if (!check_data_csum)
6863                         goto skip_csum_check;
6864                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6865                 ret = check_extent_csums(root, key.offset, data_len,
6866                                          leaf_offset, leaf);
6867                 if (ret)
6868                         break;
6869 skip_csum_check:
6870                 if (!num_bytes) {
6871                         offset = key.offset;
6872                 } else if (key.offset != offset + num_bytes) {
6873                         ret = check_extent_exists(root, offset, num_bytes);
6874                         if (ret) {
6875                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6876                                         "there is no extent record\n",
6877                                         offset, offset+num_bytes);
6878                                 errors++;
6879                         }
6880                         offset = key.offset;
6881                         num_bytes = 0;
6882                 }
6883                 num_bytes += data_len;
6884                 path.slots[0]++;
6885         }
6886
6887         btrfs_release_path(&path);
6888         return errors;
6889 }
6890
6891 static int is_dropped_key(struct btrfs_key *key,
6892                           struct btrfs_key *drop_key) {
6893         if (key->objectid < drop_key->objectid)
6894                 return 1;
6895         else if (key->objectid == drop_key->objectid) {
6896                 if (key->type < drop_key->type)
6897                         return 1;
6898                 else if (key->type == drop_key->type) {
6899                         if (key->offset < drop_key->offset)
6900                                 return 1;
6901                 }
6902         }
6903         return 0;
6904 }
6905
6906 /*
6907  * Here are the rules for FULL_BACKREF.
6908  *
6909  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6910  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6911  *      FULL_BACKREF set.
6912  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6913  *    if it happened after the relocation occurred since we'll have dropped the
6914  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6915  *    have no real way to know for sure.
6916  *
6917  * We process the blocks one root at a time, and we start from the lowest root
6918  * objectid and go to the highest.  So we can just lookup the owner backref for
6919  * the record and if we don't find it then we know it doesn't exist and we have
6920  * a FULL BACKREF.
6921  *
6922  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6923  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6924  * be set or not and then we can check later once we've gathered all the refs.
6925  */
6926 static int calc_extent_flag(struct btrfs_root *root,
6927                            struct cache_tree *extent_cache,
6928                            struct extent_buffer *buf,
6929                            struct root_item_record *ri,
6930                            u64 *flags)
6931 {
6932         struct extent_record *rec;
6933         struct cache_extent *cache;
6934         struct tree_backref *tback;
6935         u64 owner = 0;
6936
6937         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6938         /* we have added this extent before */
6939         if (!cache)
6940                 return -ENOENT;
6941
6942         rec = container_of(cache, struct extent_record, cache);
6943
6944         /*
6945          * Except file/reloc tree, we can not have
6946          * FULL BACKREF MODE
6947          */
6948         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6949                 goto normal;
6950         /*
6951          * root node
6952          */
6953         if (buf->start == ri->bytenr)
6954                 goto normal;
6955
6956         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6957                 goto full_backref;
6958
6959         owner = btrfs_header_owner(buf);
6960         if (owner == ri->objectid)
6961                 goto normal;
6962
6963         tback = find_tree_backref(rec, 0, owner);
6964         if (!tback)
6965                 goto full_backref;
6966 normal:
6967         *flags = 0;
6968         if (rec->flag_block_full_backref != FLAG_UNSET &&
6969             rec->flag_block_full_backref != 0)
6970                 rec->bad_full_backref = 1;
6971         return 0;
6972 full_backref:
6973         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6974         if (rec->flag_block_full_backref != FLAG_UNSET &&
6975             rec->flag_block_full_backref != 1)
6976                 rec->bad_full_backref = 1;
6977         return 0;
6978 }
6979
6980 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6981 {
6982         fprintf(stderr, "Invalid key type(");
6983         print_key_type(stderr, 0, key_type);
6984         fprintf(stderr, ") found in root(");
6985         print_objectid(stderr, rootid, 0);
6986         fprintf(stderr, ")\n");
6987 }
6988
6989 /*
6990  * Check if the key is valid with its extent buffer.
6991  *
6992  * This is a early check in case invalid key exists in a extent buffer
6993  * This is not comprehensive yet, but should prevent wrong key/item passed
6994  * further
6995  */
6996 static int check_type_with_root(u64 rootid, u8 key_type)
6997 {
6998         switch (key_type) {
6999         /* Only valid in chunk tree */
7000         case BTRFS_DEV_ITEM_KEY:
7001         case BTRFS_CHUNK_ITEM_KEY:
7002                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7003                         goto err;
7004                 break;
7005         /* valid in csum and log tree */
7006         case BTRFS_CSUM_TREE_OBJECTID:
7007                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7008                       is_fstree(rootid)))
7009                         goto err;
7010                 break;
7011         case BTRFS_EXTENT_ITEM_KEY:
7012         case BTRFS_METADATA_ITEM_KEY:
7013         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7014                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7015                         goto err;
7016                 break;
7017         case BTRFS_ROOT_ITEM_KEY:
7018                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7019                         goto err;
7020                 break;
7021         case BTRFS_DEV_EXTENT_KEY:
7022                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7023                         goto err;
7024                 break;
7025         }
7026         return 0;
7027 err:
7028         report_mismatch_key_root(key_type, rootid);
7029         return -EINVAL;
7030 }
7031
7032 static int run_next_block(struct btrfs_root *root,
7033                           struct block_info *bits,
7034                           int bits_nr,
7035                           u64 *last,
7036                           struct cache_tree *pending,
7037                           struct cache_tree *seen,
7038                           struct cache_tree *reada,
7039                           struct cache_tree *nodes,
7040                           struct cache_tree *extent_cache,
7041                           struct cache_tree *chunk_cache,
7042                           struct rb_root *dev_cache,
7043                           struct block_group_tree *block_group_cache,
7044                           struct device_extent_tree *dev_extent_cache,
7045                           struct root_item_record *ri)
7046 {
7047         struct extent_buffer *buf;
7048         struct extent_record *rec = NULL;
7049         u64 bytenr;
7050         u32 size;
7051         u64 parent;
7052         u64 owner;
7053         u64 flags;
7054         u64 ptr;
7055         u64 gen = 0;
7056         int ret = 0;
7057         int i;
7058         int nritems;
7059         struct btrfs_key key;
7060         struct cache_extent *cache;
7061         int reada_bits;
7062
7063         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7064                                     bits_nr, &reada_bits);
7065         if (nritems == 0)
7066                 return 1;
7067
7068         if (!reada_bits) {
7069                 for(i = 0; i < nritems; i++) {
7070                         ret = add_cache_extent(reada, bits[i].start,
7071                                                bits[i].size);
7072                         if (ret == -EEXIST)
7073                                 continue;
7074
7075                         /* fixme, get the parent transid */
7076                         readahead_tree_block(root, bits[i].start,
7077                                              bits[i].size, 0);
7078                 }
7079         }
7080         *last = bits[0].start;
7081         bytenr = bits[0].start;
7082         size = bits[0].size;
7083
7084         cache = lookup_cache_extent(pending, bytenr, size);
7085         if (cache) {
7086                 remove_cache_extent(pending, cache);
7087                 free(cache);
7088         }
7089         cache = lookup_cache_extent(reada, bytenr, size);
7090         if (cache) {
7091                 remove_cache_extent(reada, cache);
7092                 free(cache);
7093         }
7094         cache = lookup_cache_extent(nodes, bytenr, size);
7095         if (cache) {
7096                 remove_cache_extent(nodes, cache);
7097                 free(cache);
7098         }
7099         cache = lookup_cache_extent(extent_cache, bytenr, size);
7100         if (cache) {
7101                 rec = container_of(cache, struct extent_record, cache);
7102                 gen = rec->parent_generation;
7103         }
7104
7105         /* fixme, get the real parent transid */
7106         buf = read_tree_block(root, bytenr, size, gen);
7107         if (!extent_buffer_uptodate(buf)) {
7108                 record_bad_block_io(root->fs_info,
7109                                     extent_cache, bytenr, size);
7110                 goto out;
7111         }
7112
7113         nritems = btrfs_header_nritems(buf);
7114
7115         flags = 0;
7116         if (!init_extent_tree) {
7117                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7118                                        btrfs_header_level(buf), 1, NULL,
7119                                        &flags);
7120                 if (ret < 0) {
7121                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7122                         if (ret < 0) {
7123                                 fprintf(stderr, "Couldn't calc extent flags\n");
7124                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7125                         }
7126                 }
7127         } else {
7128                 flags = 0;
7129                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7130                 if (ret < 0) {
7131                         fprintf(stderr, "Couldn't calc extent flags\n");
7132                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7133                 }
7134         }
7135
7136         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7137                 if (ri != NULL &&
7138                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7139                     ri->objectid == btrfs_header_owner(buf)) {
7140                         /*
7141                          * Ok we got to this block from it's original owner and
7142                          * we have FULL_BACKREF set.  Relocation can leave
7143                          * converted blocks over so this is altogether possible,
7144                          * however it's not possible if the generation > the
7145                          * last snapshot, so check for this case.
7146                          */
7147                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7148                             btrfs_header_generation(buf) > ri->last_snapshot) {
7149                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7150                                 rec->bad_full_backref = 1;
7151                         }
7152                 }
7153         } else {
7154                 if (ri != NULL &&
7155                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7156                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7157                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7158                         rec->bad_full_backref = 1;
7159                 }
7160         }
7161
7162         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7163                 rec->flag_block_full_backref = 1;
7164                 parent = bytenr;
7165                 owner = 0;
7166         } else {
7167                 rec->flag_block_full_backref = 0;
7168                 parent = 0;
7169                 owner = btrfs_header_owner(buf);
7170         }
7171
7172         ret = check_block(root, extent_cache, buf, flags);
7173         if (ret)
7174                 goto out;
7175
7176         if (btrfs_is_leaf(buf)) {
7177                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7178                 for (i = 0; i < nritems; i++) {
7179                         struct btrfs_file_extent_item *fi;
7180                         btrfs_item_key_to_cpu(buf, &key, i);
7181                         /*
7182                          * Check key type against the leaf owner.
7183                          * Could filter quite a lot of early error if
7184                          * owner is correct
7185                          */
7186                         if (check_type_with_root(btrfs_header_owner(buf),
7187                                                  key.type)) {
7188                                 fprintf(stderr, "ignoring invalid key\n");
7189                                 continue;
7190                         }
7191                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7192                                 process_extent_item(root, extent_cache, buf,
7193                                                     i);
7194                                 continue;
7195                         }
7196                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7197                                 process_extent_item(root, extent_cache, buf,
7198                                                     i);
7199                                 continue;
7200                         }
7201                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7202                                 total_csum_bytes +=
7203                                         btrfs_item_size_nr(buf, i);
7204                                 continue;
7205                         }
7206                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7207                                 process_chunk_item(chunk_cache, &key, buf, i);
7208                                 continue;
7209                         }
7210                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7211                                 process_device_item(dev_cache, &key, buf, i);
7212                                 continue;
7213                         }
7214                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7215                                 process_block_group_item(block_group_cache,
7216                                         &key, buf, i);
7217                                 continue;
7218                         }
7219                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7220                                 process_device_extent_item(dev_extent_cache,
7221                                         &key, buf, i);
7222                                 continue;
7223
7224                         }
7225                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7226 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7227                                 process_extent_ref_v0(extent_cache, buf, i);
7228 #else
7229                                 BUG();
7230 #endif
7231                                 continue;
7232                         }
7233
7234                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7235                                 ret = add_tree_backref(extent_cache,
7236                                                 key.objectid, 0, key.offset, 0);
7237                                 if (ret < 0)
7238                                         error("add_tree_backref failed: %s",
7239                                               strerror(-ret));
7240                                 continue;
7241                         }
7242                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7243                                 ret = add_tree_backref(extent_cache,
7244                                                 key.objectid, key.offset, 0, 0);
7245                                 if (ret < 0)
7246                                         error("add_tree_backref failed: %s",
7247                                               strerror(-ret));
7248                                 continue;
7249                         }
7250                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7251                                 struct btrfs_extent_data_ref *ref;
7252                                 ref = btrfs_item_ptr(buf, i,
7253                                                 struct btrfs_extent_data_ref);
7254                                 add_data_backref(extent_cache,
7255                                         key.objectid, 0,
7256                                         btrfs_extent_data_ref_root(buf, ref),
7257                                         btrfs_extent_data_ref_objectid(buf,
7258                                                                        ref),
7259                                         btrfs_extent_data_ref_offset(buf, ref),
7260                                         btrfs_extent_data_ref_count(buf, ref),
7261                                         0, root->sectorsize);
7262                                 continue;
7263                         }
7264                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7265                                 struct btrfs_shared_data_ref *ref;
7266                                 ref = btrfs_item_ptr(buf, i,
7267                                                 struct btrfs_shared_data_ref);
7268                                 add_data_backref(extent_cache,
7269                                         key.objectid, key.offset, 0, 0, 0,
7270                                         btrfs_shared_data_ref_count(buf, ref),
7271                                         0, root->sectorsize);
7272                                 continue;
7273                         }
7274                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7275                                 struct bad_item *bad;
7276
7277                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7278                                         continue;
7279                                 if (!owner)
7280                                         continue;
7281                                 bad = malloc(sizeof(struct bad_item));
7282                                 if (!bad)
7283                                         continue;
7284                                 INIT_LIST_HEAD(&bad->list);
7285                                 memcpy(&bad->key, &key,
7286                                        sizeof(struct btrfs_key));
7287                                 bad->root_id = owner;
7288                                 list_add_tail(&bad->list, &delete_items);
7289                                 continue;
7290                         }
7291                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7292                                 continue;
7293                         fi = btrfs_item_ptr(buf, i,
7294                                             struct btrfs_file_extent_item);
7295                         if (btrfs_file_extent_type(buf, fi) ==
7296                             BTRFS_FILE_EXTENT_INLINE)
7297                                 continue;
7298                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7299                                 continue;
7300
7301                         data_bytes_allocated +=
7302                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7303                         if (data_bytes_allocated < root->sectorsize) {
7304                                 abort();
7305                         }
7306                         data_bytes_referenced +=
7307                                 btrfs_file_extent_num_bytes(buf, fi);
7308                         add_data_backref(extent_cache,
7309                                 btrfs_file_extent_disk_bytenr(buf, fi),
7310                                 parent, owner, key.objectid, key.offset -
7311                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7312                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7313                 }
7314         } else {
7315                 int level;
7316                 struct btrfs_key first_key;
7317
7318                 first_key.objectid = 0;
7319
7320                 if (nritems > 0)
7321                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7322                 level = btrfs_header_level(buf);
7323                 for (i = 0; i < nritems; i++) {
7324                         struct extent_record tmpl;
7325
7326                         ptr = btrfs_node_blockptr(buf, i);
7327                         size = root->nodesize;
7328                         btrfs_node_key_to_cpu(buf, &key, i);
7329                         if (ri != NULL) {
7330                                 if ((level == ri->drop_level)
7331                                     && is_dropped_key(&key, &ri->drop_key)) {
7332                                         continue;
7333                                 }
7334                         }
7335
7336                         memset(&tmpl, 0, sizeof(tmpl));
7337                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7338                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7339                         tmpl.start = ptr;
7340                         tmpl.nr = size;
7341                         tmpl.refs = 1;
7342                         tmpl.metadata = 1;
7343                         tmpl.max_size = size;
7344                         ret = add_extent_rec(extent_cache, &tmpl);
7345                         if (ret < 0)
7346                                 goto out;
7347
7348                         ret = add_tree_backref(extent_cache, ptr, parent,
7349                                         owner, 1);
7350                         if (ret < 0) {
7351                                 error("add_tree_backref failed: %s",
7352                                       strerror(-ret));
7353                                 continue;
7354                         }
7355
7356                         if (level > 1) {
7357                                 add_pending(nodes, seen, ptr, size);
7358                         } else {
7359                                 add_pending(pending, seen, ptr, size);
7360                         }
7361                 }
7362                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7363                                       nritems) * sizeof(struct btrfs_key_ptr);
7364         }
7365         total_btree_bytes += buf->len;
7366         if (fs_root_objectid(btrfs_header_owner(buf)))
7367                 total_fs_tree_bytes += buf->len;
7368         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7369                 total_extent_tree_bytes += buf->len;
7370         if (!found_old_backref &&
7371             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7372             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7373             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7374                 found_old_backref = 1;
7375 out:
7376         free_extent_buffer(buf);
7377         return ret;
7378 }
7379
7380 static int add_root_to_pending(struct extent_buffer *buf,
7381                                struct cache_tree *extent_cache,
7382                                struct cache_tree *pending,
7383                                struct cache_tree *seen,
7384                                struct cache_tree *nodes,
7385                                u64 objectid)
7386 {
7387         struct extent_record tmpl;
7388         int ret;
7389
7390         if (btrfs_header_level(buf) > 0)
7391                 add_pending(nodes, seen, buf->start, buf->len);
7392         else
7393                 add_pending(pending, seen, buf->start, buf->len);
7394
7395         memset(&tmpl, 0, sizeof(tmpl));
7396         tmpl.start = buf->start;
7397         tmpl.nr = buf->len;
7398         tmpl.is_root = 1;
7399         tmpl.refs = 1;
7400         tmpl.metadata = 1;
7401         tmpl.max_size = buf->len;
7402         add_extent_rec(extent_cache, &tmpl);
7403
7404         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7405             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7406                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7407                                 0, 1);
7408         else
7409                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7410                                 1);
7411         return ret;
7412 }
7413
7414 /* as we fix the tree, we might be deleting blocks that
7415  * we're tracking for repair.  This hook makes sure we
7416  * remove any backrefs for blocks as we are fixing them.
7417  */
7418 static int free_extent_hook(struct btrfs_trans_handle *trans,
7419                             struct btrfs_root *root,
7420                             u64 bytenr, u64 num_bytes, u64 parent,
7421                             u64 root_objectid, u64 owner, u64 offset,
7422                             int refs_to_drop)
7423 {
7424         struct extent_record *rec;
7425         struct cache_extent *cache;
7426         int is_data;
7427         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7428
7429         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7430         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7431         if (!cache)
7432                 return 0;
7433
7434         rec = container_of(cache, struct extent_record, cache);
7435         if (is_data) {
7436                 struct data_backref *back;
7437                 back = find_data_backref(rec, parent, root_objectid, owner,
7438                                          offset, 1, bytenr, num_bytes);
7439                 if (!back)
7440                         goto out;
7441                 if (back->node.found_ref) {
7442                         back->found_ref -= refs_to_drop;
7443                         if (rec->refs)
7444                                 rec->refs -= refs_to_drop;
7445                 }
7446                 if (back->node.found_extent_tree) {
7447                         back->num_refs -= refs_to_drop;
7448                         if (rec->extent_item_refs)
7449                                 rec->extent_item_refs -= refs_to_drop;
7450                 }
7451                 if (back->found_ref == 0)
7452                         back->node.found_ref = 0;
7453                 if (back->num_refs == 0)
7454                         back->node.found_extent_tree = 0;
7455
7456                 if (!back->node.found_extent_tree && back->node.found_ref) {
7457                         list_del(&back->node.list);
7458                         free(back);
7459                 }
7460         } else {
7461                 struct tree_backref *back;
7462                 back = find_tree_backref(rec, parent, root_objectid);
7463                 if (!back)
7464                         goto out;
7465                 if (back->node.found_ref) {
7466                         if (rec->refs)
7467                                 rec->refs--;
7468                         back->node.found_ref = 0;
7469                 }
7470                 if (back->node.found_extent_tree) {
7471                         if (rec->extent_item_refs)
7472                                 rec->extent_item_refs--;
7473                         back->node.found_extent_tree = 0;
7474                 }
7475                 if (!back->node.found_extent_tree && back->node.found_ref) {
7476                         list_del(&back->node.list);
7477                         free(back);
7478                 }
7479         }
7480         maybe_free_extent_rec(extent_cache, rec);
7481 out:
7482         return 0;
7483 }
7484
7485 static int delete_extent_records(struct btrfs_trans_handle *trans,
7486                                  struct btrfs_root *root,
7487                                  struct btrfs_path *path,
7488                                  u64 bytenr, u64 new_len)
7489 {
7490         struct btrfs_key key;
7491         struct btrfs_key found_key;
7492         struct extent_buffer *leaf;
7493         int ret;
7494         int slot;
7495
7496
7497         key.objectid = bytenr;
7498         key.type = (u8)-1;
7499         key.offset = (u64)-1;
7500
7501         while(1) {
7502                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7503                                         &key, path, 0, 1);
7504                 if (ret < 0)
7505                         break;
7506
7507                 if (ret > 0) {
7508                         ret = 0;
7509                         if (path->slots[0] == 0)
7510                                 break;
7511                         path->slots[0]--;
7512                 }
7513                 ret = 0;
7514
7515                 leaf = path->nodes[0];
7516                 slot = path->slots[0];
7517
7518                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7519                 if (found_key.objectid != bytenr)
7520                         break;
7521
7522                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7523                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7524                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7525                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7526                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7527                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7528                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7529                         btrfs_release_path(path);
7530                         if (found_key.type == 0) {
7531                                 if (found_key.offset == 0)
7532                                         break;
7533                                 key.offset = found_key.offset - 1;
7534                                 key.type = found_key.type;
7535                         }
7536                         key.type = found_key.type - 1;
7537                         key.offset = (u64)-1;
7538                         continue;
7539                 }
7540
7541                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7542                         found_key.objectid, found_key.type, found_key.offset);
7543
7544                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7545                 if (ret)
7546                         break;
7547                 btrfs_release_path(path);
7548
7549                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7550                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
7551                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7552                                 found_key.offset : root->nodesize;
7553
7554                         ret = btrfs_update_block_group(trans, root, bytenr,
7555                                                        bytes, 0, 0);
7556                         if (ret)
7557                                 break;
7558                 }
7559         }
7560
7561         btrfs_release_path(path);
7562         return ret;
7563 }
7564
7565 /*
7566  * for a single backref, this will allocate a new extent
7567  * and add the backref to it.
7568  */
7569 static int record_extent(struct btrfs_trans_handle *trans,
7570                          struct btrfs_fs_info *info,
7571                          struct btrfs_path *path,
7572                          struct extent_record *rec,
7573                          struct extent_backref *back,
7574                          int allocated, u64 flags)
7575 {
7576         int ret;
7577         struct btrfs_root *extent_root = info->extent_root;
7578         struct extent_buffer *leaf;
7579         struct btrfs_key ins_key;
7580         struct btrfs_extent_item *ei;
7581         struct data_backref *dback;
7582         struct btrfs_tree_block_info *bi;
7583
7584         if (!back->is_data)
7585                 rec->max_size = max_t(u64, rec->max_size,
7586                                     info->extent_root->nodesize);
7587
7588         if (!allocated) {
7589                 u32 item_size = sizeof(*ei);
7590
7591                 if (!back->is_data)
7592                         item_size += sizeof(*bi);
7593
7594                 ins_key.objectid = rec->start;
7595                 ins_key.offset = rec->max_size;
7596                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7597
7598                 ret = btrfs_insert_empty_item(trans, extent_root, path,
7599                                         &ins_key, item_size);
7600                 if (ret)
7601                         goto fail;
7602
7603                 leaf = path->nodes[0];
7604                 ei = btrfs_item_ptr(leaf, path->slots[0],
7605                                     struct btrfs_extent_item);
7606
7607                 btrfs_set_extent_refs(leaf, ei, 0);
7608                 btrfs_set_extent_generation(leaf, ei, rec->generation);
7609
7610                 if (back->is_data) {
7611                         btrfs_set_extent_flags(leaf, ei,
7612                                                BTRFS_EXTENT_FLAG_DATA);
7613                 } else {
7614                         struct btrfs_disk_key copy_key;;
7615
7616                         bi = (struct btrfs_tree_block_info *)(ei + 1);
7617                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
7618                                              sizeof(*bi));
7619
7620                         btrfs_set_disk_key_objectid(&copy_key,
7621                                                     rec->info_objectid);
7622                         btrfs_set_disk_key_type(&copy_key, 0);
7623                         btrfs_set_disk_key_offset(&copy_key, 0);
7624
7625                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7626                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
7627
7628                         btrfs_set_extent_flags(leaf, ei,
7629                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7630                 }
7631
7632                 btrfs_mark_buffer_dirty(leaf);
7633                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7634                                                rec->max_size, 1, 0);
7635                 if (ret)
7636                         goto fail;
7637                 btrfs_release_path(path);
7638         }
7639
7640         if (back->is_data) {
7641                 u64 parent;
7642                 int i;
7643
7644                 dback = to_data_backref(back);
7645                 if (back->full_backref)
7646                         parent = dback->parent;
7647                 else
7648                         parent = 0;
7649
7650                 for (i = 0; i < dback->found_ref; i++) {
7651                         /* if parent != 0, we're doing a full backref
7652                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7653                          * just makes the backref allocator create a data
7654                          * backref
7655                          */
7656                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
7657                                                    rec->start, rec->max_size,
7658                                                    parent,
7659                                                    dback->root,
7660                                                    parent ?
7661                                                    BTRFS_FIRST_FREE_OBJECTID :
7662                                                    dback->owner,
7663                                                    dback->offset);
7664                         if (ret)
7665                                 break;
7666                 }
7667                 fprintf(stderr, "adding new data backref"
7668                                 " on %llu %s %llu owner %llu"
7669                                 " offset %llu found %d\n",
7670                                 (unsigned long long)rec->start,
7671                                 back->full_backref ?
7672                                 "parent" : "root",
7673                                 back->full_backref ?
7674                                 (unsigned long long)parent :
7675                                 (unsigned long long)dback->root,
7676                                 (unsigned long long)dback->owner,
7677                                 (unsigned long long)dback->offset,
7678                                 dback->found_ref);
7679         } else {
7680                 u64 parent;
7681                 struct tree_backref *tback;
7682
7683                 tback = to_tree_backref(back);
7684                 if (back->full_backref)
7685                         parent = tback->parent;
7686                 else
7687                         parent = 0;
7688
7689                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7690                                            rec->start, rec->max_size,
7691                                            parent, tback->root, 0, 0);
7692                 fprintf(stderr, "adding new tree backref on "
7693                         "start %llu len %llu parent %llu root %llu\n",
7694                         rec->start, rec->max_size, parent, tback->root);
7695         }
7696 fail:
7697         btrfs_release_path(path);
7698         return ret;
7699 }
7700
7701 static struct extent_entry *find_entry(struct list_head *entries,
7702                                        u64 bytenr, u64 bytes)
7703 {
7704         struct extent_entry *entry = NULL;
7705
7706         list_for_each_entry(entry, entries, list) {
7707                 if (entry->bytenr == bytenr && entry->bytes == bytes)
7708                         return entry;
7709         }
7710
7711         return NULL;
7712 }
7713
7714 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7715 {
7716         struct extent_entry *entry, *best = NULL, *prev = NULL;
7717
7718         list_for_each_entry(entry, entries, list) {
7719                 /*
7720                  * If there are as many broken entries as entries then we know
7721                  * not to trust this particular entry.
7722                  */
7723                 if (entry->broken == entry->count)
7724                         continue;
7725
7726                 /*
7727                  * Special case, when there are only two entries and 'best' is
7728                  * the first one
7729                  */
7730                 if (!prev) {
7731                         best = entry;
7732                         prev = entry;
7733                         continue;
7734                 }
7735
7736                 /*
7737                  * If our current entry == best then we can't be sure our best
7738                  * is really the best, so we need to keep searching.
7739                  */
7740                 if (best && best->count == entry->count) {
7741                         prev = entry;
7742                         best = NULL;
7743                         continue;
7744                 }
7745
7746                 /* Prev == entry, not good enough, have to keep searching */
7747                 if (!prev->broken && prev->count == entry->count)
7748                         continue;
7749
7750                 if (!best)
7751                         best = (prev->count > entry->count) ? prev : entry;
7752                 else if (best->count < entry->count)
7753                         best = entry;
7754                 prev = entry;
7755         }
7756
7757         return best;
7758 }
7759
7760 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7761                       struct data_backref *dback, struct extent_entry *entry)
7762 {
7763         struct btrfs_trans_handle *trans;
7764         struct btrfs_root *root;
7765         struct btrfs_file_extent_item *fi;
7766         struct extent_buffer *leaf;
7767         struct btrfs_key key;
7768         u64 bytenr, bytes;
7769         int ret, err;
7770
7771         key.objectid = dback->root;
7772         key.type = BTRFS_ROOT_ITEM_KEY;
7773         key.offset = (u64)-1;
7774         root = btrfs_read_fs_root(info, &key);
7775         if (IS_ERR(root)) {
7776                 fprintf(stderr, "Couldn't find root for our ref\n");
7777                 return -EINVAL;
7778         }
7779
7780         /*
7781          * The backref points to the original offset of the extent if it was
7782          * split, so we need to search down to the offset we have and then walk
7783          * forward until we find the backref we're looking for.
7784          */
7785         key.objectid = dback->owner;
7786         key.type = BTRFS_EXTENT_DATA_KEY;
7787         key.offset = dback->offset;
7788         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7789         if (ret < 0) {
7790                 fprintf(stderr, "Error looking up ref %d\n", ret);
7791                 return ret;
7792         }
7793
7794         while (1) {
7795                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7796                         ret = btrfs_next_leaf(root, path);
7797                         if (ret) {
7798                                 fprintf(stderr, "Couldn't find our ref, next\n");
7799                                 return -EINVAL;
7800                         }
7801                 }
7802                 leaf = path->nodes[0];
7803                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7804                 if (key.objectid != dback->owner ||
7805                     key.type != BTRFS_EXTENT_DATA_KEY) {
7806                         fprintf(stderr, "Couldn't find our ref, search\n");
7807                         return -EINVAL;
7808                 }
7809                 fi = btrfs_item_ptr(leaf, path->slots[0],
7810                                     struct btrfs_file_extent_item);
7811                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7812                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7813
7814                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7815                         break;
7816                 path->slots[0]++;
7817         }
7818
7819         btrfs_release_path(path);
7820
7821         trans = btrfs_start_transaction(root, 1);
7822         if (IS_ERR(trans))
7823                 return PTR_ERR(trans);
7824
7825         /*
7826          * Ok we have the key of the file extent we want to fix, now we can cow
7827          * down to the thing and fix it.
7828          */
7829         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7830         if (ret < 0) {
7831                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7832                         key.objectid, key.type, key.offset, ret);
7833                 goto out;
7834         }
7835         if (ret > 0) {
7836                 fprintf(stderr, "Well that's odd, we just found this key "
7837                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7838                         key.offset);
7839                 ret = -EINVAL;
7840                 goto out;
7841         }
7842         leaf = path->nodes[0];
7843         fi = btrfs_item_ptr(leaf, path->slots[0],
7844                             struct btrfs_file_extent_item);
7845
7846         if (btrfs_file_extent_compression(leaf, fi) &&
7847             dback->disk_bytenr != entry->bytenr) {
7848                 fprintf(stderr, "Ref doesn't match the record start and is "
7849                         "compressed, please take a btrfs-image of this file "
7850                         "system and send it to a btrfs developer so they can "
7851                         "complete this functionality for bytenr %Lu\n",
7852                         dback->disk_bytenr);
7853                 ret = -EINVAL;
7854                 goto out;
7855         }
7856
7857         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7858                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7859         } else if (dback->disk_bytenr > entry->bytenr) {
7860                 u64 off_diff, offset;
7861
7862                 off_diff = dback->disk_bytenr - entry->bytenr;
7863                 offset = btrfs_file_extent_offset(leaf, fi);
7864                 if (dback->disk_bytenr + offset +
7865                     btrfs_file_extent_num_bytes(leaf, fi) >
7866                     entry->bytenr + entry->bytes) {
7867                         fprintf(stderr, "Ref is past the entry end, please "
7868                                 "take a btrfs-image of this file system and "
7869                                 "send it to a btrfs developer, ref %Lu\n",
7870                                 dback->disk_bytenr);
7871                         ret = -EINVAL;
7872                         goto out;
7873                 }
7874                 offset += off_diff;
7875                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7876                 btrfs_set_file_extent_offset(leaf, fi, offset);
7877         } else if (dback->disk_bytenr < entry->bytenr) {
7878                 u64 offset;
7879
7880                 offset = btrfs_file_extent_offset(leaf, fi);
7881                 if (dback->disk_bytenr + offset < entry->bytenr) {
7882                         fprintf(stderr, "Ref is before the entry start, please"
7883                                 " take a btrfs-image of this file system and "
7884                                 "send it to a btrfs developer, ref %Lu\n",
7885                                 dback->disk_bytenr);
7886                         ret = -EINVAL;
7887                         goto out;
7888                 }
7889
7890                 offset += dback->disk_bytenr;
7891                 offset -= entry->bytenr;
7892                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7893                 btrfs_set_file_extent_offset(leaf, fi, offset);
7894         }
7895
7896         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7897
7898         /*
7899          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7900          * only do this if we aren't using compression, otherwise it's a
7901          * trickier case.
7902          */
7903         if (!btrfs_file_extent_compression(leaf, fi))
7904                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7905         else
7906                 printf("ram bytes may be wrong?\n");
7907         btrfs_mark_buffer_dirty(leaf);
7908 out:
7909         err = btrfs_commit_transaction(trans, root);
7910         btrfs_release_path(path);
7911         return ret ? ret : err;
7912 }
7913
7914 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7915                            struct extent_record *rec)
7916 {
7917         struct extent_backref *back;
7918         struct data_backref *dback;
7919         struct extent_entry *entry, *best = NULL;
7920         LIST_HEAD(entries);
7921         int nr_entries = 0;
7922         int broken_entries = 0;
7923         int ret = 0;
7924         short mismatch = 0;
7925
7926         /*
7927          * Metadata is easy and the backrefs should always agree on bytenr and
7928          * size, if not we've got bigger issues.
7929          */
7930         if (rec->metadata)
7931                 return 0;
7932
7933         list_for_each_entry(back, &rec->backrefs, list) {
7934                 if (back->full_backref || !back->is_data)
7935                         continue;
7936
7937                 dback = to_data_backref(back);
7938
7939                 /*
7940                  * We only pay attention to backrefs that we found a real
7941                  * backref for.
7942                  */
7943                 if (dback->found_ref == 0)
7944                         continue;
7945
7946                 /*
7947                  * For now we only catch when the bytes don't match, not the
7948                  * bytenr.  We can easily do this at the same time, but I want
7949                  * to have a fs image to test on before we just add repair
7950                  * functionality willy-nilly so we know we won't screw up the
7951                  * repair.
7952                  */
7953
7954                 entry = find_entry(&entries, dback->disk_bytenr,
7955                                    dback->bytes);
7956                 if (!entry) {
7957                         entry = malloc(sizeof(struct extent_entry));
7958                         if (!entry) {
7959                                 ret = -ENOMEM;
7960                                 goto out;
7961                         }
7962                         memset(entry, 0, sizeof(*entry));
7963                         entry->bytenr = dback->disk_bytenr;
7964                         entry->bytes = dback->bytes;
7965                         list_add_tail(&entry->list, &entries);
7966                         nr_entries++;
7967                 }
7968
7969                 /*
7970                  * If we only have on entry we may think the entries agree when
7971                  * in reality they don't so we have to do some extra checking.
7972                  */
7973                 if (dback->disk_bytenr != rec->start ||
7974                     dback->bytes != rec->nr || back->broken)
7975                         mismatch = 1;
7976
7977                 if (back->broken) {
7978                         entry->broken++;
7979                         broken_entries++;
7980                 }
7981
7982                 entry->count++;
7983         }
7984
7985         /* Yay all the backrefs agree, carry on good sir */
7986         if (nr_entries <= 1 && !mismatch)
7987                 goto out;
7988
7989         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7990                 "%Lu\n", rec->start);
7991
7992         /*
7993          * First we want to see if the backrefs can agree amongst themselves who
7994          * is right, so figure out which one of the entries has the highest
7995          * count.
7996          */
7997         best = find_most_right_entry(&entries);
7998
7999         /*
8000          * Ok so we may have an even split between what the backrefs think, so
8001          * this is where we use the extent ref to see what it thinks.
8002          */
8003         if (!best) {
8004                 entry = find_entry(&entries, rec->start, rec->nr);
8005                 if (!entry && (!broken_entries || !rec->found_rec)) {
8006                         fprintf(stderr, "Backrefs don't agree with each other "
8007                                 "and extent record doesn't agree with anybody,"
8008                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8009                                 rec->start, rec->nr);
8010                         ret = -EINVAL;
8011                         goto out;
8012                 } else if (!entry) {
8013                         /*
8014                          * Ok our backrefs were broken, we'll assume this is the
8015                          * correct value and add an entry for this range.
8016                          */
8017                         entry = malloc(sizeof(struct extent_entry));
8018                         if (!entry) {
8019                                 ret = -ENOMEM;
8020                                 goto out;
8021                         }
8022                         memset(entry, 0, sizeof(*entry));
8023                         entry->bytenr = rec->start;
8024                         entry->bytes = rec->nr;
8025                         list_add_tail(&entry->list, &entries);
8026                         nr_entries++;
8027                 }
8028                 entry->count++;
8029                 best = find_most_right_entry(&entries);
8030                 if (!best) {
8031                         fprintf(stderr, "Backrefs and extent record evenly "
8032                                 "split on who is right, this is going to "
8033                                 "require user input to fix bytenr %Lu bytes "
8034                                 "%Lu\n", rec->start, rec->nr);
8035                         ret = -EINVAL;
8036                         goto out;
8037                 }
8038         }
8039
8040         /*
8041          * I don't think this can happen currently as we'll abort() if we catch
8042          * this case higher up, but in case somebody removes that we still can't
8043          * deal with it properly here yet, so just bail out of that's the case.
8044          */
8045         if (best->bytenr != rec->start) {
8046                 fprintf(stderr, "Extent start and backref starts don't match, "
8047                         "please use btrfs-image on this file system and send "
8048                         "it to a btrfs developer so they can make fsck fix "
8049                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8050                         rec->start, rec->nr);
8051                 ret = -EINVAL;
8052                 goto out;
8053         }
8054
8055         /*
8056          * Ok great we all agreed on an extent record, let's go find the real
8057          * references and fix up the ones that don't match.
8058          */
8059         list_for_each_entry(back, &rec->backrefs, list) {
8060                 if (back->full_backref || !back->is_data)
8061                         continue;
8062
8063                 dback = to_data_backref(back);
8064
8065                 /*
8066                  * Still ignoring backrefs that don't have a real ref attached
8067                  * to them.
8068                  */
8069                 if (dback->found_ref == 0)
8070                         continue;
8071
8072                 if (dback->bytes == best->bytes &&
8073                     dback->disk_bytenr == best->bytenr)
8074                         continue;
8075
8076                 ret = repair_ref(info, path, dback, best);
8077                 if (ret)
8078                         goto out;
8079         }
8080
8081         /*
8082          * Ok we messed with the actual refs, which means we need to drop our
8083          * entire cache and go back and rescan.  I know this is a huge pain and
8084          * adds a lot of extra work, but it's the only way to be safe.  Once all
8085          * the backrefs agree we may not need to do anything to the extent
8086          * record itself.
8087          */
8088         ret = -EAGAIN;
8089 out:
8090         while (!list_empty(&entries)) {
8091                 entry = list_entry(entries.next, struct extent_entry, list);
8092                 list_del_init(&entry->list);
8093                 free(entry);
8094         }
8095         return ret;
8096 }
8097
8098 static int process_duplicates(struct btrfs_root *root,
8099                               struct cache_tree *extent_cache,
8100                               struct extent_record *rec)
8101 {
8102         struct extent_record *good, *tmp;
8103         struct cache_extent *cache;
8104         int ret;
8105
8106         /*
8107          * If we found a extent record for this extent then return, or if we
8108          * have more than one duplicate we are likely going to need to delete
8109          * something.
8110          */
8111         if (rec->found_rec || rec->num_duplicates > 1)
8112                 return 0;
8113
8114         /* Shouldn't happen but just in case */
8115         BUG_ON(!rec->num_duplicates);
8116
8117         /*
8118          * So this happens if we end up with a backref that doesn't match the
8119          * actual extent entry.  So either the backref is bad or the extent
8120          * entry is bad.  Either way we want to have the extent_record actually
8121          * reflect what we found in the extent_tree, so we need to take the
8122          * duplicate out and use that as the extent_record since the only way we
8123          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8124          */
8125         remove_cache_extent(extent_cache, &rec->cache);
8126
8127         good = to_extent_record(rec->dups.next);
8128         list_del_init(&good->list);
8129         INIT_LIST_HEAD(&good->backrefs);
8130         INIT_LIST_HEAD(&good->dups);
8131         good->cache.start = good->start;
8132         good->cache.size = good->nr;
8133         good->content_checked = 0;
8134         good->owner_ref_checked = 0;
8135         good->num_duplicates = 0;
8136         good->refs = rec->refs;
8137         list_splice_init(&rec->backrefs, &good->backrefs);
8138         while (1) {
8139                 cache = lookup_cache_extent(extent_cache, good->start,
8140                                             good->nr);
8141                 if (!cache)
8142                         break;
8143                 tmp = container_of(cache, struct extent_record, cache);
8144
8145                 /*
8146                  * If we find another overlapping extent and it's found_rec is
8147                  * set then it's a duplicate and we need to try and delete
8148                  * something.
8149                  */
8150                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8151                         if (list_empty(&good->list))
8152                                 list_add_tail(&good->list,
8153                                               &duplicate_extents);
8154                         good->num_duplicates += tmp->num_duplicates + 1;
8155                         list_splice_init(&tmp->dups, &good->dups);
8156                         list_del_init(&tmp->list);
8157                         list_add_tail(&tmp->list, &good->dups);
8158                         remove_cache_extent(extent_cache, &tmp->cache);
8159                         continue;
8160                 }
8161
8162                 /*
8163                  * Ok we have another non extent item backed extent rec, so lets
8164                  * just add it to this extent and carry on like we did above.
8165                  */
8166                 good->refs += tmp->refs;
8167                 list_splice_init(&tmp->backrefs, &good->backrefs);
8168                 remove_cache_extent(extent_cache, &tmp->cache);
8169                 free(tmp);
8170         }
8171         ret = insert_cache_extent(extent_cache, &good->cache);
8172         BUG_ON(ret);
8173         free(rec);
8174         return good->num_duplicates ? 0 : 1;
8175 }
8176
8177 static int delete_duplicate_records(struct btrfs_root *root,
8178                                     struct extent_record *rec)
8179 {
8180         struct btrfs_trans_handle *trans;
8181         LIST_HEAD(delete_list);
8182         struct btrfs_path path;
8183         struct extent_record *tmp, *good, *n;
8184         int nr_del = 0;
8185         int ret = 0, err;
8186         struct btrfs_key key;
8187
8188         btrfs_init_path(&path);
8189
8190         good = rec;
8191         /* Find the record that covers all of the duplicates. */
8192         list_for_each_entry(tmp, &rec->dups, list) {
8193                 if (good->start < tmp->start)
8194                         continue;
8195                 if (good->nr > tmp->nr)
8196                         continue;
8197
8198                 if (tmp->start + tmp->nr < good->start + good->nr) {
8199                         fprintf(stderr, "Ok we have overlapping extents that "
8200                                 "aren't completely covered by each other, this "
8201                                 "is going to require more careful thought.  "
8202                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8203                                 tmp->start, tmp->nr, good->start, good->nr);
8204                         abort();
8205                 }
8206                 good = tmp;
8207         }
8208
8209         if (good != rec)
8210                 list_add_tail(&rec->list, &delete_list);
8211
8212         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8213                 if (tmp == good)
8214                         continue;
8215                 list_move_tail(&tmp->list, &delete_list);
8216         }
8217
8218         root = root->fs_info->extent_root;
8219         trans = btrfs_start_transaction(root, 1);
8220         if (IS_ERR(trans)) {
8221                 ret = PTR_ERR(trans);
8222                 goto out;
8223         }
8224
8225         list_for_each_entry(tmp, &delete_list, list) {
8226                 if (tmp->found_rec == 0)
8227                         continue;
8228                 key.objectid = tmp->start;
8229                 key.type = BTRFS_EXTENT_ITEM_KEY;
8230                 key.offset = tmp->nr;
8231
8232                 /* Shouldn't happen but just in case */
8233                 if (tmp->metadata) {
8234                         fprintf(stderr, "Well this shouldn't happen, extent "
8235                                 "record overlaps but is metadata? "
8236                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8237                         abort();
8238                 }
8239
8240                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8241                 if (ret) {
8242                         if (ret > 0)
8243                                 ret = -EINVAL;
8244                         break;
8245                 }
8246                 ret = btrfs_del_item(trans, root, &path);
8247                 if (ret)
8248                         break;
8249                 btrfs_release_path(&path);
8250                 nr_del++;
8251         }
8252         err = btrfs_commit_transaction(trans, root);
8253         if (err && !ret)
8254                 ret = err;
8255 out:
8256         while (!list_empty(&delete_list)) {
8257                 tmp = to_extent_record(delete_list.next);
8258                 list_del_init(&tmp->list);
8259                 if (tmp == rec)
8260                         continue;
8261                 free(tmp);
8262         }
8263
8264         while (!list_empty(&rec->dups)) {
8265                 tmp = to_extent_record(rec->dups.next);
8266                 list_del_init(&tmp->list);
8267                 free(tmp);
8268         }
8269
8270         btrfs_release_path(&path);
8271
8272         if (!ret && !nr_del)
8273                 rec->num_duplicates = 0;
8274
8275         return ret ? ret : nr_del;
8276 }
8277
8278 static int find_possible_backrefs(struct btrfs_fs_info *info,
8279                                   struct btrfs_path *path,
8280                                   struct cache_tree *extent_cache,
8281                                   struct extent_record *rec)
8282 {
8283         struct btrfs_root *root;
8284         struct extent_backref *back;
8285         struct data_backref *dback;
8286         struct cache_extent *cache;
8287         struct btrfs_file_extent_item *fi;
8288         struct btrfs_key key;
8289         u64 bytenr, bytes;
8290         int ret;
8291
8292         list_for_each_entry(back, &rec->backrefs, list) {
8293                 /* Don't care about full backrefs (poor unloved backrefs) */
8294                 if (back->full_backref || !back->is_data)
8295                         continue;
8296
8297                 dback = to_data_backref(back);
8298
8299                 /* We found this one, we don't need to do a lookup */
8300                 if (dback->found_ref)
8301                         continue;
8302
8303                 key.objectid = dback->root;
8304                 key.type = BTRFS_ROOT_ITEM_KEY;
8305                 key.offset = (u64)-1;
8306
8307                 root = btrfs_read_fs_root(info, &key);
8308
8309                 /* No root, definitely a bad ref, skip */
8310                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8311                         continue;
8312                 /* Other err, exit */
8313                 if (IS_ERR(root))
8314                         return PTR_ERR(root);
8315
8316                 key.objectid = dback->owner;
8317                 key.type = BTRFS_EXTENT_DATA_KEY;
8318                 key.offset = dback->offset;
8319                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8320                 if (ret) {
8321                         btrfs_release_path(path);
8322                         if (ret < 0)
8323                                 return ret;
8324                         /* Didn't find it, we can carry on */
8325                         ret = 0;
8326                         continue;
8327                 }
8328
8329                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8330                                     struct btrfs_file_extent_item);
8331                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8332                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8333                 btrfs_release_path(path);
8334                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8335                 if (cache) {
8336                         struct extent_record *tmp;
8337                         tmp = container_of(cache, struct extent_record, cache);
8338
8339                         /*
8340                          * If we found an extent record for the bytenr for this
8341                          * particular backref then we can't add it to our
8342                          * current extent record.  We only want to add backrefs
8343                          * that don't have a corresponding extent item in the
8344                          * extent tree since they likely belong to this record
8345                          * and we need to fix it if it doesn't match bytenrs.
8346                          */
8347                         if  (tmp->found_rec)
8348                                 continue;
8349                 }
8350
8351                 dback->found_ref += 1;
8352                 dback->disk_bytenr = bytenr;
8353                 dback->bytes = bytes;
8354
8355                 /*
8356                  * Set this so the verify backref code knows not to trust the
8357                  * values in this backref.
8358                  */
8359                 back->broken = 1;
8360         }
8361
8362         return 0;
8363 }
8364
8365 /*
8366  * Record orphan data ref into corresponding root.
8367  *
8368  * Return 0 if the extent item contains data ref and recorded.
8369  * Return 1 if the extent item contains no useful data ref
8370  *   On that case, it may contains only shared_dataref or metadata backref
8371  *   or the file extent exists(this should be handled by the extent bytenr
8372  *   recovery routine)
8373  * Return <0 if something goes wrong.
8374  */
8375 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8376                                       struct extent_record *rec)
8377 {
8378         struct btrfs_key key;
8379         struct btrfs_root *dest_root;
8380         struct extent_backref *back;
8381         struct data_backref *dback;
8382         struct orphan_data_extent *orphan;
8383         struct btrfs_path path;
8384         int recorded_data_ref = 0;
8385         int ret = 0;
8386
8387         if (rec->metadata)
8388                 return 1;
8389         btrfs_init_path(&path);
8390         list_for_each_entry(back, &rec->backrefs, list) {
8391                 if (back->full_backref || !back->is_data ||
8392                     !back->found_extent_tree)
8393                         continue;
8394                 dback = to_data_backref(back);
8395                 if (dback->found_ref)
8396                         continue;
8397                 key.objectid = dback->root;
8398                 key.type = BTRFS_ROOT_ITEM_KEY;
8399                 key.offset = (u64)-1;
8400
8401                 dest_root = btrfs_read_fs_root(fs_info, &key);
8402
8403                 /* For non-exist root we just skip it */
8404                 if (IS_ERR(dest_root) || !dest_root)
8405                         continue;
8406
8407                 key.objectid = dback->owner;
8408                 key.type = BTRFS_EXTENT_DATA_KEY;
8409                 key.offset = dback->offset;
8410
8411                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8412                 btrfs_release_path(&path);
8413                 /*
8414                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8415                  * we need to record it for inode/file extent rebuild.
8416                  * For ret > 0, we record it only for file extent rebuild.
8417                  * For ret == 0, the file extent exists but only bytenr
8418                  * mismatch, let the original bytenr fix routine to handle,
8419                  * don't record it.
8420                  */
8421                 if (ret == 0)
8422                         continue;
8423                 ret = 0;
8424                 orphan = malloc(sizeof(*orphan));
8425                 if (!orphan) {
8426                         ret = -ENOMEM;
8427                         goto out;
8428                 }
8429                 INIT_LIST_HEAD(&orphan->list);
8430                 orphan->root = dback->root;
8431                 orphan->objectid = dback->owner;
8432                 orphan->offset = dback->offset;
8433                 orphan->disk_bytenr = rec->cache.start;
8434                 orphan->disk_len = rec->cache.size;
8435                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8436                 recorded_data_ref = 1;
8437         }
8438 out:
8439         btrfs_release_path(&path);
8440         if (!ret)
8441                 return !recorded_data_ref;
8442         else
8443                 return ret;
8444 }
8445
8446 /*
8447  * when an incorrect extent item is found, this will delete
8448  * all of the existing entries for it and recreate them
8449  * based on what the tree scan found.
8450  */
8451 static int fixup_extent_refs(struct btrfs_fs_info *info,
8452                              struct cache_tree *extent_cache,
8453                              struct extent_record *rec)
8454 {
8455         struct btrfs_trans_handle *trans = NULL;
8456         int ret;
8457         struct btrfs_path path;
8458         struct list_head *cur = rec->backrefs.next;
8459         struct cache_extent *cache;
8460         struct extent_backref *back;
8461         int allocated = 0;
8462         u64 flags = 0;
8463
8464         if (rec->flag_block_full_backref)
8465                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8466
8467         btrfs_init_path(&path);
8468         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8469                 /*
8470                  * Sometimes the backrefs themselves are so broken they don't
8471                  * get attached to any meaningful rec, so first go back and
8472                  * check any of our backrefs that we couldn't find and throw
8473                  * them into the list if we find the backref so that
8474                  * verify_backrefs can figure out what to do.
8475                  */
8476                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8477                 if (ret < 0)
8478                         goto out;
8479         }
8480
8481         /* step one, make sure all of the backrefs agree */
8482         ret = verify_backrefs(info, &path, rec);
8483         if (ret < 0)
8484                 goto out;
8485
8486         trans = btrfs_start_transaction(info->extent_root, 1);
8487         if (IS_ERR(trans)) {
8488                 ret = PTR_ERR(trans);
8489                 goto out;
8490         }
8491
8492         /* step two, delete all the existing records */
8493         ret = delete_extent_records(trans, info->extent_root, &path,
8494                                     rec->start, rec->max_size);
8495
8496         if (ret < 0)
8497                 goto out;
8498
8499         /* was this block corrupt?  If so, don't add references to it */
8500         cache = lookup_cache_extent(info->corrupt_blocks,
8501                                     rec->start, rec->max_size);
8502         if (cache) {
8503                 ret = 0;
8504                 goto out;
8505         }
8506
8507         /* step three, recreate all the refs we did find */
8508         while(cur != &rec->backrefs) {
8509                 back = to_extent_backref(cur);
8510                 cur = cur->next;
8511
8512                 /*
8513                  * if we didn't find any references, don't create a
8514                  * new extent record
8515                  */
8516                 if (!back->found_ref)
8517                         continue;
8518
8519                 rec->bad_full_backref = 0;
8520                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8521                 allocated = 1;
8522
8523                 if (ret)
8524                         goto out;
8525         }
8526 out:
8527         if (trans) {
8528                 int err = btrfs_commit_transaction(trans, info->extent_root);
8529                 if (!ret)
8530                         ret = err;
8531         }
8532
8533         btrfs_release_path(&path);
8534         return ret;
8535 }
8536
8537 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8538                               struct extent_record *rec)
8539 {
8540         struct btrfs_trans_handle *trans;
8541         struct btrfs_root *root = fs_info->extent_root;
8542         struct btrfs_path path;
8543         struct btrfs_extent_item *ei;
8544         struct btrfs_key key;
8545         u64 flags;
8546         int ret = 0;
8547
8548         key.objectid = rec->start;
8549         if (rec->metadata) {
8550                 key.type = BTRFS_METADATA_ITEM_KEY;
8551                 key.offset = rec->info_level;
8552         } else {
8553                 key.type = BTRFS_EXTENT_ITEM_KEY;
8554                 key.offset = rec->max_size;
8555         }
8556
8557         trans = btrfs_start_transaction(root, 0);
8558         if (IS_ERR(trans))
8559                 return PTR_ERR(trans);
8560
8561         btrfs_init_path(&path);
8562         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8563         if (ret < 0) {
8564                 btrfs_release_path(&path);
8565                 btrfs_commit_transaction(trans, root);
8566                 return ret;
8567         } else if (ret) {
8568                 fprintf(stderr, "Didn't find extent for %llu\n",
8569                         (unsigned long long)rec->start);
8570                 btrfs_release_path(&path);
8571                 btrfs_commit_transaction(trans, root);
8572                 return -ENOENT;
8573         }
8574
8575         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8576                             struct btrfs_extent_item);
8577         flags = btrfs_extent_flags(path.nodes[0], ei);
8578         if (rec->flag_block_full_backref) {
8579                 fprintf(stderr, "setting full backref on %llu\n",
8580                         (unsigned long long)key.objectid);
8581                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8582         } else {
8583                 fprintf(stderr, "clearing full backref on %llu\n",
8584                         (unsigned long long)key.objectid);
8585                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8586         }
8587         btrfs_set_extent_flags(path.nodes[0], ei, flags);
8588         btrfs_mark_buffer_dirty(path.nodes[0]);
8589         btrfs_release_path(&path);
8590         return btrfs_commit_transaction(trans, root);
8591 }
8592
8593 /* right now we only prune from the extent allocation tree */
8594 static int prune_one_block(struct btrfs_trans_handle *trans,
8595                            struct btrfs_fs_info *info,
8596                            struct btrfs_corrupt_block *corrupt)
8597 {
8598         int ret;
8599         struct btrfs_path path;
8600         struct extent_buffer *eb;
8601         u64 found;
8602         int slot;
8603         int nritems;
8604         int level = corrupt->level + 1;
8605
8606         btrfs_init_path(&path);
8607 again:
8608         /* we want to stop at the parent to our busted block */
8609         path.lowest_level = level;
8610
8611         ret = btrfs_search_slot(trans, info->extent_root,
8612                                 &corrupt->key, &path, -1, 1);
8613
8614         if (ret < 0)
8615                 goto out;
8616
8617         eb = path.nodes[level];
8618         if (!eb) {
8619                 ret = -ENOENT;
8620                 goto out;
8621         }
8622
8623         /*
8624          * hopefully the search gave us the block we want to prune,
8625          * lets try that first
8626          */
8627         slot = path.slots[level];
8628         found =  btrfs_node_blockptr(eb, slot);
8629         if (found == corrupt->cache.start)
8630                 goto del_ptr;
8631
8632         nritems = btrfs_header_nritems(eb);
8633
8634         /* the search failed, lets scan this node and hope we find it */
8635         for (slot = 0; slot < nritems; slot++) {
8636                 found =  btrfs_node_blockptr(eb, slot);
8637                 if (found == corrupt->cache.start)
8638                         goto del_ptr;
8639         }
8640         /*
8641          * we couldn't find the bad block.  TODO, search all the nodes for pointers
8642          * to this block
8643          */
8644         if (eb == info->extent_root->node) {
8645                 ret = -ENOENT;
8646                 goto out;
8647         } else {
8648                 level++;
8649                 btrfs_release_path(&path);
8650                 goto again;
8651         }
8652
8653 del_ptr:
8654         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8655         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8656
8657 out:
8658         btrfs_release_path(&path);
8659         return ret;
8660 }
8661
8662 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8663 {
8664         struct btrfs_trans_handle *trans = NULL;
8665         struct cache_extent *cache;
8666         struct btrfs_corrupt_block *corrupt;
8667
8668         while (1) {
8669                 cache = search_cache_extent(info->corrupt_blocks, 0);
8670                 if (!cache)
8671                         break;
8672                 if (!trans) {
8673                         trans = btrfs_start_transaction(info->extent_root, 1);
8674                         if (IS_ERR(trans))
8675                                 return PTR_ERR(trans);
8676                 }
8677                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8678                 prune_one_block(trans, info, corrupt);
8679                 remove_cache_extent(info->corrupt_blocks, cache);
8680         }
8681         if (trans)
8682                 return btrfs_commit_transaction(trans, info->extent_root);
8683         return 0;
8684 }
8685
8686 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8687 {
8688         struct btrfs_block_group_cache *cache;
8689         u64 start, end;
8690         int ret;
8691
8692         while (1) {
8693                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8694                                             &start, &end, EXTENT_DIRTY);
8695                 if (ret)
8696                         break;
8697                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8698                                    GFP_NOFS);
8699         }
8700
8701         start = 0;
8702         while (1) {
8703                 cache = btrfs_lookup_first_block_group(fs_info, start);
8704                 if (!cache)
8705                         break;
8706                 if (cache->cached)
8707                         cache->cached = 0;
8708                 start = cache->key.objectid + cache->key.offset;
8709         }
8710 }
8711
8712 static int check_extent_refs(struct btrfs_root *root,
8713                              struct cache_tree *extent_cache)
8714 {
8715         struct extent_record *rec;
8716         struct cache_extent *cache;
8717         int err = 0;
8718         int ret = 0;
8719         int fixed = 0;
8720         int had_dups = 0;
8721         int recorded = 0;
8722
8723         if (repair) {
8724                 /*
8725                  * if we're doing a repair, we have to make sure
8726                  * we don't allocate from the problem extents.
8727                  * In the worst case, this will be all the
8728                  * extents in the FS
8729                  */
8730                 cache = search_cache_extent(extent_cache, 0);
8731                 while(cache) {
8732                         rec = container_of(cache, struct extent_record, cache);
8733                         set_extent_dirty(root->fs_info->excluded_extents,
8734                                          rec->start,
8735                                          rec->start + rec->max_size - 1,
8736                                          GFP_NOFS);
8737                         cache = next_cache_extent(cache);
8738                 }
8739
8740                 /* pin down all the corrupted blocks too */
8741                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8742                 while(cache) {
8743                         set_extent_dirty(root->fs_info->excluded_extents,
8744                                          cache->start,
8745                                          cache->start + cache->size - 1,
8746                                          GFP_NOFS);
8747                         cache = next_cache_extent(cache);
8748                 }
8749                 prune_corrupt_blocks(root->fs_info);
8750                 reset_cached_block_groups(root->fs_info);
8751         }
8752
8753         reset_cached_block_groups(root->fs_info);
8754
8755         /*
8756          * We need to delete any duplicate entries we find first otherwise we
8757          * could mess up the extent tree when we have backrefs that actually
8758          * belong to a different extent item and not the weird duplicate one.
8759          */
8760         while (repair && !list_empty(&duplicate_extents)) {
8761                 rec = to_extent_record(duplicate_extents.next);
8762                 list_del_init(&rec->list);
8763
8764                 /* Sometimes we can find a backref before we find an actual
8765                  * extent, so we need to process it a little bit to see if there
8766                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8767                  * if this is a backref screwup.  If we need to delete stuff
8768                  * process_duplicates() will return 0, otherwise it will return
8769                  * 1 and we
8770                  */
8771                 if (process_duplicates(root, extent_cache, rec))
8772                         continue;
8773                 ret = delete_duplicate_records(root, rec);
8774                 if (ret < 0)
8775                         return ret;
8776                 /*
8777                  * delete_duplicate_records will return the number of entries
8778                  * deleted, so if it's greater than 0 then we know we actually
8779                  * did something and we need to remove.
8780                  */
8781                 if (ret)
8782                         had_dups = 1;
8783         }
8784
8785         if (had_dups)
8786                 return -EAGAIN;
8787
8788         while(1) {
8789                 int cur_err = 0;
8790
8791                 fixed = 0;
8792                 recorded = 0;
8793                 cache = search_cache_extent(extent_cache, 0);
8794                 if (!cache)
8795                         break;
8796                 rec = container_of(cache, struct extent_record, cache);
8797                 if (rec->num_duplicates) {
8798                         fprintf(stderr, "extent item %llu has multiple extent "
8799                                 "items\n", (unsigned long long)rec->start);
8800                         err = 1;
8801                         cur_err = 1;
8802                 }
8803
8804                 if (rec->refs != rec->extent_item_refs) {
8805                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8806                                 (unsigned long long)rec->start,
8807                                 (unsigned long long)rec->nr);
8808                         fprintf(stderr, "extent item %llu, found %llu\n",
8809                                 (unsigned long long)rec->extent_item_refs,
8810                                 (unsigned long long)rec->refs);
8811                         ret = record_orphan_data_extents(root->fs_info, rec);
8812                         if (ret < 0)
8813                                 goto repair_abort;
8814                         if (ret == 0) {
8815                                 recorded = 1;
8816                         } else {
8817                                 /*
8818                                  * we can't use the extent to repair file
8819                                  * extent, let the fallback method handle it.
8820                                  */
8821                                 if (!fixed && repair) {
8822                                         ret = fixup_extent_refs(
8823                                                         root->fs_info,
8824                                                         extent_cache, rec);
8825                                         if (ret)
8826                                                 goto repair_abort;
8827                                         fixed = 1;
8828                                 }
8829                         }
8830                         err = 1;
8831                         cur_err = 1;
8832                 }
8833                 if (all_backpointers_checked(rec, 1)) {
8834                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8835                                 (unsigned long long)rec->start,
8836                                 (unsigned long long)rec->nr);
8837
8838                         if (!fixed && !recorded && repair) {
8839                                 ret = fixup_extent_refs(root->fs_info,
8840                                                         extent_cache, rec);
8841                                 if (ret)
8842                                         goto repair_abort;
8843                                 fixed = 1;
8844                         }
8845                         cur_err = 1;
8846                         err = 1;
8847                 }
8848                 if (!rec->owner_ref_checked) {
8849                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8850                                 (unsigned long long)rec->start,
8851                                 (unsigned long long)rec->nr);
8852                         if (!fixed && !recorded && repair) {
8853                                 ret = fixup_extent_refs(root->fs_info,
8854                                                         extent_cache, rec);
8855                                 if (ret)
8856                                         goto repair_abort;
8857                                 fixed = 1;
8858                         }
8859                         err = 1;
8860                         cur_err = 1;
8861                 }
8862                 if (rec->bad_full_backref) {
8863                         fprintf(stderr, "bad full backref, on [%llu]\n",
8864                                 (unsigned long long)rec->start);
8865                         if (repair) {
8866                                 ret = fixup_extent_flags(root->fs_info, rec);
8867                                 if (ret)
8868                                         goto repair_abort;
8869                                 fixed = 1;
8870                         }
8871                         err = 1;
8872                         cur_err = 1;
8873                 }
8874                 /*
8875                  * Although it's not a extent ref's problem, we reuse this
8876                  * routine for error reporting.
8877                  * No repair function yet.
8878                  */
8879                 if (rec->crossing_stripes) {
8880                         fprintf(stderr,
8881                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8882                                 rec->start, rec->start + rec->max_size);
8883                         err = 1;
8884                         cur_err = 1;
8885                 }
8886
8887                 if (rec->wrong_chunk_type) {
8888                         fprintf(stderr,
8889                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8890                                 rec->start, rec->start + rec->max_size);
8891                         err = 1;
8892                         cur_err = 1;
8893                 }
8894
8895                 remove_cache_extent(extent_cache, cache);
8896                 free_all_extent_backrefs(rec);
8897                 if (!init_extent_tree && repair && (!cur_err || fixed))
8898                         clear_extent_dirty(root->fs_info->excluded_extents,
8899                                            rec->start,
8900                                            rec->start + rec->max_size - 1,
8901                                            GFP_NOFS);
8902                 free(rec);
8903         }
8904 repair_abort:
8905         if (repair) {
8906                 if (ret && ret != -EAGAIN) {
8907                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8908                         exit(1);
8909                 } else if (!ret) {
8910                         struct btrfs_trans_handle *trans;
8911
8912                         root = root->fs_info->extent_root;
8913                         trans = btrfs_start_transaction(root, 1);
8914                         if (IS_ERR(trans)) {
8915                                 ret = PTR_ERR(trans);
8916                                 goto repair_abort;
8917                         }
8918
8919                         btrfs_fix_block_accounting(trans, root);
8920                         ret = btrfs_commit_transaction(trans, root);
8921                         if (ret)
8922                                 goto repair_abort;
8923                 }
8924                 if (err)
8925                         fprintf(stderr, "repaired damaged extent references\n");
8926                 return ret;
8927         }
8928         return err;
8929 }
8930
8931 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8932 {
8933         u64 stripe_size;
8934
8935         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8936                 stripe_size = length;
8937                 stripe_size /= num_stripes;
8938         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8939                 stripe_size = length * 2;
8940                 stripe_size /= num_stripes;
8941         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8942                 stripe_size = length;
8943                 stripe_size /= (num_stripes - 1);
8944         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8945                 stripe_size = length;
8946                 stripe_size /= (num_stripes - 2);
8947         } else {
8948                 stripe_size = length;
8949         }
8950         return stripe_size;
8951 }
8952
8953 /*
8954  * Check the chunk with its block group/dev list ref:
8955  * Return 0 if all refs seems valid.
8956  * Return 1 if part of refs seems valid, need later check for rebuild ref
8957  * like missing block group and needs to search extent tree to rebuild them.
8958  * Return -1 if essential refs are missing and unable to rebuild.
8959  */
8960 static int check_chunk_refs(struct chunk_record *chunk_rec,
8961                             struct block_group_tree *block_group_cache,
8962                             struct device_extent_tree *dev_extent_cache,
8963                             int silent)
8964 {
8965         struct cache_extent *block_group_item;
8966         struct block_group_record *block_group_rec;
8967         struct cache_extent *dev_extent_item;
8968         struct device_extent_record *dev_extent_rec;
8969         u64 devid;
8970         u64 offset;
8971         u64 length;
8972         int metadump_v2 = 0;
8973         int i;
8974         int ret = 0;
8975
8976         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8977                                                chunk_rec->offset,
8978                                                chunk_rec->length);
8979         if (block_group_item) {
8980                 block_group_rec = container_of(block_group_item,
8981                                                struct block_group_record,
8982                                                cache);
8983                 if (chunk_rec->length != block_group_rec->offset ||
8984                     chunk_rec->offset != block_group_rec->objectid ||
8985                     (!metadump_v2 &&
8986                      chunk_rec->type_flags != block_group_rec->flags)) {
8987                         if (!silent)
8988                                 fprintf(stderr,
8989                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8990                                         chunk_rec->objectid,
8991                                         chunk_rec->type,
8992                                         chunk_rec->offset,
8993                                         chunk_rec->length,
8994                                         chunk_rec->offset,
8995                                         chunk_rec->type_flags,
8996                                         block_group_rec->objectid,
8997                                         block_group_rec->type,
8998                                         block_group_rec->offset,
8999                                         block_group_rec->offset,
9000                                         block_group_rec->objectid,
9001                                         block_group_rec->flags);
9002                         ret = -1;
9003                 } else {
9004                         list_del_init(&block_group_rec->list);
9005                         chunk_rec->bg_rec = block_group_rec;
9006                 }
9007         } else {
9008                 if (!silent)
9009                         fprintf(stderr,
9010                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9011                                 chunk_rec->objectid,
9012                                 chunk_rec->type,
9013                                 chunk_rec->offset,
9014                                 chunk_rec->length,
9015                                 chunk_rec->offset,
9016                                 chunk_rec->type_flags);
9017                 ret = 1;
9018         }
9019
9020         if (metadump_v2)
9021                 return ret;
9022
9023         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9024                                     chunk_rec->num_stripes);
9025         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9026                 devid = chunk_rec->stripes[i].devid;
9027                 offset = chunk_rec->stripes[i].offset;
9028                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9029                                                        devid, offset, length);
9030                 if (dev_extent_item) {
9031                         dev_extent_rec = container_of(dev_extent_item,
9032                                                 struct device_extent_record,
9033                                                 cache);
9034                         if (dev_extent_rec->objectid != devid ||
9035                             dev_extent_rec->offset != offset ||
9036                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9037                             dev_extent_rec->length != length) {
9038                                 if (!silent)
9039                                         fprintf(stderr,
9040                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9041                                                 chunk_rec->objectid,
9042                                                 chunk_rec->type,
9043                                                 chunk_rec->offset,
9044                                                 chunk_rec->stripes[i].devid,
9045                                                 chunk_rec->stripes[i].offset,
9046                                                 dev_extent_rec->objectid,
9047                                                 dev_extent_rec->offset,
9048                                                 dev_extent_rec->length);
9049                                 ret = -1;
9050                         } else {
9051                                 list_move(&dev_extent_rec->chunk_list,
9052                                           &chunk_rec->dextents);
9053                         }
9054                 } else {
9055                         if (!silent)
9056                                 fprintf(stderr,
9057                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9058                                         chunk_rec->objectid,
9059                                         chunk_rec->type,
9060                                         chunk_rec->offset,
9061                                         chunk_rec->stripes[i].devid,
9062                                         chunk_rec->stripes[i].offset);
9063                         ret = -1;
9064                 }
9065         }
9066         return ret;
9067 }
9068
9069 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9070 int check_chunks(struct cache_tree *chunk_cache,
9071                  struct block_group_tree *block_group_cache,
9072                  struct device_extent_tree *dev_extent_cache,
9073                  struct list_head *good, struct list_head *bad,
9074                  struct list_head *rebuild, int silent)
9075 {
9076         struct cache_extent *chunk_item;
9077         struct chunk_record *chunk_rec;
9078         struct block_group_record *bg_rec;
9079         struct device_extent_record *dext_rec;
9080         int err;
9081         int ret = 0;
9082
9083         chunk_item = first_cache_extent(chunk_cache);
9084         while (chunk_item) {
9085                 chunk_rec = container_of(chunk_item, struct chunk_record,
9086                                          cache);
9087                 err = check_chunk_refs(chunk_rec, block_group_cache,
9088                                        dev_extent_cache, silent);
9089                 if (err < 0)
9090                         ret = err;
9091                 if (err == 0 && good)
9092                         list_add_tail(&chunk_rec->list, good);
9093                 if (err > 0 && rebuild)
9094                         list_add_tail(&chunk_rec->list, rebuild);
9095                 if (err < 0 && bad)
9096                         list_add_tail(&chunk_rec->list, bad);
9097                 chunk_item = next_cache_extent(chunk_item);
9098         }
9099
9100         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9101                 if (!silent)
9102                         fprintf(stderr,
9103                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9104                                 bg_rec->objectid,
9105                                 bg_rec->offset,
9106                                 bg_rec->flags);
9107                 if (!ret)
9108                         ret = 1;
9109         }
9110
9111         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9112                             chunk_list) {
9113                 if (!silent)
9114                         fprintf(stderr,
9115                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9116                                 dext_rec->objectid,
9117                                 dext_rec->offset,
9118                                 dext_rec->length);
9119                 if (!ret)
9120                         ret = 1;
9121         }
9122         return ret;
9123 }
9124
9125
9126 static int check_device_used(struct device_record *dev_rec,
9127                              struct device_extent_tree *dext_cache)
9128 {
9129         struct cache_extent *cache;
9130         struct device_extent_record *dev_extent_rec;
9131         u64 total_byte = 0;
9132
9133         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9134         while (cache) {
9135                 dev_extent_rec = container_of(cache,
9136                                               struct device_extent_record,
9137                                               cache);
9138                 if (dev_extent_rec->objectid != dev_rec->devid)
9139                         break;
9140
9141                 list_del_init(&dev_extent_rec->device_list);
9142                 total_byte += dev_extent_rec->length;
9143                 cache = next_cache_extent(cache);
9144         }
9145
9146         if (total_byte != dev_rec->byte_used) {
9147                 fprintf(stderr,
9148                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9149                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9150                         dev_rec->type, dev_rec->offset);
9151                 return -1;
9152         } else {
9153                 return 0;
9154         }
9155 }
9156
9157 /* check btrfs_dev_item -> btrfs_dev_extent */
9158 static int check_devices(struct rb_root *dev_cache,
9159                          struct device_extent_tree *dev_extent_cache)
9160 {
9161         struct rb_node *dev_node;
9162         struct device_record *dev_rec;
9163         struct device_extent_record *dext_rec;
9164         int err;
9165         int ret = 0;
9166
9167         dev_node = rb_first(dev_cache);
9168         while (dev_node) {
9169                 dev_rec = container_of(dev_node, struct device_record, node);
9170                 err = check_device_used(dev_rec, dev_extent_cache);
9171                 if (err)
9172                         ret = err;
9173
9174                 dev_node = rb_next(dev_node);
9175         }
9176         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9177                             device_list) {
9178                 fprintf(stderr,
9179                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9180                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9181                 if (!ret)
9182                         ret = 1;
9183         }
9184         return ret;
9185 }
9186
9187 static int add_root_item_to_list(struct list_head *head,
9188                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9189                                   u8 level, u8 drop_level,
9190                                   int level_size, struct btrfs_key *drop_key)
9191 {
9192
9193         struct root_item_record *ri_rec;
9194         ri_rec = malloc(sizeof(*ri_rec));
9195         if (!ri_rec)
9196                 return -ENOMEM;
9197         ri_rec->bytenr = bytenr;
9198         ri_rec->objectid = objectid;
9199         ri_rec->level = level;
9200         ri_rec->level_size = level_size;
9201         ri_rec->drop_level = drop_level;
9202         ri_rec->last_snapshot = last_snapshot;
9203         if (drop_key)
9204                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9205         list_add_tail(&ri_rec->list, head);
9206
9207         return 0;
9208 }
9209
9210 static void free_root_item_list(struct list_head *list)
9211 {
9212         struct root_item_record *ri_rec;
9213
9214         while (!list_empty(list)) {
9215                 ri_rec = list_first_entry(list, struct root_item_record,
9216                                           list);
9217                 list_del_init(&ri_rec->list);
9218                 free(ri_rec);
9219         }
9220 }
9221
9222 static int deal_root_from_list(struct list_head *list,
9223                                struct btrfs_root *root,
9224                                struct block_info *bits,
9225                                int bits_nr,
9226                                struct cache_tree *pending,
9227                                struct cache_tree *seen,
9228                                struct cache_tree *reada,
9229                                struct cache_tree *nodes,
9230                                struct cache_tree *extent_cache,
9231                                struct cache_tree *chunk_cache,
9232                                struct rb_root *dev_cache,
9233                                struct block_group_tree *block_group_cache,
9234                                struct device_extent_tree *dev_extent_cache)
9235 {
9236         int ret = 0;
9237         u64 last;
9238
9239         while (!list_empty(list)) {
9240                 struct root_item_record *rec;
9241                 struct extent_buffer *buf;
9242                 rec = list_entry(list->next,
9243                                  struct root_item_record, list);
9244                 last = 0;
9245                 buf = read_tree_block(root->fs_info->tree_root,
9246                                       rec->bytenr, rec->level_size, 0);
9247                 if (!extent_buffer_uptodate(buf)) {
9248                         free_extent_buffer(buf);
9249                         ret = -EIO;
9250                         break;
9251                 }
9252                 ret = add_root_to_pending(buf, extent_cache, pending,
9253                                     seen, nodes, rec->objectid);
9254                 if (ret < 0)
9255                         break;
9256                 /*
9257                  * To rebuild extent tree, we need deal with snapshot
9258                  * one by one, otherwise we deal with node firstly which
9259                  * can maximize readahead.
9260                  */
9261                 while (1) {
9262                         ret = run_next_block(root, bits, bits_nr, &last,
9263                                              pending, seen, reada, nodes,
9264                                              extent_cache, chunk_cache,
9265                                              dev_cache, block_group_cache,
9266                                              dev_extent_cache, rec);
9267                         if (ret != 0)
9268                                 break;
9269                 }
9270                 free_extent_buffer(buf);
9271                 list_del(&rec->list);
9272                 free(rec);
9273                 if (ret < 0)
9274                         break;
9275         }
9276         while (ret >= 0) {
9277                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9278                                      reada, nodes, extent_cache, chunk_cache,
9279                                      dev_cache, block_group_cache,
9280                                      dev_extent_cache, NULL);
9281                 if (ret != 0) {
9282                         if (ret > 0)
9283                                 ret = 0;
9284                         break;
9285                 }
9286         }
9287         return ret;
9288 }
9289
9290 static int check_chunks_and_extents(struct btrfs_root *root)
9291 {
9292         struct rb_root dev_cache;
9293         struct cache_tree chunk_cache;
9294         struct block_group_tree block_group_cache;
9295         struct device_extent_tree dev_extent_cache;
9296         struct cache_tree extent_cache;
9297         struct cache_tree seen;
9298         struct cache_tree pending;
9299         struct cache_tree reada;
9300         struct cache_tree nodes;
9301         struct extent_io_tree excluded_extents;
9302         struct cache_tree corrupt_blocks;
9303         struct btrfs_path path;
9304         struct btrfs_key key;
9305         struct btrfs_key found_key;
9306         int ret, err = 0;
9307         struct block_info *bits;
9308         int bits_nr;
9309         struct extent_buffer *leaf;
9310         int slot;
9311         struct btrfs_root_item ri;
9312         struct list_head dropping_trees;
9313         struct list_head normal_trees;
9314         struct btrfs_root *root1;
9315         u64 objectid;
9316         u32 level_size;
9317         u8 level;
9318
9319         dev_cache = RB_ROOT;
9320         cache_tree_init(&chunk_cache);
9321         block_group_tree_init(&block_group_cache);
9322         device_extent_tree_init(&dev_extent_cache);
9323
9324         cache_tree_init(&extent_cache);
9325         cache_tree_init(&seen);
9326         cache_tree_init(&pending);
9327         cache_tree_init(&nodes);
9328         cache_tree_init(&reada);
9329         cache_tree_init(&corrupt_blocks);
9330         extent_io_tree_init(&excluded_extents);
9331         INIT_LIST_HEAD(&dropping_trees);
9332         INIT_LIST_HEAD(&normal_trees);
9333
9334         if (repair) {
9335                 root->fs_info->excluded_extents = &excluded_extents;
9336                 root->fs_info->fsck_extent_cache = &extent_cache;
9337                 root->fs_info->free_extent_hook = free_extent_hook;
9338                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9339         }
9340
9341         bits_nr = 1024;
9342         bits = malloc(bits_nr * sizeof(struct block_info));
9343         if (!bits) {
9344                 perror("malloc");
9345                 exit(1);
9346         }
9347
9348         if (ctx.progress_enabled) {
9349                 ctx.tp = TASK_EXTENTS;
9350                 task_start(ctx.info);
9351         }
9352
9353 again:
9354         root1 = root->fs_info->tree_root;
9355         level = btrfs_header_level(root1->node);
9356         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9357                                     root1->node->start, 0, level, 0,
9358                                     root1->nodesize, NULL);
9359         if (ret < 0)
9360                 goto out;
9361         root1 = root->fs_info->chunk_root;
9362         level = btrfs_header_level(root1->node);
9363         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9364                                     root1->node->start, 0, level, 0,
9365                                     root1->nodesize, NULL);
9366         if (ret < 0)
9367                 goto out;
9368         btrfs_init_path(&path);
9369         key.offset = 0;
9370         key.objectid = 0;
9371         key.type = BTRFS_ROOT_ITEM_KEY;
9372         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9373                                         &key, &path, 0, 0);
9374         if (ret < 0)
9375                 goto out;
9376         while(1) {
9377                 leaf = path.nodes[0];
9378                 slot = path.slots[0];
9379                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9380                         ret = btrfs_next_leaf(root, &path);
9381                         if (ret != 0)
9382                                 break;
9383                         leaf = path.nodes[0];
9384                         slot = path.slots[0];
9385                 }
9386                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9387                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9388                         unsigned long offset;
9389                         u64 last_snapshot;
9390
9391                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9392                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9393                         last_snapshot = btrfs_root_last_snapshot(&ri);
9394                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9395                                 level = btrfs_root_level(&ri);
9396                                 level_size = root->nodesize;
9397                                 ret = add_root_item_to_list(&normal_trees,
9398                                                 found_key.objectid,
9399                                                 btrfs_root_bytenr(&ri),
9400                                                 last_snapshot, level,
9401                                                 0, level_size, NULL);
9402                                 if (ret < 0)
9403                                         goto out;
9404                         } else {
9405                                 level = btrfs_root_level(&ri);
9406                                 level_size = root->nodesize;
9407                                 objectid = found_key.objectid;
9408                                 btrfs_disk_key_to_cpu(&found_key,
9409                                                       &ri.drop_progress);
9410                                 ret = add_root_item_to_list(&dropping_trees,
9411                                                 objectid,
9412                                                 btrfs_root_bytenr(&ri),
9413                                                 last_snapshot, level,
9414                                                 ri.drop_level,
9415                                                 level_size, &found_key);
9416                                 if (ret < 0)
9417                                         goto out;
9418                         }
9419                 }
9420                 path.slots[0]++;
9421         }
9422         btrfs_release_path(&path);
9423
9424         /*
9425          * check_block can return -EAGAIN if it fixes something, please keep
9426          * this in mind when dealing with return values from these functions, if
9427          * we get -EAGAIN we want to fall through and restart the loop.
9428          */
9429         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9430                                   &seen, &reada, &nodes, &extent_cache,
9431                                   &chunk_cache, &dev_cache, &block_group_cache,
9432                                   &dev_extent_cache);
9433         if (ret < 0) {
9434                 if (ret == -EAGAIN)
9435                         goto loop;
9436                 goto out;
9437         }
9438         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9439                                   &pending, &seen, &reada, &nodes,
9440                                   &extent_cache, &chunk_cache, &dev_cache,
9441                                   &block_group_cache, &dev_extent_cache);
9442         if (ret < 0) {
9443                 if (ret == -EAGAIN)
9444                         goto loop;
9445                 goto out;
9446         }
9447
9448         ret = check_chunks(&chunk_cache, &block_group_cache,
9449                            &dev_extent_cache, NULL, NULL, NULL, 0);
9450         if (ret) {
9451                 if (ret == -EAGAIN)
9452                         goto loop;
9453                 err = ret;
9454         }
9455
9456         ret = check_extent_refs(root, &extent_cache);
9457         if (ret < 0) {
9458                 if (ret == -EAGAIN)
9459                         goto loop;
9460                 goto out;
9461         }
9462
9463         ret = check_devices(&dev_cache, &dev_extent_cache);
9464         if (ret && err)
9465                 ret = err;
9466
9467 out:
9468         task_stop(ctx.info);
9469         if (repair) {
9470                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9471                 extent_io_tree_cleanup(&excluded_extents);
9472                 root->fs_info->fsck_extent_cache = NULL;
9473                 root->fs_info->free_extent_hook = NULL;
9474                 root->fs_info->corrupt_blocks = NULL;
9475                 root->fs_info->excluded_extents = NULL;
9476         }
9477         free(bits);
9478         free_chunk_cache_tree(&chunk_cache);
9479         free_device_cache_tree(&dev_cache);
9480         free_block_group_tree(&block_group_cache);
9481         free_device_extent_tree(&dev_extent_cache);
9482         free_extent_cache_tree(&seen);
9483         free_extent_cache_tree(&pending);
9484         free_extent_cache_tree(&reada);
9485         free_extent_cache_tree(&nodes);
9486         return ret;
9487 loop:
9488         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9489         free_extent_cache_tree(&seen);
9490         free_extent_cache_tree(&pending);
9491         free_extent_cache_tree(&reada);
9492         free_extent_cache_tree(&nodes);
9493         free_chunk_cache_tree(&chunk_cache);
9494         free_block_group_tree(&block_group_cache);
9495         free_device_cache_tree(&dev_cache);
9496         free_device_extent_tree(&dev_extent_cache);
9497         free_extent_record_cache(root->fs_info, &extent_cache);
9498         free_root_item_list(&normal_trees);
9499         free_root_item_list(&dropping_trees);
9500         extent_io_tree_cleanup(&excluded_extents);
9501         goto again;
9502 }
9503
9504 /*
9505  * Check backrefs of a tree block given by @bytenr or @eb.
9506  *
9507  * @root:       the root containing the @bytenr or @eb
9508  * @eb:         tree block extent buffer, can be NULL
9509  * @bytenr:     bytenr of the tree block to search
9510  * @level:      tree level of the tree block
9511  * @owner:      owner of the tree block
9512  *
9513  * Return >0 for any error found and output error message
9514  * Return 0 for no error found
9515  */
9516 static int check_tree_block_ref(struct btrfs_root *root,
9517                                 struct extent_buffer *eb, u64 bytenr,
9518                                 int level, u64 owner)
9519 {
9520         struct btrfs_key key;
9521         struct btrfs_root *extent_root = root->fs_info->extent_root;
9522         struct btrfs_path path;
9523         struct btrfs_extent_item *ei;
9524         struct btrfs_extent_inline_ref *iref;
9525         struct extent_buffer *leaf;
9526         unsigned long end;
9527         unsigned long ptr;
9528         int slot;
9529         int skinny_level;
9530         int type;
9531         u32 nodesize = root->nodesize;
9532         u32 item_size;
9533         u64 offset;
9534         int found_ref = 0;
9535         int err = 0;
9536         int ret;
9537
9538         btrfs_init_path(&path);
9539         key.objectid = bytenr;
9540         if (btrfs_fs_incompat(root->fs_info,
9541                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9542                 key.type = BTRFS_METADATA_ITEM_KEY;
9543         else
9544                 key.type = BTRFS_EXTENT_ITEM_KEY;
9545         key.offset = (u64)-1;
9546
9547         /* Search for the backref in extent tree */
9548         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9549         if (ret < 0) {
9550                 err |= BACKREF_MISSING;
9551                 goto out;
9552         }
9553         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9554         if (ret) {
9555                 err |= BACKREF_MISSING;
9556                 goto out;
9557         }
9558
9559         leaf = path.nodes[0];
9560         slot = path.slots[0];
9561         btrfs_item_key_to_cpu(leaf, &key, slot);
9562
9563         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9564
9565         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9566                 skinny_level = (int)key.offset;
9567                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9568         } else {
9569                 struct btrfs_tree_block_info *info;
9570
9571                 info = (struct btrfs_tree_block_info *)(ei + 1);
9572                 skinny_level = btrfs_tree_block_level(leaf, info);
9573                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9574         }
9575
9576         if (eb) {
9577                 u64 header_gen;
9578                 u64 extent_gen;
9579
9580                 if (!(btrfs_extent_flags(leaf, ei) &
9581                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9582                         error(
9583                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9584                                 key.objectid, nodesize,
9585                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9586                         err = BACKREF_MISMATCH;
9587                 }
9588                 header_gen = btrfs_header_generation(eb);
9589                 extent_gen = btrfs_extent_generation(leaf, ei);
9590                 if (header_gen != extent_gen) {
9591                         error(
9592         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9593                                 key.objectid, nodesize, header_gen,
9594                                 extent_gen);
9595                         err = BACKREF_MISMATCH;
9596                 }
9597                 if (level != skinny_level) {
9598                         error(
9599                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9600                                 key.objectid, nodesize, level, skinny_level);
9601                         err = BACKREF_MISMATCH;
9602                 }
9603                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9604                         error(
9605                         "extent[%llu %u] is referred by other roots than %llu",
9606                                 key.objectid, nodesize, root->objectid);
9607                         err = BACKREF_MISMATCH;
9608                 }
9609         }
9610
9611         /*
9612          * Iterate the extent/metadata item to find the exact backref
9613          */
9614         item_size = btrfs_item_size_nr(leaf, slot);
9615         ptr = (unsigned long)iref;
9616         end = (unsigned long)ei + item_size;
9617         while (ptr < end) {
9618                 iref = (struct btrfs_extent_inline_ref *)ptr;
9619                 type = btrfs_extent_inline_ref_type(leaf, iref);
9620                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9621
9622                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9623                         (offset == root->objectid || offset == owner)) {
9624                         found_ref = 1;
9625                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9626                         /* Check if the backref points to valid referencer */
9627                         found_ref = !check_tree_block_ref(root, NULL, offset,
9628                                                           level + 1, owner);
9629                 }
9630
9631                 if (found_ref)
9632                         break;
9633                 ptr += btrfs_extent_inline_ref_size(type);
9634         }
9635
9636         /*
9637          * Inlined extent item doesn't have what we need, check
9638          * TREE_BLOCK_REF_KEY
9639          */
9640         if (!found_ref) {
9641                 btrfs_release_path(&path);
9642                 key.objectid = bytenr;
9643                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9644                 key.offset = root->objectid;
9645
9646                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9647                 if (!ret)
9648                         found_ref = 1;
9649         }
9650         if (!found_ref)
9651                 err |= BACKREF_MISSING;
9652 out:
9653         btrfs_release_path(&path);
9654         if (eb && (err & BACKREF_MISSING))
9655                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9656                         bytenr, nodesize, owner, level);
9657         return err;
9658 }
9659
9660 /*
9661  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9662  *
9663  * Return >0 any error found and output error message
9664  * Return 0 for no error found
9665  */
9666 static int check_extent_data_item(struct btrfs_root *root,
9667                                   struct extent_buffer *eb, int slot)
9668 {
9669         struct btrfs_file_extent_item *fi;
9670         struct btrfs_path path;
9671         struct btrfs_root *extent_root = root->fs_info->extent_root;
9672         struct btrfs_key fi_key;
9673         struct btrfs_key dbref_key;
9674         struct extent_buffer *leaf;
9675         struct btrfs_extent_item *ei;
9676         struct btrfs_extent_inline_ref *iref;
9677         struct btrfs_extent_data_ref *dref;
9678         u64 owner;
9679         u64 file_extent_gen;
9680         u64 disk_bytenr;
9681         u64 disk_num_bytes;
9682         u64 extent_num_bytes;
9683         u64 extent_flags;
9684         u64 extent_gen;
9685         u32 item_size;
9686         unsigned long end;
9687         unsigned long ptr;
9688         int type;
9689         u64 ref_root;
9690         int found_dbackref = 0;
9691         int err = 0;
9692         int ret;
9693
9694         btrfs_item_key_to_cpu(eb, &fi_key, slot);
9695         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9696         file_extent_gen = btrfs_file_extent_generation(eb, fi);
9697
9698         /* Nothing to check for hole and inline data extents */
9699         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9700             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9701                 return 0;
9702
9703         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9704         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9705         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9706
9707         /* Check unaligned disk_num_bytes and num_bytes */
9708         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9709                 error(
9710 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9711                         fi_key.objectid, fi_key.offset, disk_num_bytes,
9712                         root->sectorsize);
9713                 err |= BYTES_UNALIGNED;
9714         } else {
9715                 data_bytes_allocated += disk_num_bytes;
9716         }
9717         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9718                 error(
9719 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9720                         fi_key.objectid, fi_key.offset, extent_num_bytes,
9721                         root->sectorsize);
9722                 err |= BYTES_UNALIGNED;
9723         } else {
9724                 data_bytes_referenced += extent_num_bytes;
9725         }
9726         owner = btrfs_header_owner(eb);
9727
9728         /* Check the extent item of the file extent in extent tree */
9729         btrfs_init_path(&path);
9730         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9731         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9732         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9733
9734         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9735         if (ret) {
9736                 err |= BACKREF_MISSING;
9737                 goto error;
9738         }
9739
9740         leaf = path.nodes[0];
9741         slot = path.slots[0];
9742         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9743
9744         extent_flags = btrfs_extent_flags(leaf, ei);
9745         extent_gen = btrfs_extent_generation(leaf, ei);
9746
9747         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9748                 error(
9749                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9750                     disk_bytenr, disk_num_bytes,
9751                     BTRFS_EXTENT_FLAG_DATA);
9752                 err |= BACKREF_MISMATCH;
9753         }
9754
9755         if (file_extent_gen < extent_gen) {
9756                 error(
9757 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9758                         disk_bytenr, disk_num_bytes, file_extent_gen,
9759                         extent_gen);
9760                 err |= BACKREF_MISMATCH;
9761         }
9762
9763         /* Check data backref inside that extent item */
9764         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9765         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9766         ptr = (unsigned long)iref;
9767         end = (unsigned long)ei + item_size;
9768         while (ptr < end) {
9769                 iref = (struct btrfs_extent_inline_ref *)ptr;
9770                 type = btrfs_extent_inline_ref_type(leaf, iref);
9771                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9772
9773                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9774                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
9775                         if (ref_root == owner || ref_root == root->objectid)
9776                                 found_dbackref = 1;
9777                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9778                         found_dbackref = !check_tree_block_ref(root, NULL,
9779                                 btrfs_extent_inline_ref_offset(leaf, iref),
9780                                 0, owner);
9781                 }
9782
9783                 if (found_dbackref)
9784                         break;
9785                 ptr += btrfs_extent_inline_ref_size(type);
9786         }
9787
9788         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9789         if (!found_dbackref) {
9790                 btrfs_release_path(&path);
9791
9792                 btrfs_init_path(&path);
9793                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9794                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9795                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9796                                 fi_key.objectid, fi_key.offset);
9797
9798                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9799                                         &dbref_key, &path, 0, 0);
9800                 if (!ret)
9801                         found_dbackref = 1;
9802         }
9803
9804         if (!found_dbackref)
9805                 err |= BACKREF_MISSING;
9806 error:
9807         btrfs_release_path(&path);
9808         if (err & BACKREF_MISSING) {
9809                 error("data extent[%llu %llu] backref lost",
9810                       disk_bytenr, disk_num_bytes);
9811         }
9812         return err;
9813 }
9814
9815 /*
9816  * Get real tree block level for the case like shared block
9817  * Return >= 0 as tree level
9818  * Return <0 for error
9819  */
9820 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9821 {
9822         struct extent_buffer *eb;
9823         struct btrfs_path path;
9824         struct btrfs_key key;
9825         struct btrfs_extent_item *ei;
9826         u64 flags;
9827         u64 transid;
9828         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9829         u8 backref_level;
9830         u8 header_level;
9831         int ret;
9832
9833         /* Search extent tree for extent generation and level */
9834         key.objectid = bytenr;
9835         key.type = BTRFS_METADATA_ITEM_KEY;
9836         key.offset = (u64)-1;
9837
9838         btrfs_init_path(&path);
9839         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9840         if (ret < 0)
9841                 goto release_out;
9842         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9843         if (ret < 0)
9844                 goto release_out;
9845         if (ret > 0) {
9846                 ret = -ENOENT;
9847                 goto release_out;
9848         }
9849
9850         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9851         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9852                             struct btrfs_extent_item);
9853         flags = btrfs_extent_flags(path.nodes[0], ei);
9854         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9855                 ret = -ENOENT;
9856                 goto release_out;
9857         }
9858
9859         /* Get transid for later read_tree_block() check */
9860         transid = btrfs_extent_generation(path.nodes[0], ei);
9861
9862         /* Get backref level as one source */
9863         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9864                 backref_level = key.offset;
9865         } else {
9866                 struct btrfs_tree_block_info *info;
9867
9868                 info = (struct btrfs_tree_block_info *)(ei + 1);
9869                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9870         }
9871         btrfs_release_path(&path);
9872
9873         /* Get level from tree block as an alternative source */
9874         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9875         if (!extent_buffer_uptodate(eb)) {
9876                 free_extent_buffer(eb);
9877                 return -EIO;
9878         }
9879         header_level = btrfs_header_level(eb);
9880         free_extent_buffer(eb);
9881
9882         if (header_level != backref_level)
9883                 return -EIO;
9884         return header_level;
9885
9886 release_out:
9887         btrfs_release_path(&path);
9888         return ret;
9889 }
9890
9891 /*
9892  * Check if a tree block backref is valid (points to a valid tree block)
9893  * if level == -1, level will be resolved
9894  * Return >0 for any error found and print error message
9895  */
9896 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9897                                     u64 bytenr, int level)
9898 {
9899         struct btrfs_root *root;
9900         struct btrfs_key key;
9901         struct btrfs_path path;
9902         struct extent_buffer *eb;
9903         struct extent_buffer *node;
9904         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9905         int err = 0;
9906         int ret;
9907
9908         /* Query level for level == -1 special case */
9909         if (level == -1)
9910                 level = query_tree_block_level(fs_info, bytenr);
9911         if (level < 0) {
9912                 err |= REFERENCER_MISSING;
9913                 goto out;
9914         }
9915
9916         key.objectid = root_id;
9917         key.type = BTRFS_ROOT_ITEM_KEY;
9918         key.offset = (u64)-1;
9919
9920         root = btrfs_read_fs_root(fs_info, &key);
9921         if (IS_ERR(root)) {
9922                 err |= REFERENCER_MISSING;
9923                 goto out;
9924         }
9925
9926         /* Read out the tree block to get item/node key */
9927         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9928         if (!extent_buffer_uptodate(eb)) {
9929                 err |= REFERENCER_MISSING;
9930                 free_extent_buffer(eb);
9931                 goto out;
9932         }
9933
9934         /* Empty tree, no need to check key */
9935         if (!btrfs_header_nritems(eb) && !level) {
9936                 free_extent_buffer(eb);
9937                 goto out;
9938         }
9939
9940         if (level)
9941                 btrfs_node_key_to_cpu(eb, &key, 0);
9942         else
9943                 btrfs_item_key_to_cpu(eb, &key, 0);
9944
9945         free_extent_buffer(eb);
9946
9947         btrfs_init_path(&path);
9948         path.lowest_level = level;
9949         /* Search with the first key, to ensure we can reach it */
9950         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9951         if (ret < 0) {
9952                 err |= REFERENCER_MISSING;
9953                 goto release_out;
9954         }
9955
9956         node = path.nodes[level];
9957         if (btrfs_header_bytenr(node) != bytenr) {
9958                 error(
9959         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9960                         bytenr, nodesize, bytenr,
9961                         btrfs_header_bytenr(node));
9962                 err |= REFERENCER_MISMATCH;
9963         }
9964         if (btrfs_header_level(node) != level) {
9965                 error(
9966         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9967                         bytenr, nodesize, level,
9968                         btrfs_header_level(node));
9969                 err |= REFERENCER_MISMATCH;
9970         }
9971
9972 release_out:
9973         btrfs_release_path(&path);
9974 out:
9975         if (err & REFERENCER_MISSING) {
9976                 if (level < 0)
9977                         error("extent [%llu %d] lost referencer (owner: %llu)",
9978                                 bytenr, nodesize, root_id);
9979                 else
9980                         error(
9981                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9982                                 bytenr, nodesize, root_id, level);
9983         }
9984
9985         return err;
9986 }
9987
9988 /*
9989  * Check referencer for shared block backref
9990  * If level == -1, this function will resolve the level.
9991  */
9992 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9993                                      u64 parent, u64 bytenr, int level)
9994 {
9995         struct extent_buffer *eb;
9996         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9997         u32 nr;
9998         int found_parent = 0;
9999         int i;
10000
10001         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10002         if (!extent_buffer_uptodate(eb))
10003                 goto out;
10004
10005         if (level == -1)
10006                 level = query_tree_block_level(fs_info, bytenr);
10007         if (level < 0)
10008                 goto out;
10009
10010         if (level + 1 != btrfs_header_level(eb))
10011                 goto out;
10012
10013         nr = btrfs_header_nritems(eb);
10014         for (i = 0; i < nr; i++) {
10015                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10016                         found_parent = 1;
10017                         break;
10018                 }
10019         }
10020 out:
10021         free_extent_buffer(eb);
10022         if (!found_parent) {
10023                 error(
10024         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10025                         bytenr, nodesize, parent, level);
10026                 return REFERENCER_MISSING;
10027         }
10028         return 0;
10029 }
10030
10031 /*
10032  * Check referencer for normal (inlined) data ref
10033  * If len == 0, it will be resolved by searching in extent tree
10034  */
10035 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10036                                      u64 root_id, u64 objectid, u64 offset,
10037                                      u64 bytenr, u64 len, u32 count)
10038 {
10039         struct btrfs_root *root;
10040         struct btrfs_root *extent_root = fs_info->extent_root;
10041         struct btrfs_key key;
10042         struct btrfs_path path;
10043         struct extent_buffer *leaf;
10044         struct btrfs_file_extent_item *fi;
10045         u32 found_count = 0;
10046         int slot;
10047         int ret = 0;
10048
10049         if (!len) {
10050                 key.objectid = bytenr;
10051                 key.type = BTRFS_EXTENT_ITEM_KEY;
10052                 key.offset = (u64)-1;
10053
10054                 btrfs_init_path(&path);
10055                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10056                 if (ret < 0)
10057                         goto out;
10058                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10059                 if (ret)
10060                         goto out;
10061                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10062                 if (key.objectid != bytenr ||
10063                     key.type != BTRFS_EXTENT_ITEM_KEY)
10064                         goto out;
10065                 len = key.offset;
10066                 btrfs_release_path(&path);
10067         }
10068         key.objectid = root_id;
10069         key.type = BTRFS_ROOT_ITEM_KEY;
10070         key.offset = (u64)-1;
10071         btrfs_init_path(&path);
10072
10073         root = btrfs_read_fs_root(fs_info, &key);
10074         if (IS_ERR(root))
10075                 goto out;
10076
10077         key.objectid = objectid;
10078         key.type = BTRFS_EXTENT_DATA_KEY;
10079         /*
10080          * It can be nasty as data backref offset is
10081          * file offset - file extent offset, which is smaller or
10082          * equal to original backref offset.  The only special case is
10083          * overflow.  So we need to special check and do further search.
10084          */
10085         key.offset = offset & (1ULL << 63) ? 0 : offset;
10086
10087         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10088         if (ret < 0)
10089                 goto out;
10090
10091         /*
10092          * Search afterwards to get correct one
10093          * NOTE: As we must do a comprehensive check on the data backref to
10094          * make sure the dref count also matches, we must iterate all file
10095          * extents for that inode.
10096          */
10097         while (1) {
10098                 leaf = path.nodes[0];
10099                 slot = path.slots[0];
10100
10101                 btrfs_item_key_to_cpu(leaf, &key, slot);
10102                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10103                         break;
10104                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10105                 /*
10106                  * Except normal disk bytenr and disk num bytes, we still
10107                  * need to do extra check on dbackref offset as
10108                  * dbackref offset = file_offset - file_extent_offset
10109                  */
10110                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10111                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10112                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10113                     offset)
10114                         found_count++;
10115
10116                 ret = btrfs_next_item(root, &path);
10117                 if (ret)
10118                         break;
10119         }
10120 out:
10121         btrfs_release_path(&path);
10122         if (found_count != count) {
10123                 error(
10124 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10125                         bytenr, len, root_id, objectid, offset, count, found_count);
10126                 return REFERENCER_MISSING;
10127         }
10128         return 0;
10129 }
10130
10131 /*
10132  * Check if the referencer of a shared data backref exists
10133  */
10134 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10135                                      u64 parent, u64 bytenr)
10136 {
10137         struct extent_buffer *eb;
10138         struct btrfs_key key;
10139         struct btrfs_file_extent_item *fi;
10140         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10141         u32 nr;
10142         int found_parent = 0;
10143         int i;
10144
10145         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10146         if (!extent_buffer_uptodate(eb))
10147                 goto out;
10148
10149         nr = btrfs_header_nritems(eb);
10150         for (i = 0; i < nr; i++) {
10151                 btrfs_item_key_to_cpu(eb, &key, i);
10152                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10153                         continue;
10154
10155                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10156                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10157                         continue;
10158
10159                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10160                         found_parent = 1;
10161                         break;
10162                 }
10163         }
10164
10165 out:
10166         free_extent_buffer(eb);
10167         if (!found_parent) {
10168                 error("shared extent %llu referencer lost (parent: %llu)",
10169                         bytenr, parent);
10170                 return REFERENCER_MISSING;
10171         }
10172         return 0;
10173 }
10174
10175 /*
10176  * This function will check a given extent item, including its backref and
10177  * itself (like crossing stripe boundary and type)
10178  *
10179  * Since we don't use extent_record anymore, introduce new error bit
10180  */
10181 static int check_extent_item(struct btrfs_fs_info *fs_info,
10182                              struct extent_buffer *eb, int slot)
10183 {
10184         struct btrfs_extent_item *ei;
10185         struct btrfs_extent_inline_ref *iref;
10186         struct btrfs_extent_data_ref *dref;
10187         unsigned long end;
10188         unsigned long ptr;
10189         int type;
10190         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10191         u32 item_size = btrfs_item_size_nr(eb, slot);
10192         u64 flags;
10193         u64 offset;
10194         int metadata = 0;
10195         int level;
10196         struct btrfs_key key;
10197         int ret;
10198         int err = 0;
10199
10200         btrfs_item_key_to_cpu(eb, &key, slot);
10201         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10202                 bytes_used += key.offset;
10203         else
10204                 bytes_used += nodesize;
10205
10206         if (item_size < sizeof(*ei)) {
10207                 /*
10208                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10209                  * old thing when on disk format is still un-determined.
10210                  * No need to care about it anymore
10211                  */
10212                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10213                 return -ENOTTY;
10214         }
10215
10216         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10217         flags = btrfs_extent_flags(eb, ei);
10218
10219         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10220                 metadata = 1;
10221         if (metadata && check_crossing_stripes(global_info, key.objectid,
10222                                                eb->len)) {
10223                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10224                       key.objectid, key.objectid + nodesize);
10225                 err |= CROSSING_STRIPE_BOUNDARY;
10226         }
10227
10228         ptr = (unsigned long)(ei + 1);
10229
10230         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10231                 /* Old EXTENT_ITEM metadata */
10232                 struct btrfs_tree_block_info *info;
10233
10234                 info = (struct btrfs_tree_block_info *)ptr;
10235                 level = btrfs_tree_block_level(eb, info);
10236                 ptr += sizeof(struct btrfs_tree_block_info);
10237         } else {
10238                 /* New METADATA_ITEM */
10239                 level = key.offset;
10240         }
10241         end = (unsigned long)ei + item_size;
10242
10243         if (ptr >= end) {
10244                 err |= ITEM_SIZE_MISMATCH;
10245                 goto out;
10246         }
10247
10248         /* Now check every backref in this extent item */
10249 next:
10250         iref = (struct btrfs_extent_inline_ref *)ptr;
10251         type = btrfs_extent_inline_ref_type(eb, iref);
10252         offset = btrfs_extent_inline_ref_offset(eb, iref);
10253         switch (type) {
10254         case BTRFS_TREE_BLOCK_REF_KEY:
10255                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10256                                                level);
10257                 err |= ret;
10258                 break;
10259         case BTRFS_SHARED_BLOCK_REF_KEY:
10260                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10261                                                  level);
10262                 err |= ret;
10263                 break;
10264         case BTRFS_EXTENT_DATA_REF_KEY:
10265                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10266                 ret = check_extent_data_backref(fs_info,
10267                                 btrfs_extent_data_ref_root(eb, dref),
10268                                 btrfs_extent_data_ref_objectid(eb, dref),
10269                                 btrfs_extent_data_ref_offset(eb, dref),
10270                                 key.objectid, key.offset,
10271                                 btrfs_extent_data_ref_count(eb, dref));
10272                 err |= ret;
10273                 break;
10274         case BTRFS_SHARED_DATA_REF_KEY:
10275                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10276                 err |= ret;
10277                 break;
10278         default:
10279                 error("extent[%llu %d %llu] has unknown ref type: %d",
10280                         key.objectid, key.type, key.offset, type);
10281                 err |= UNKNOWN_TYPE;
10282                 goto out;
10283         }
10284
10285         ptr += btrfs_extent_inline_ref_size(type);
10286         if (ptr < end)
10287                 goto next;
10288
10289 out:
10290         return err;
10291 }
10292
10293 /*
10294  * Check if a dev extent item is referred correctly by its chunk
10295  */
10296 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10297                                  struct extent_buffer *eb, int slot)
10298 {
10299         struct btrfs_root *chunk_root = fs_info->chunk_root;
10300         struct btrfs_dev_extent *ptr;
10301         struct btrfs_path path;
10302         struct btrfs_key chunk_key;
10303         struct btrfs_key devext_key;
10304         struct btrfs_chunk *chunk;
10305         struct extent_buffer *l;
10306         int num_stripes;
10307         u64 length;
10308         int i;
10309         int found_chunk = 0;
10310         int ret;
10311
10312         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10313         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10314         length = btrfs_dev_extent_length(eb, ptr);
10315
10316         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10317         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10318         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10319
10320         btrfs_init_path(&path);
10321         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10322         if (ret)
10323                 goto out;
10324
10325         l = path.nodes[0];
10326         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10327         if (btrfs_chunk_length(l, chunk) != length)
10328                 goto out;
10329
10330         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10331         for (i = 0; i < num_stripes; i++) {
10332                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10333                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10334
10335                 if (devid == devext_key.objectid &&
10336                     offset == devext_key.offset) {
10337                         found_chunk = 1;
10338                         break;
10339                 }
10340         }
10341 out:
10342         btrfs_release_path(&path);
10343         if (!found_chunk) {
10344                 error(
10345                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10346                         devext_key.objectid, devext_key.offset, length);
10347                 return REFERENCER_MISSING;
10348         }
10349         return 0;
10350 }
10351
10352 /*
10353  * Check if the used space is correct with the dev item
10354  */
10355 static int check_dev_item(struct btrfs_fs_info *fs_info,
10356                           struct extent_buffer *eb, int slot)
10357 {
10358         struct btrfs_root *dev_root = fs_info->dev_root;
10359         struct btrfs_dev_item *dev_item;
10360         struct btrfs_path path;
10361         struct btrfs_key key;
10362         struct btrfs_dev_extent *ptr;
10363         u64 dev_id;
10364         u64 used;
10365         u64 total = 0;
10366         int ret;
10367
10368         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10369         dev_id = btrfs_device_id(eb, dev_item);
10370         used = btrfs_device_bytes_used(eb, dev_item);
10371
10372         key.objectid = dev_id;
10373         key.type = BTRFS_DEV_EXTENT_KEY;
10374         key.offset = 0;
10375
10376         btrfs_init_path(&path);
10377         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10378         if (ret < 0) {
10379                 btrfs_item_key_to_cpu(eb, &key, slot);
10380                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10381                         key.objectid, key.type, key.offset);
10382                 btrfs_release_path(&path);
10383                 return REFERENCER_MISSING;
10384         }
10385
10386         /* Iterate dev_extents to calculate the used space of a device */
10387         while (1) {
10388                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10389
10390                 if (key.objectid > dev_id)
10391                         break;
10392                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10393                         goto next;
10394
10395                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10396                                      struct btrfs_dev_extent);
10397                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10398 next:
10399                 ret = btrfs_next_item(dev_root, &path);
10400                 if (ret)
10401                         break;
10402         }
10403         btrfs_release_path(&path);
10404
10405         if (used != total) {
10406                 btrfs_item_key_to_cpu(eb, &key, slot);
10407                 error(
10408 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10409                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10410                         BTRFS_DEV_EXTENT_KEY, dev_id);
10411                 return ACCOUNTING_MISMATCH;
10412         }
10413         return 0;
10414 }
10415
10416 /*
10417  * Check a block group item with its referencer (chunk) and its used space
10418  * with extent/metadata item
10419  */
10420 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10421                                   struct extent_buffer *eb, int slot)
10422 {
10423         struct btrfs_root *extent_root = fs_info->extent_root;
10424         struct btrfs_root *chunk_root = fs_info->chunk_root;
10425         struct btrfs_block_group_item *bi;
10426         struct btrfs_block_group_item bg_item;
10427         struct btrfs_path path;
10428         struct btrfs_key bg_key;
10429         struct btrfs_key chunk_key;
10430         struct btrfs_key extent_key;
10431         struct btrfs_chunk *chunk;
10432         struct extent_buffer *leaf;
10433         struct btrfs_extent_item *ei;
10434         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10435         u64 flags;
10436         u64 bg_flags;
10437         u64 used;
10438         u64 total = 0;
10439         int ret;
10440         int err = 0;
10441
10442         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10443         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10444         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10445         used = btrfs_block_group_used(&bg_item);
10446         bg_flags = btrfs_block_group_flags(&bg_item);
10447
10448         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10449         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10450         chunk_key.offset = bg_key.objectid;
10451
10452         btrfs_init_path(&path);
10453         /* Search for the referencer chunk */
10454         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10455         if (ret) {
10456                 error(
10457                 "block group[%llu %llu] did not find the related chunk item",
10458                         bg_key.objectid, bg_key.offset);
10459                 err |= REFERENCER_MISSING;
10460         } else {
10461                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10462                                         struct btrfs_chunk);
10463                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10464                                                 bg_key.offset) {
10465                         error(
10466         "block group[%llu %llu] related chunk item length does not match",
10467                                 bg_key.objectid, bg_key.offset);
10468                         err |= REFERENCER_MISMATCH;
10469                 }
10470         }
10471         btrfs_release_path(&path);
10472
10473         /* Search from the block group bytenr */
10474         extent_key.objectid = bg_key.objectid;
10475         extent_key.type = 0;
10476         extent_key.offset = 0;
10477
10478         btrfs_init_path(&path);
10479         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10480         if (ret < 0)
10481                 goto out;
10482
10483         /* Iterate extent tree to account used space */
10484         while (1) {
10485                 leaf = path.nodes[0];
10486                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10487                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10488                         break;
10489
10490                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10491                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10492                         goto next;
10493                 if (extent_key.objectid < bg_key.objectid)
10494                         goto next;
10495
10496                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10497                         total += nodesize;
10498                 else
10499                         total += extent_key.offset;
10500
10501                 ei = btrfs_item_ptr(leaf, path.slots[0],
10502                                     struct btrfs_extent_item);
10503                 flags = btrfs_extent_flags(leaf, ei);
10504                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10505                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10506                                 error(
10507                         "bad extent[%llu, %llu) type mismatch with chunk",
10508                                         extent_key.objectid,
10509                                         extent_key.objectid + extent_key.offset);
10510                                 err |= CHUNK_TYPE_MISMATCH;
10511                         }
10512                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10513                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10514                                     BTRFS_BLOCK_GROUP_METADATA))) {
10515                                 error(
10516                         "bad extent[%llu, %llu) type mismatch with chunk",
10517                                         extent_key.objectid,
10518                                         extent_key.objectid + nodesize);
10519                                 err |= CHUNK_TYPE_MISMATCH;
10520                         }
10521                 }
10522 next:
10523                 ret = btrfs_next_item(extent_root, &path);
10524                 if (ret)
10525                         break;
10526         }
10527
10528 out:
10529         btrfs_release_path(&path);
10530
10531         if (total != used) {
10532                 error(
10533                 "block group[%llu %llu] used %llu but extent items used %llu",
10534                         bg_key.objectid, bg_key.offset, used, total);
10535                 err |= ACCOUNTING_MISMATCH;
10536         }
10537         return err;
10538 }
10539
10540 /*
10541  * Check a chunk item.
10542  * Including checking all referred dev_extents and block group
10543  */
10544 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10545                             struct extent_buffer *eb, int slot)
10546 {
10547         struct btrfs_root *extent_root = fs_info->extent_root;
10548         struct btrfs_root *dev_root = fs_info->dev_root;
10549         struct btrfs_path path;
10550         struct btrfs_key chunk_key;
10551         struct btrfs_key bg_key;
10552         struct btrfs_key devext_key;
10553         struct btrfs_chunk *chunk;
10554         struct extent_buffer *leaf;
10555         struct btrfs_block_group_item *bi;
10556         struct btrfs_block_group_item bg_item;
10557         struct btrfs_dev_extent *ptr;
10558         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10559         u64 length;
10560         u64 chunk_end;
10561         u64 type;
10562         u64 profile;
10563         int num_stripes;
10564         u64 offset;
10565         u64 objectid;
10566         int i;
10567         int ret;
10568         int err = 0;
10569
10570         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10571         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10572         length = btrfs_chunk_length(eb, chunk);
10573         chunk_end = chunk_key.offset + length;
10574         if (!IS_ALIGNED(length, sectorsize)) {
10575                 error("chunk[%llu %llu) not aligned to %u",
10576                         chunk_key.offset, chunk_end, sectorsize);
10577                 err |= BYTES_UNALIGNED;
10578                 goto out;
10579         }
10580
10581         type = btrfs_chunk_type(eb, chunk);
10582         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10583         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10584                 error("chunk[%llu %llu) has no chunk type",
10585                         chunk_key.offset, chunk_end);
10586                 err |= UNKNOWN_TYPE;
10587         }
10588         if (profile && (profile & (profile - 1))) {
10589                 error("chunk[%llu %llu) multiple profiles detected: %llx",
10590                         chunk_key.offset, chunk_end, profile);
10591                 err |= UNKNOWN_TYPE;
10592         }
10593
10594         bg_key.objectid = chunk_key.offset;
10595         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10596         bg_key.offset = length;
10597
10598         btrfs_init_path(&path);
10599         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10600         if (ret) {
10601                 error(
10602                 "chunk[%llu %llu) did not find the related block group item",
10603                         chunk_key.offset, chunk_end);
10604                 err |= REFERENCER_MISSING;
10605         } else{
10606                 leaf = path.nodes[0];
10607                 bi = btrfs_item_ptr(leaf, path.slots[0],
10608                                     struct btrfs_block_group_item);
10609                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10610                                    sizeof(bg_item));
10611                 if (btrfs_block_group_flags(&bg_item) != type) {
10612                         error(
10613 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10614                                 chunk_key.offset, chunk_end, type,
10615                                 btrfs_block_group_flags(&bg_item));
10616                         err |= REFERENCER_MISSING;
10617                 }
10618         }
10619
10620         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10621         for (i = 0; i < num_stripes; i++) {
10622                 btrfs_release_path(&path);
10623                 btrfs_init_path(&path);
10624                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10625                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10626                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10627
10628                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10629                                         0, 0);
10630                 if (ret)
10631                         goto not_match_dev;
10632
10633                 leaf = path.nodes[0];
10634                 ptr = btrfs_item_ptr(leaf, path.slots[0],
10635                                      struct btrfs_dev_extent);
10636                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10637                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10638                 if (objectid != chunk_key.objectid ||
10639                     offset != chunk_key.offset ||
10640                     btrfs_dev_extent_length(leaf, ptr) != length)
10641                         goto not_match_dev;
10642                 continue;
10643 not_match_dev:
10644                 err |= BACKREF_MISSING;
10645                 error(
10646                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10647                         chunk_key.objectid, chunk_end, i);
10648                 continue;
10649         }
10650         btrfs_release_path(&path);
10651 out:
10652         return err;
10653 }
10654
10655 /*
10656  * Main entry function to check known items and update related accounting info
10657  */
10658 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10659 {
10660         struct btrfs_fs_info *fs_info = root->fs_info;
10661         struct btrfs_key key;
10662         int slot = 0;
10663         int type;
10664         struct btrfs_extent_data_ref *dref;
10665         int ret;
10666         int err = 0;
10667
10668 next:
10669         btrfs_item_key_to_cpu(eb, &key, slot);
10670         type = key.type;
10671
10672         switch (type) {
10673         case BTRFS_EXTENT_DATA_KEY:
10674                 ret = check_extent_data_item(root, eb, slot);
10675                 err |= ret;
10676                 break;
10677         case BTRFS_BLOCK_GROUP_ITEM_KEY:
10678                 ret = check_block_group_item(fs_info, eb, slot);
10679                 err |= ret;
10680                 break;
10681         case BTRFS_DEV_ITEM_KEY:
10682                 ret = check_dev_item(fs_info, eb, slot);
10683                 err |= ret;
10684                 break;
10685         case BTRFS_CHUNK_ITEM_KEY:
10686                 ret = check_chunk_item(fs_info, eb, slot);
10687                 err |= ret;
10688                 break;
10689         case BTRFS_DEV_EXTENT_KEY:
10690                 ret = check_dev_extent_item(fs_info, eb, slot);
10691                 err |= ret;
10692                 break;
10693         case BTRFS_EXTENT_ITEM_KEY:
10694         case BTRFS_METADATA_ITEM_KEY:
10695                 ret = check_extent_item(fs_info, eb, slot);
10696                 err |= ret;
10697                 break;
10698         case BTRFS_EXTENT_CSUM_KEY:
10699                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10700                 break;
10701         case BTRFS_TREE_BLOCK_REF_KEY:
10702                 ret = check_tree_block_backref(fs_info, key.offset,
10703                                                key.objectid, -1);
10704                 err |= ret;
10705                 break;
10706         case BTRFS_EXTENT_DATA_REF_KEY:
10707                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10708                 ret = check_extent_data_backref(fs_info,
10709                                 btrfs_extent_data_ref_root(eb, dref),
10710                                 btrfs_extent_data_ref_objectid(eb, dref),
10711                                 btrfs_extent_data_ref_offset(eb, dref),
10712                                 key.objectid, 0,
10713                                 btrfs_extent_data_ref_count(eb, dref));
10714                 err |= ret;
10715                 break;
10716         case BTRFS_SHARED_BLOCK_REF_KEY:
10717                 ret = check_shared_block_backref(fs_info, key.offset,
10718                                                  key.objectid, -1);
10719                 err |= ret;
10720                 break;
10721         case BTRFS_SHARED_DATA_REF_KEY:
10722                 ret = check_shared_data_backref(fs_info, key.offset,
10723                                                 key.objectid);
10724                 err |= ret;
10725                 break;
10726         default:
10727                 break;
10728         }
10729
10730         if (++slot < btrfs_header_nritems(eb))
10731                 goto next;
10732
10733         return err;
10734 }
10735
10736 /*
10737  * Helper function for later fs/subvol tree check.  To determine if a tree
10738  * block should be checked.
10739  * This function will ensure only the direct referencer with lowest rootid to
10740  * check a fs/subvolume tree block.
10741  *
10742  * Backref check at extent tree would detect errors like missing subvolume
10743  * tree, so we can do aggressive check to reduce duplicated checks.
10744  */
10745 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10746 {
10747         struct btrfs_root *extent_root = root->fs_info->extent_root;
10748         struct btrfs_key key;
10749         struct btrfs_path path;
10750         struct extent_buffer *leaf;
10751         int slot;
10752         struct btrfs_extent_item *ei;
10753         unsigned long ptr;
10754         unsigned long end;
10755         int type;
10756         u32 item_size;
10757         u64 offset;
10758         struct btrfs_extent_inline_ref *iref;
10759         int ret;
10760
10761         btrfs_init_path(&path);
10762         key.objectid = btrfs_header_bytenr(eb);
10763         key.type = BTRFS_METADATA_ITEM_KEY;
10764         key.offset = (u64)-1;
10765
10766         /*
10767          * Any failure in backref resolving means we can't determine
10768          * whom the tree block belongs to.
10769          * So in that case, we need to check that tree block
10770          */
10771         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10772         if (ret < 0)
10773                 goto need_check;
10774
10775         ret = btrfs_previous_extent_item(extent_root, &path,
10776                                          btrfs_header_bytenr(eb));
10777         if (ret)
10778                 goto need_check;
10779
10780         leaf = path.nodes[0];
10781         slot = path.slots[0];
10782         btrfs_item_key_to_cpu(leaf, &key, slot);
10783         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10784
10785         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10786                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10787         } else {
10788                 struct btrfs_tree_block_info *info;
10789
10790                 info = (struct btrfs_tree_block_info *)(ei + 1);
10791                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10792         }
10793
10794         item_size = btrfs_item_size_nr(leaf, slot);
10795         ptr = (unsigned long)iref;
10796         end = (unsigned long)ei + item_size;
10797         while (ptr < end) {
10798                 iref = (struct btrfs_extent_inline_ref *)ptr;
10799                 type = btrfs_extent_inline_ref_type(leaf, iref);
10800                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10801
10802                 /*
10803                  * We only check the tree block if current root is
10804                  * the lowest referencer of it.
10805                  */
10806                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10807                     offset < root->objectid) {
10808                         btrfs_release_path(&path);
10809                         return 0;
10810                 }
10811
10812                 ptr += btrfs_extent_inline_ref_size(type);
10813         }
10814         /*
10815          * Normally we should also check keyed tree block ref, but that may be
10816          * very time consuming.  Inlined ref should already make us skip a lot
10817          * of refs now.  So skip search keyed tree block ref.
10818          */
10819
10820 need_check:
10821         btrfs_release_path(&path);
10822         return 1;
10823 }
10824
10825 /*
10826  * Traversal function for tree block. We will do:
10827  * 1) Skip shared fs/subvolume tree blocks
10828  * 2) Update related bytes accounting
10829  * 3) Pre-order traversal
10830  */
10831 static int traverse_tree_block(struct btrfs_root *root,
10832                                 struct extent_buffer *node)
10833 {
10834         struct extent_buffer *eb;
10835         struct btrfs_key key;
10836         struct btrfs_key drop_key;
10837         int level;
10838         u64 nr;
10839         int i;
10840         int err = 0;
10841         int ret;
10842
10843         /*
10844          * Skip shared fs/subvolume tree block, in that case they will
10845          * be checked by referencer with lowest rootid
10846          */
10847         if (is_fstree(root->objectid) && !should_check(root, node))
10848                 return 0;
10849
10850         /* Update bytes accounting */
10851         total_btree_bytes += node->len;
10852         if (fs_root_objectid(btrfs_header_owner(node)))
10853                 total_fs_tree_bytes += node->len;
10854         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10855                 total_extent_tree_bytes += node->len;
10856         if (!found_old_backref &&
10857             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10858             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10859             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10860                 found_old_backref = 1;
10861
10862         /* pre-order tranversal, check itself first */
10863         level = btrfs_header_level(node);
10864         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10865                                    btrfs_header_level(node),
10866                                    btrfs_header_owner(node));
10867         err |= ret;
10868         if (err)
10869                 error(
10870         "check %s failed root %llu bytenr %llu level %d, force continue check",
10871                         level ? "node":"leaf", root->objectid,
10872                         btrfs_header_bytenr(node), btrfs_header_level(node));
10873
10874         if (!level) {
10875                 btree_space_waste += btrfs_leaf_free_space(root, node);
10876                 ret = check_leaf_items(root, node);
10877                 err |= ret;
10878                 return err;
10879         }
10880
10881         nr = btrfs_header_nritems(node);
10882         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10883         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10884                 sizeof(struct btrfs_key_ptr);
10885
10886         /* Then check all its children */
10887         for (i = 0; i < nr; i++) {
10888                 u64 blocknr = btrfs_node_blockptr(node, i);
10889
10890                 btrfs_node_key_to_cpu(node, &key, i);
10891                 if (level == root->root_item.drop_level &&
10892                     is_dropped_key(&key, &drop_key))
10893                         continue;
10894
10895                 /*
10896                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10897                  * to call the function itself.
10898                  */
10899                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10900                 if (extent_buffer_uptodate(eb)) {
10901                         ret = traverse_tree_block(root, eb);
10902                         err |= ret;
10903                 }
10904                 free_extent_buffer(eb);
10905         }
10906
10907         return err;
10908 }
10909
10910 /*
10911  * Low memory usage version check_chunks_and_extents.
10912  */
10913 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10914 {
10915         struct btrfs_path path;
10916         struct btrfs_key key;
10917         struct btrfs_root *root1;
10918         struct btrfs_root *cur_root;
10919         int err = 0;
10920         int ret;
10921
10922         root1 = root->fs_info->chunk_root;
10923         ret = traverse_tree_block(root1, root1->node);
10924         err |= ret;
10925
10926         root1 = root->fs_info->tree_root;
10927         ret = traverse_tree_block(root1, root1->node);
10928         err |= ret;
10929
10930         btrfs_init_path(&path);
10931         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10932         key.offset = 0;
10933         key.type = BTRFS_ROOT_ITEM_KEY;
10934
10935         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10936         if (ret) {
10937                 error("cannot find extent treet in tree_root");
10938                 goto out;
10939         }
10940
10941         while (1) {
10942                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10943                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10944                         goto next;
10945                 key.offset = (u64)-1;
10946
10947                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10948                 if (IS_ERR(cur_root) || !cur_root) {
10949                         error("failed to read tree: %lld", key.objectid);
10950                         goto next;
10951                 }
10952
10953                 ret = traverse_tree_block(cur_root, cur_root->node);
10954                 err |= ret;
10955
10956 next:
10957                 ret = btrfs_next_item(root1, &path);
10958                 if (ret)
10959                         goto out;
10960         }
10961
10962 out:
10963         btrfs_release_path(&path);
10964         return err;
10965 }
10966
10967 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10968                            struct btrfs_root *root, int overwrite)
10969 {
10970         struct extent_buffer *c;
10971         struct extent_buffer *old = root->node;
10972         int level;
10973         int ret;
10974         struct btrfs_disk_key disk_key = {0,0,0};
10975
10976         level = 0;
10977
10978         if (overwrite) {
10979                 c = old;
10980                 extent_buffer_get(c);
10981                 goto init;
10982         }
10983         c = btrfs_alloc_free_block(trans, root,
10984                                    root->nodesize,
10985                                    root->root_key.objectid,
10986                                    &disk_key, level, 0, 0);
10987         if (IS_ERR(c)) {
10988                 c = old;
10989                 extent_buffer_get(c);
10990                 overwrite = 1;
10991         }
10992 init:
10993         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10994         btrfs_set_header_level(c, level);
10995         btrfs_set_header_bytenr(c, c->start);
10996         btrfs_set_header_generation(c, trans->transid);
10997         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10998         btrfs_set_header_owner(c, root->root_key.objectid);
10999
11000         write_extent_buffer(c, root->fs_info->fsid,
11001                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11002
11003         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11004                             btrfs_header_chunk_tree_uuid(c),
11005                             BTRFS_UUID_SIZE);
11006
11007         btrfs_mark_buffer_dirty(c);
11008         /*
11009          * this case can happen in the following case:
11010          *
11011          * 1.overwrite previous root.
11012          *
11013          * 2.reinit reloc data root, this is because we skip pin
11014          * down reloc data tree before which means we can allocate
11015          * same block bytenr here.
11016          */
11017         if (old->start == c->start) {
11018                 btrfs_set_root_generation(&root->root_item,
11019                                           trans->transid);
11020                 root->root_item.level = btrfs_header_level(root->node);
11021                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11022                                         &root->root_key, &root->root_item);
11023                 if (ret) {
11024                         free_extent_buffer(c);
11025                         return ret;
11026                 }
11027         }
11028         free_extent_buffer(old);
11029         root->node = c;
11030         add_root_to_dirty_list(root);
11031         return 0;
11032 }
11033
11034 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11035                                 struct extent_buffer *eb, int tree_root)
11036 {
11037         struct extent_buffer *tmp;
11038         struct btrfs_root_item *ri;
11039         struct btrfs_key key;
11040         u64 bytenr;
11041         u32 nodesize;
11042         int level = btrfs_header_level(eb);
11043         int nritems;
11044         int ret;
11045         int i;
11046
11047         /*
11048          * If we have pinned this block before, don't pin it again.
11049          * This can not only avoid forever loop with broken filesystem
11050          * but also give us some speedups.
11051          */
11052         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11053                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11054                 return 0;
11055
11056         btrfs_pin_extent(fs_info, eb->start, eb->len);
11057
11058         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11059         nritems = btrfs_header_nritems(eb);
11060         for (i = 0; i < nritems; i++) {
11061                 if (level == 0) {
11062                         btrfs_item_key_to_cpu(eb, &key, i);
11063                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11064                                 continue;
11065                         /* Skip the extent root and reloc roots */
11066                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11067                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11068                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11069                                 continue;
11070                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11071                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11072
11073                         /*
11074                          * If at any point we start needing the real root we
11075                          * will have to build a stump root for the root we are
11076                          * in, but for now this doesn't actually use the root so
11077                          * just pass in extent_root.
11078                          */
11079                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11080                                               nodesize, 0);
11081                         if (!extent_buffer_uptodate(tmp)) {
11082                                 fprintf(stderr, "Error reading root block\n");
11083                                 return -EIO;
11084                         }
11085                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11086                         free_extent_buffer(tmp);
11087                         if (ret)
11088                                 return ret;
11089                 } else {
11090                         bytenr = btrfs_node_blockptr(eb, i);
11091
11092                         /* If we aren't the tree root don't read the block */
11093                         if (level == 1 && !tree_root) {
11094                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11095                                 continue;
11096                         }
11097
11098                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11099                                               nodesize, 0);
11100                         if (!extent_buffer_uptodate(tmp)) {
11101                                 fprintf(stderr, "Error reading tree block\n");
11102                                 return -EIO;
11103                         }
11104                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11105                         free_extent_buffer(tmp);
11106                         if (ret)
11107                                 return ret;
11108                 }
11109         }
11110
11111         return 0;
11112 }
11113
11114 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11115 {
11116         int ret;
11117
11118         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11119         if (ret)
11120                 return ret;
11121
11122         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11123 }
11124
11125 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11126 {
11127         struct btrfs_block_group_cache *cache;
11128         struct btrfs_path path;
11129         struct extent_buffer *leaf;
11130         struct btrfs_chunk *chunk;
11131         struct btrfs_key key;
11132         int ret;
11133         u64 start;
11134
11135         btrfs_init_path(&path);
11136         key.objectid = 0;
11137         key.type = BTRFS_CHUNK_ITEM_KEY;
11138         key.offset = 0;
11139         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11140         if (ret < 0) {
11141                 btrfs_release_path(&path);
11142                 return ret;
11143         }
11144
11145         /*
11146          * We do this in case the block groups were screwed up and had alloc
11147          * bits that aren't actually set on the chunks.  This happens with
11148          * restored images every time and could happen in real life I guess.
11149          */
11150         fs_info->avail_data_alloc_bits = 0;
11151         fs_info->avail_metadata_alloc_bits = 0;
11152         fs_info->avail_system_alloc_bits = 0;
11153
11154         /* First we need to create the in-memory block groups */
11155         while (1) {
11156                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11157                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11158                         if (ret < 0) {
11159                                 btrfs_release_path(&path);
11160                                 return ret;
11161                         }
11162                         if (ret) {
11163                                 ret = 0;
11164                                 break;
11165                         }
11166                 }
11167                 leaf = path.nodes[0];
11168                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11169                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11170                         path.slots[0]++;
11171                         continue;
11172                 }
11173
11174                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11175                 btrfs_add_block_group(fs_info, 0,
11176                                       btrfs_chunk_type(leaf, chunk),
11177                                       key.objectid, key.offset,
11178                                       btrfs_chunk_length(leaf, chunk));
11179                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11180                                  key.offset + btrfs_chunk_length(leaf, chunk),
11181                                  GFP_NOFS);
11182                 path.slots[0]++;
11183         }
11184         start = 0;
11185         while (1) {
11186                 cache = btrfs_lookup_first_block_group(fs_info, start);
11187                 if (!cache)
11188                         break;
11189                 cache->cached = 1;
11190                 start = cache->key.objectid + cache->key.offset;
11191         }
11192
11193         btrfs_release_path(&path);
11194         return 0;
11195 }
11196
11197 static int reset_balance(struct btrfs_trans_handle *trans,
11198                          struct btrfs_fs_info *fs_info)
11199 {
11200         struct btrfs_root *root = fs_info->tree_root;
11201         struct btrfs_path path;
11202         struct extent_buffer *leaf;
11203         struct btrfs_key key;
11204         int del_slot, del_nr = 0;
11205         int ret;
11206         int found = 0;
11207
11208         btrfs_init_path(&path);
11209         key.objectid = BTRFS_BALANCE_OBJECTID;
11210         key.type = BTRFS_BALANCE_ITEM_KEY;
11211         key.offset = 0;
11212         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11213         if (ret) {
11214                 if (ret > 0)
11215                         ret = 0;
11216                 if (!ret)
11217                         goto reinit_data_reloc;
11218                 else
11219                         goto out;
11220         }
11221
11222         ret = btrfs_del_item(trans, root, &path);
11223         if (ret)
11224                 goto out;
11225         btrfs_release_path(&path);
11226
11227         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11228         key.type = BTRFS_ROOT_ITEM_KEY;
11229         key.offset = 0;
11230         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11231         if (ret < 0)
11232                 goto out;
11233         while (1) {
11234                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11235                         if (!found)
11236                                 break;
11237
11238                         if (del_nr) {
11239                                 ret = btrfs_del_items(trans, root, &path,
11240                                                       del_slot, del_nr);
11241                                 del_nr = 0;
11242                                 if (ret)
11243                                         goto out;
11244                         }
11245                         key.offset++;
11246                         btrfs_release_path(&path);
11247
11248                         found = 0;
11249                         ret = btrfs_search_slot(trans, root, &key, &path,
11250                                                 -1, 1);
11251                         if (ret < 0)
11252                                 goto out;
11253                         continue;
11254                 }
11255                 found = 1;
11256                 leaf = path.nodes[0];
11257                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11258                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11259                         break;
11260                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11261                         path.slots[0]++;
11262                         continue;
11263                 }
11264                 if (!del_nr) {
11265                         del_slot = path.slots[0];
11266                         del_nr = 1;
11267                 } else {
11268                         del_nr++;
11269                 }
11270                 path.slots[0]++;
11271         }
11272
11273         if (del_nr) {
11274                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11275                 if (ret)
11276                         goto out;
11277         }
11278         btrfs_release_path(&path);
11279
11280 reinit_data_reloc:
11281         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11282         key.type = BTRFS_ROOT_ITEM_KEY;
11283         key.offset = (u64)-1;
11284         root = btrfs_read_fs_root(fs_info, &key);
11285         if (IS_ERR(root)) {
11286                 fprintf(stderr, "Error reading data reloc tree\n");
11287                 ret = PTR_ERR(root);
11288                 goto out;
11289         }
11290         record_root_in_trans(trans, root);
11291         ret = btrfs_fsck_reinit_root(trans, root, 0);
11292         if (ret)
11293                 goto out;
11294         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11295 out:
11296         btrfs_release_path(&path);
11297         return ret;
11298 }
11299
11300 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11301                               struct btrfs_fs_info *fs_info)
11302 {
11303         u64 start = 0;
11304         int ret;
11305
11306         /*
11307          * The only reason we don't do this is because right now we're just
11308          * walking the trees we find and pinning down their bytes, we don't look
11309          * at any of the leaves.  In order to do mixed groups we'd have to check
11310          * the leaves of any fs roots and pin down the bytes for any file
11311          * extents we find.  Not hard but why do it if we don't have to?
11312          */
11313         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11314                 fprintf(stderr, "We don't support re-initing the extent tree "
11315                         "for mixed block groups yet, please notify a btrfs "
11316                         "developer you want to do this so they can add this "
11317                         "functionality.\n");
11318                 return -EINVAL;
11319         }
11320
11321         /*
11322          * first we need to walk all of the trees except the extent tree and pin
11323          * down the bytes that are in use so we don't overwrite any existing
11324          * metadata.
11325          */
11326         ret = pin_metadata_blocks(fs_info);
11327         if (ret) {
11328                 fprintf(stderr, "error pinning down used bytes\n");
11329                 return ret;
11330         }
11331
11332         /*
11333          * Need to drop all the block groups since we're going to recreate all
11334          * of them again.
11335          */
11336         btrfs_free_block_groups(fs_info);
11337         ret = reset_block_groups(fs_info);
11338         if (ret) {
11339                 fprintf(stderr, "error resetting the block groups\n");
11340                 return ret;
11341         }
11342
11343         /* Ok we can allocate now, reinit the extent root */
11344         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11345         if (ret) {
11346                 fprintf(stderr, "extent root initialization failed\n");
11347                 /*
11348                  * When the transaction code is updated we should end the
11349                  * transaction, but for now progs only knows about commit so
11350                  * just return an error.
11351                  */
11352                 return ret;
11353         }
11354
11355         /*
11356          * Now we have all the in-memory block groups setup so we can make
11357          * allocations properly, and the metadata we care about is safe since we
11358          * pinned all of it above.
11359          */
11360         while (1) {
11361                 struct btrfs_block_group_cache *cache;
11362
11363                 cache = btrfs_lookup_first_block_group(fs_info, start);
11364                 if (!cache)
11365                         break;
11366                 start = cache->key.objectid + cache->key.offset;
11367                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11368                                         &cache->key, &cache->item,
11369                                         sizeof(cache->item));
11370                 if (ret) {
11371                         fprintf(stderr, "Error adding block group\n");
11372                         return ret;
11373                 }
11374                 btrfs_extent_post_op(trans, fs_info->extent_root);
11375         }
11376
11377         ret = reset_balance(trans, fs_info);
11378         if (ret)
11379                 fprintf(stderr, "error resetting the pending balance\n");
11380
11381         return ret;
11382 }
11383
11384 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11385 {
11386         struct btrfs_path path;
11387         struct btrfs_trans_handle *trans;
11388         struct btrfs_key key;
11389         int ret;
11390
11391         printf("Recowing metadata block %llu\n", eb->start);
11392         key.objectid = btrfs_header_owner(eb);
11393         key.type = BTRFS_ROOT_ITEM_KEY;
11394         key.offset = (u64)-1;
11395
11396         root = btrfs_read_fs_root(root->fs_info, &key);
11397         if (IS_ERR(root)) {
11398                 fprintf(stderr, "Couldn't find owner root %llu\n",
11399                         key.objectid);
11400                 return PTR_ERR(root);
11401         }
11402
11403         trans = btrfs_start_transaction(root, 1);
11404         if (IS_ERR(trans))
11405                 return PTR_ERR(trans);
11406
11407         btrfs_init_path(&path);
11408         path.lowest_level = btrfs_header_level(eb);
11409         if (path.lowest_level)
11410                 btrfs_node_key_to_cpu(eb, &key, 0);
11411         else
11412                 btrfs_item_key_to_cpu(eb, &key, 0);
11413
11414         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11415         btrfs_commit_transaction(trans, root);
11416         btrfs_release_path(&path);
11417         return ret;
11418 }
11419
11420 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11421 {
11422         struct btrfs_path path;
11423         struct btrfs_trans_handle *trans;
11424         struct btrfs_key key;
11425         int ret;
11426
11427         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11428                bad->key.type, bad->key.offset);
11429         key.objectid = bad->root_id;
11430         key.type = BTRFS_ROOT_ITEM_KEY;
11431         key.offset = (u64)-1;
11432
11433         root = btrfs_read_fs_root(root->fs_info, &key);
11434         if (IS_ERR(root)) {
11435                 fprintf(stderr, "Couldn't find owner root %llu\n",
11436                         key.objectid);
11437                 return PTR_ERR(root);
11438         }
11439
11440         trans = btrfs_start_transaction(root, 1);
11441         if (IS_ERR(trans))
11442                 return PTR_ERR(trans);
11443
11444         btrfs_init_path(&path);
11445         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11446         if (ret) {
11447                 if (ret > 0)
11448                         ret = 0;
11449                 goto out;
11450         }
11451         ret = btrfs_del_item(trans, root, &path);
11452 out:
11453         btrfs_commit_transaction(trans, root);
11454         btrfs_release_path(&path);
11455         return ret;
11456 }
11457
11458 static int zero_log_tree(struct btrfs_root *root)
11459 {
11460         struct btrfs_trans_handle *trans;
11461         int ret;
11462
11463         trans = btrfs_start_transaction(root, 1);
11464         if (IS_ERR(trans)) {
11465                 ret = PTR_ERR(trans);
11466                 return ret;
11467         }
11468         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11469         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11470         ret = btrfs_commit_transaction(trans, root);
11471         return ret;
11472 }
11473
11474 static int populate_csum(struct btrfs_trans_handle *trans,
11475                          struct btrfs_root *csum_root, char *buf, u64 start,
11476                          u64 len)
11477 {
11478         u64 offset = 0;
11479         u64 sectorsize;
11480         int ret = 0;
11481
11482         while (offset < len) {
11483                 sectorsize = csum_root->sectorsize;
11484                 ret = read_extent_data(csum_root, buf, start + offset,
11485                                        &sectorsize, 0);
11486                 if (ret)
11487                         break;
11488                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11489                                             start + offset, buf, sectorsize);
11490                 if (ret)
11491                         break;
11492                 offset += sectorsize;
11493         }
11494         return ret;
11495 }
11496
11497 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11498                                       struct btrfs_root *csum_root,
11499                                       struct btrfs_root *cur_root)
11500 {
11501         struct btrfs_path path;
11502         struct btrfs_key key;
11503         struct extent_buffer *node;
11504         struct btrfs_file_extent_item *fi;
11505         char *buf = NULL;
11506         u64 start = 0;
11507         u64 len = 0;
11508         int slot = 0;
11509         int ret = 0;
11510
11511         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11512         if (!buf)
11513                 return -ENOMEM;
11514
11515         btrfs_init_path(&path);
11516         key.objectid = 0;
11517         key.offset = 0;
11518         key.type = 0;
11519         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11520         if (ret < 0)
11521                 goto out;
11522         /* Iterate all regular file extents and fill its csum */
11523         while (1) {
11524                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11525
11526                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11527                         goto next;
11528                 node = path.nodes[0];
11529                 slot = path.slots[0];
11530                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11531                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
11532                         goto next;
11533                 start = btrfs_file_extent_disk_bytenr(node, fi);
11534                 len = btrfs_file_extent_disk_num_bytes(node, fi);
11535
11536                 ret = populate_csum(trans, csum_root, buf, start, len);
11537                 if (ret == -EEXIST)
11538                         ret = 0;
11539                 if (ret < 0)
11540                         goto out;
11541 next:
11542                 /*
11543                  * TODO: if next leaf is corrupted, jump to nearest next valid
11544                  * leaf.
11545                  */
11546                 ret = btrfs_next_item(cur_root, &path);
11547                 if (ret < 0)
11548                         goto out;
11549                 if (ret > 0) {
11550                         ret = 0;
11551                         goto out;
11552                 }
11553         }
11554
11555 out:
11556         btrfs_release_path(&path);
11557         free(buf);
11558         return ret;
11559 }
11560
11561 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11562                                   struct btrfs_root *csum_root)
11563 {
11564         struct btrfs_fs_info *fs_info = csum_root->fs_info;
11565         struct btrfs_path path;
11566         struct btrfs_root *tree_root = fs_info->tree_root;
11567         struct btrfs_root *cur_root;
11568         struct extent_buffer *node;
11569         struct btrfs_key key;
11570         int slot = 0;
11571         int ret = 0;
11572
11573         btrfs_init_path(&path);
11574         key.objectid = BTRFS_FS_TREE_OBJECTID;
11575         key.offset = 0;
11576         key.type = BTRFS_ROOT_ITEM_KEY;
11577         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11578         if (ret < 0)
11579                 goto out;
11580         if (ret > 0) {
11581                 ret = -ENOENT;
11582                 goto out;
11583         }
11584
11585         while (1) {
11586                 node = path.nodes[0];
11587                 slot = path.slots[0];
11588                 btrfs_item_key_to_cpu(node, &key, slot);
11589                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11590                         goto out;
11591                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11592                         goto next;
11593                 if (!is_fstree(key.objectid))
11594                         goto next;
11595                 key.offset = (u64)-1;
11596
11597                 cur_root = btrfs_read_fs_root(fs_info, &key);
11598                 if (IS_ERR(cur_root) || !cur_root) {
11599                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11600                                 key.objectid);
11601                         goto out;
11602                 }
11603                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11604                                 cur_root);
11605                 if (ret < 0)
11606                         goto out;
11607 next:
11608                 ret = btrfs_next_item(tree_root, &path);
11609                 if (ret > 0) {
11610                         ret = 0;
11611                         goto out;
11612                 }
11613                 if (ret < 0)
11614                         goto out;
11615         }
11616
11617 out:
11618         btrfs_release_path(&path);
11619         return ret;
11620 }
11621
11622 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11623                                       struct btrfs_root *csum_root)
11624 {
11625         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11626         struct btrfs_path path;
11627         struct btrfs_extent_item *ei;
11628         struct extent_buffer *leaf;
11629         char *buf;
11630         struct btrfs_key key;
11631         int ret;
11632
11633         btrfs_init_path(&path);
11634         key.objectid = 0;
11635         key.type = BTRFS_EXTENT_ITEM_KEY;
11636         key.offset = 0;
11637         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11638         if (ret < 0) {
11639                 btrfs_release_path(&path);
11640                 return ret;
11641         }
11642
11643         buf = malloc(csum_root->sectorsize);
11644         if (!buf) {
11645                 btrfs_release_path(&path);
11646                 return -ENOMEM;
11647         }
11648
11649         while (1) {
11650                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11651                         ret = btrfs_next_leaf(extent_root, &path);
11652                         if (ret < 0)
11653                                 break;
11654                         if (ret) {
11655                                 ret = 0;
11656                                 break;
11657                         }
11658                 }
11659                 leaf = path.nodes[0];
11660
11661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11662                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11663                         path.slots[0]++;
11664                         continue;
11665                 }
11666
11667                 ei = btrfs_item_ptr(leaf, path.slots[0],
11668                                     struct btrfs_extent_item);
11669                 if (!(btrfs_extent_flags(leaf, ei) &
11670                       BTRFS_EXTENT_FLAG_DATA)) {
11671                         path.slots[0]++;
11672                         continue;
11673                 }
11674
11675                 ret = populate_csum(trans, csum_root, buf, key.objectid,
11676                                     key.offset);
11677                 if (ret)
11678                         break;
11679                 path.slots[0]++;
11680         }
11681
11682         btrfs_release_path(&path);
11683         free(buf);
11684         return ret;
11685 }
11686
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree re-init wipes all extent info, so in that case the extent
 * tree cannot be used as the source.  When @search_fs_tree is non-zero the
 * fs/subvolume trees are scanned instead; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root) :
				fill_csum_tree_from_extent(trans, csum_root);
}
11703
11704 static void free_roots_info_cache(void)
11705 {
11706         if (!roots_info_cache)
11707                 return;
11708
11709         while (!cache_tree_empty(roots_info_cache)) {
11710                 struct cache_extent *entry;
11711                 struct root_item_info *rii;
11712
11713                 entry = first_cache_extent(roots_info_cache);
11714                 if (!entry)
11715                         break;
11716                 remove_cache_extent(roots_info_cache, entry);
11717                 rii = container_of(entry, struct root_item_info, cache_extent);
11718                 free(rii);
11719         }
11720
11721         free(roots_info_cache);
11722         roots_info_cache = NULL;
11723 }
11724
11725 static int build_roots_info_cache(struct btrfs_fs_info *info)
11726 {
11727         int ret = 0;
11728         struct btrfs_key key;
11729         struct extent_buffer *leaf;
11730         struct btrfs_path path;
11731
11732         if (!roots_info_cache) {
11733                 roots_info_cache = malloc(sizeof(*roots_info_cache));
11734                 if (!roots_info_cache)
11735                         return -ENOMEM;
11736                 cache_tree_init(roots_info_cache);
11737         }
11738
11739         btrfs_init_path(&path);
11740         key.objectid = 0;
11741         key.type = BTRFS_EXTENT_ITEM_KEY;
11742         key.offset = 0;
11743         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11744         if (ret < 0)
11745                 goto out;
11746         leaf = path.nodes[0];
11747
11748         while (1) {
11749                 struct btrfs_key found_key;
11750                 struct btrfs_extent_item *ei;
11751                 struct btrfs_extent_inline_ref *iref;
11752                 int slot = path.slots[0];
11753                 int type;
11754                 u64 flags;
11755                 u64 root_id;
11756                 u8 level;
11757                 struct cache_extent *entry;
11758                 struct root_item_info *rii;
11759
11760                 if (slot >= btrfs_header_nritems(leaf)) {
11761                         ret = btrfs_next_leaf(info->extent_root, &path);
11762                         if (ret < 0) {
11763                                 break;
11764                         } else if (ret) {
11765                                 ret = 0;
11766                                 break;
11767                         }
11768                         leaf = path.nodes[0];
11769                         slot = path.slots[0];
11770                 }
11771
11772                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11773
11774                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11775                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11776                         goto next;
11777
11778                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11779                 flags = btrfs_extent_flags(leaf, ei);
11780
11781                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11782                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11783                         goto next;
11784
11785                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11786                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11787                         level = found_key.offset;
11788                 } else {
11789                         struct btrfs_tree_block_info *binfo;
11790
11791                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11792                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11793                         level = btrfs_tree_block_level(leaf, binfo);
11794                 }
11795
11796                 /*
11797                  * For a root extent, it must be of the following type and the
11798                  * first (and only one) iref in the item.
11799                  */
11800                 type = btrfs_extent_inline_ref_type(leaf, iref);
11801                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11802                         goto next;
11803
11804                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11805                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11806                 if (!entry) {
11807                         rii = malloc(sizeof(struct root_item_info));
11808                         if (!rii) {
11809                                 ret = -ENOMEM;
11810                                 goto out;
11811                         }
11812                         rii->cache_extent.start = root_id;
11813                         rii->cache_extent.size = 1;
11814                         rii->level = (u8)-1;
11815                         entry = &rii->cache_extent;
11816                         ret = insert_cache_extent(roots_info_cache, entry);
11817                         ASSERT(ret == 0);
11818                 } else {
11819                         rii = container_of(entry, struct root_item_info,
11820                                            cache_extent);
11821                 }
11822
11823                 ASSERT(rii->cache_extent.start == root_id);
11824                 ASSERT(rii->cache_extent.size == 1);
11825
11826                 if (level > rii->level || rii->level == (u8)-1) {
11827                         rii->level = level;
11828                         rii->bytenr = found_key.objectid;
11829                         rii->gen = btrfs_extent_generation(leaf, ei);
11830                         rii->node_count = 1;
11831                 } else if (level == rii->level) {
11832                         rii->node_count++;
11833                 }
11834 next:
11835                 path.slots[0]++;
11836         }
11837
11838 out:
11839         btrfs_release_path(&path);
11840
11841         return ret;
11842 }
11843
11844 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11845                                   struct btrfs_path *path,
11846                                   const struct btrfs_key *root_key,
11847                                   const int read_only_mode)
11848 {
11849         const u64 root_id = root_key->objectid;
11850         struct cache_extent *entry;
11851         struct root_item_info *rii;
11852         struct btrfs_root_item ri;
11853         unsigned long offset;
11854
11855         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11856         if (!entry) {
11857                 fprintf(stderr,
11858                         "Error: could not find extent items for root %llu\n",
11859                         root_key->objectid);
11860                 return -ENOENT;
11861         }
11862
11863         rii = container_of(entry, struct root_item_info, cache_extent);
11864         ASSERT(rii->cache_extent.start == root_id);
11865         ASSERT(rii->cache_extent.size == 1);
11866
11867         if (rii->node_count != 1) {
11868                 fprintf(stderr,
11869                         "Error: could not find btree root extent for root %llu\n",
11870                         root_id);
11871                 return -ENOENT;
11872         }
11873
11874         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11875         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11876
11877         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11878             btrfs_root_level(&ri) != rii->level ||
11879             btrfs_root_generation(&ri) != rii->gen) {
11880
11881                 /*
11882                  * If we're in repair mode but our caller told us to not update
11883                  * the root item, i.e. just check if it needs to be updated, don't
11884                  * print this message, since the caller will call us again shortly
11885                  * for the same root item without read only mode (the caller will
11886                  * open a transaction first).
11887                  */
11888                 if (!(read_only_mode && repair))
11889                         fprintf(stderr,
11890                                 "%sroot item for root %llu,"
11891                                 " current bytenr %llu, current gen %llu, current level %u,"
11892                                 " new bytenr %llu, new gen %llu, new level %u\n",
11893                                 (read_only_mode ? "" : "fixing "),
11894                                 root_id,
11895                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11896                                 btrfs_root_level(&ri),
11897                                 rii->bytenr, rii->gen, rii->level);
11898
11899                 if (btrfs_root_generation(&ri) > rii->gen) {
11900                         fprintf(stderr,
11901                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11902                                 root_id, btrfs_root_generation(&ri), rii->gen);
11903                         return -EINVAL;
11904                 }
11905
11906                 if (!read_only_mode) {
11907                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11908                         btrfs_set_root_level(&ri, rii->level);
11909                         btrfs_set_root_generation(&ri, rii->gen);
11910                         write_extent_buffer(path->nodes[0], &ri,
11911                                             offset, sizeof(ri));
11912                 }
11913
11914                 return 1;
11915         }
11916
11917         return 0;
11918 }
11919
11920 /*
11921  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11922  * caused read-only snapshots to be corrupted if they were created at a moment
11923  * when the source subvolume/snapshot had orphan items. The issue was that the
11924  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11925  * node instead of the post orphan cleanup root node.
11926  * So this function, and its callees, just detects and fixes those cases. Even
11927  * though the regression was for read-only snapshots, this function applies to
11928  * any snapshot/subvolume root.
11929  * This must be run before any other repair code - not doing it so, makes other
11930  * repair code delete or modify backrefs in the extent tree for example, which
11931  * will result in an inconsistent fs after repairing the root items.
11932  */
/*
 * Walk the root items stored in the tree root and hand every one of them
 * (except tree-reloc roots) to maybe_repair_root_item().
 *
 * Returns a negative value on error.  Otherwise returns the number of root
 * items for which maybe_repair_root_item() returned a positive value: the
 * caller reports this as "fixed" roots when the global 'repair' flag is set
 * and as roots with an outdated root item when it is not.
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;
	int bad_roots = 0;
	int need_trans = 0;

	btrfs_init_path(&path);

	ret = build_roots_info_cache(info);
	if (ret)
		goto out;

	/* Start the scan at the first root item of a regular (non-system) tree */
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

again:
	/*
	 * Avoid opening and committing transactions if a leaf doesn't have
	 * any root items that need to be fixed, so that we avoid rotating
	 * backup roots unnecessarily.
	 */
	if (need_trans) {
		trans = btrfs_start_transaction(info->tree_root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
	}

	/* Request COW of the path only when a transaction is open */
	ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
				0, trans ? 1 : 0);
	if (ret < 0)
		goto out;
	leaf = path.nodes[0];

	while (1) {
		struct btrfs_key found_key;

		/* Ran past the last item of the current leaf */
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			/*
			 * Fetch the key to resume from before the path is
			 * released; a non-zero result ends the scan.
			 */
			int no_more_keys = find_next_key(&path, &key);

			btrfs_release_path(&path);
			if (trans) {
				ret = btrfs_commit_transaction(trans,
							       info->tree_root);
				/* The handle is not used again after commit */
				trans = NULL;
				if (ret < 0)
					goto out;
			}
			/* The next leaf is scanned without a transaction first */
			need_trans = 0;
			if (no_more_keys)
				break;
			goto again;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

		if (found_key.type != BTRFS_ROOT_ITEM_KEY)
			goto next;
		if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			goto next;

		/*
		 * The last argument is 1 while no transaction is open.
		 * NOTE(review): presumably a "read only / detect only" flag -
		 * confirm against maybe_repair_root_item().
		 */
		ret = maybe_repair_root_item(info, &path, &found_key,
					     trans ? 0 : 1);
		if (ret < 0)
			goto out;
		if (ret) {
			/*
			 * A positive result without a transaction in repair
			 * mode: restart the search at this very item, this
			 * time with a transaction open.
			 */
			if (!trans && repair) {
				need_trans = 1;
				key = found_key;
				btrfs_release_path(&path);
				goto again;
			}
			bad_roots++;
		}
next:
		path.slots[0]++;
	}
	ret = 0;
out:
	free_roots_info_cache();
	btrfs_release_path(&path);
	/*
	 * A transaction can still be open here when an error occurred in the
	 * middle of a leaf; its commit result is not checked.
	 */
	if (trans)
		btrfs_commit_transaction(trans, info->tree_root);
	if (ret < 0)
		return ret;

	return bad_roots;
}
12027
12028 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12029 {
12030         struct btrfs_trans_handle *trans;
12031         struct btrfs_block_group_cache *bg_cache;
12032         u64 current = 0;
12033         int ret = 0;
12034
12035         /* Clear all free space cache inodes and its extent data */
12036         while (1) {
12037                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12038                 if (!bg_cache)
12039                         break;
12040                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12041                 if (ret < 0)
12042                         return ret;
12043                 current = bg_cache->key.objectid + bg_cache->key.offset;
12044         }
12045
12046         /* Don't forget to set cache_generation to -1 */
12047         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12048         if (IS_ERR(trans)) {
12049                 error("failed to update super block cache generation");
12050                 return PTR_ERR(trans);
12051         }
12052         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12053         btrfs_commit_transaction(trans, fs_info->tree_root);
12054
12055         return ret;
12056 }
12057
/*
 * Usage text of "btrfs check": a NULL-terminated array of strings that
 * cmd_check() passes to usage() on a bad command line.  The array starts
 * with the synopsis and description lines; an empty string separates them
 * from the option list.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL	/* end-of-list marker */
};
12088
12089 int cmd_check(int argc, char **argv)
12090 {
12091         struct cache_tree root_cache;
12092         struct btrfs_root *root;
12093         struct btrfs_fs_info *info;
12094         u64 bytenr = 0;
12095         u64 subvolid = 0;
12096         u64 tree_root_bytenr = 0;
12097         u64 chunk_root_bytenr = 0;
12098         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12099         int ret;
12100         u64 num;
12101         int init_csum_tree = 0;
12102         int readonly = 0;
12103         int clear_space_cache = 0;
12104         int qgroup_report = 0;
12105         int qgroups_repaired = 0;
12106         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12107
12108         while(1) {
12109                 int c;
12110                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12111                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12112                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12113                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12114                 static const struct option long_options[] = {
12115                         { "super", required_argument, NULL, 's' },
12116                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12117                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12118                         { "init-csum-tree", no_argument, NULL,
12119                                 GETOPT_VAL_INIT_CSUM },
12120                         { "init-extent-tree", no_argument, NULL,
12121                                 GETOPT_VAL_INIT_EXTENT },
12122                         { "check-data-csum", no_argument, NULL,
12123                                 GETOPT_VAL_CHECK_CSUM },
12124                         { "backup", no_argument, NULL, 'b' },
12125                         { "subvol-extents", required_argument, NULL, 'E' },
12126                         { "qgroup-report", no_argument, NULL, 'Q' },
12127                         { "tree-root", required_argument, NULL, 'r' },
12128                         { "chunk-root", required_argument, NULL,
12129                                 GETOPT_VAL_CHUNK_TREE },
12130                         { "progress", no_argument, NULL, 'p' },
12131                         { "mode", required_argument, NULL,
12132                                 GETOPT_VAL_MODE },
12133                         { "clear-space-cache", required_argument, NULL,
12134                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12135                         { NULL, 0, NULL, 0}
12136                 };
12137
12138                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12139                 if (c < 0)
12140                         break;
12141                 switch(c) {
12142                         case 'a': /* ignored */ break;
12143                         case 'b':
12144                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12145                                 break;
12146                         case 's':
12147                                 num = arg_strtou64(optarg);
12148                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12149                                         error(
12150                                         "super mirror should be less than %d",
12151                                                 BTRFS_SUPER_MIRROR_MAX);
12152                                         exit(1);
12153                                 }
12154                                 bytenr = btrfs_sb_offset(((int)num));
12155                                 printf("using SB copy %llu, bytenr %llu\n", num,
12156                                        (unsigned long long)bytenr);
12157                                 break;
12158                         case 'Q':
12159                                 qgroup_report = 1;
12160                                 break;
12161                         case 'E':
12162                                 subvolid = arg_strtou64(optarg);
12163                                 break;
12164                         case 'r':
12165                                 tree_root_bytenr = arg_strtou64(optarg);
12166                                 break;
12167                         case GETOPT_VAL_CHUNK_TREE:
12168                                 chunk_root_bytenr = arg_strtou64(optarg);
12169                                 break;
12170                         case 'p':
12171                                 ctx.progress_enabled = true;
12172                                 break;
12173                         case '?':
12174                         case 'h':
12175                                 usage(cmd_check_usage);
12176                         case GETOPT_VAL_REPAIR:
12177                                 printf("enabling repair mode\n");
12178                                 repair = 1;
12179                                 ctree_flags |= OPEN_CTREE_WRITES;
12180                                 break;
12181                         case GETOPT_VAL_READONLY:
12182                                 readonly = 1;
12183                                 break;
12184                         case GETOPT_VAL_INIT_CSUM:
12185                                 printf("Creating a new CRC tree\n");
12186                                 init_csum_tree = 1;
12187                                 repair = 1;
12188                                 ctree_flags |= OPEN_CTREE_WRITES;
12189                                 break;
12190                         case GETOPT_VAL_INIT_EXTENT:
12191                                 init_extent_tree = 1;
12192                                 ctree_flags |= (OPEN_CTREE_WRITES |
12193                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12194                                 repair = 1;
12195                                 break;
12196                         case GETOPT_VAL_CHECK_CSUM:
12197                                 check_data_csum = 1;
12198                                 break;
12199                         case GETOPT_VAL_MODE:
12200                                 check_mode = parse_check_mode(optarg);
12201                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12202                                         error("unknown mode: %s", optarg);
12203                                         exit(1);
12204                                 }
12205                                 break;
12206                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12207                                 if (strcmp(optarg, "v1") == 0) {
12208                                         clear_space_cache = 1;
12209                                 } else if (strcmp(optarg, "v2") == 0) {
12210                                         clear_space_cache = 2;
12211                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12212                                 } else {
12213                                         error(
12214                 "invalid argument to --clear-space-cache, must be v1 or v2");
12215                                         exit(1);
12216                                 }
12217                                 ctree_flags |= OPEN_CTREE_WRITES;
12218                                 break;
12219                 }
12220         }
12221
12222         if (check_argc_exact(argc - optind, 1))
12223                 usage(cmd_check_usage);
12224
12225         if (ctx.progress_enabled) {
12226                 ctx.tp = TASK_NOTHING;
12227                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12228         }
12229
12230         /* This check is the only reason for --readonly to exist */
12231         if (readonly && repair) {
12232                 error("repair options are not compatible with --readonly");
12233                 exit(1);
12234         }
12235
12236         /*
12237          * Not supported yet
12238          */
12239         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12240                 error("low memory mode doesn't support repair yet");
12241                 exit(1);
12242         }
12243
12244         radix_tree_init();
12245         cache_tree_init(&root_cache);
12246
12247         if((ret = check_mounted(argv[optind])) < 0) {
12248                 error("could not check mount status: %s", strerror(-ret));
12249                 goto err_out;
12250         } else if(ret) {
12251                 error("%s is currently mounted, aborting", argv[optind]);
12252                 ret = -EBUSY;
12253                 goto err_out;
12254         }
12255
12256         /* only allow partial opening under repair mode */
12257         if (repair)
12258                 ctree_flags |= OPEN_CTREE_PARTIAL;
12259
12260         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12261                                   chunk_root_bytenr, ctree_flags);
12262         if (!info) {
12263                 error("cannot open file system");
12264                 ret = -EIO;
12265                 goto err_out;
12266         }
12267
12268         global_info = info;
12269         root = info->fs_root;
12270         if (clear_space_cache == 1) {
12271                 if (btrfs_fs_compat_ro(info,
12272                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12273                         error(
12274                 "free space cache v2 detected, use --clear-space-cache v2");
12275                         ret = 1;
12276                         goto close_out;
12277                 }
12278                 printf("Clearing free space cache\n");
12279                 ret = clear_free_space_cache(info);
12280                 if (ret) {
12281                         error("failed to clear free space cache");
12282                         ret = 1;
12283                 } else {
12284                         printf("Free space cache cleared\n");
12285                 }
12286                 goto close_out;
12287         } else if (clear_space_cache == 2) {
12288                 if (!btrfs_fs_compat_ro(info,
12289                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12290                         printf("no free space cache v2 to clear\n");
12291                         ret = 0;
12292                         goto close_out;
12293                 }
12294                 printf("Clear free space cache v2\n");
12295                 ret = btrfs_clear_free_space_tree(info);
12296                 if (ret) {
12297                         error("failed to clear free space cache v2: %d", ret);
12298                         ret = 1;
12299                 } else {
12300                         printf("free space cache v2 cleared\n");
12301                 }
12302                 goto close_out;
12303         }
12304
12305         /*
12306          * repair mode will force us to commit transaction which
12307          * will make us fail to load log tree when mounting.
12308          */
12309         if (repair && btrfs_super_log_root(info->super_copy)) {
12310                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12311                 if (!ret) {
12312                         ret = 1;
12313                         goto close_out;
12314                 }
12315                 ret = zero_log_tree(root);
12316                 if (ret) {
12317                         error("failed to zero log tree: %d", ret);
12318                         goto close_out;
12319                 }
12320         }
12321
12322         uuid_unparse(info->super_copy->fsid, uuidbuf);
12323         if (qgroup_report) {
12324                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12325                        uuidbuf);
12326                 ret = qgroup_verify_all(info);
12327                 if (ret == 0)
12328                         report_qgroups(1);
12329                 goto close_out;
12330         }
12331         if (subvolid) {
12332                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12333                        subvolid, argv[optind], uuidbuf);
12334                 ret = print_extent_state(info, subvolid);
12335                 goto close_out;
12336         }
12337         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12338
12339         if (!extent_buffer_uptodate(info->tree_root->node) ||
12340             !extent_buffer_uptodate(info->dev_root->node) ||
12341             !extent_buffer_uptodate(info->chunk_root->node)) {
12342                 error("critical roots corrupted, unable to check the filesystem");
12343                 ret = -EIO;
12344                 goto close_out;
12345         }
12346
12347         if (init_extent_tree || init_csum_tree) {
12348                 struct btrfs_trans_handle *trans;
12349
12350                 trans = btrfs_start_transaction(info->extent_root, 0);
12351                 if (IS_ERR(trans)) {
12352                         error("error starting transaction");
12353                         ret = PTR_ERR(trans);
12354                         goto close_out;
12355                 }
12356
12357                 if (init_extent_tree) {
12358                         printf("Creating a new extent tree\n");
12359                         ret = reinit_extent_tree(trans, info);
12360                         if (ret)
12361                                 goto close_out;
12362                 }
12363
12364                 if (init_csum_tree) {
12365                         printf("Reinitialize checksum tree\n");
12366                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12367                         if (ret) {
12368                                 error("checksum tree initialization failed: %d",
12369                                                 ret);
12370                                 ret = -EIO;
12371                                 goto close_out;
12372                         }
12373
12374                         ret = fill_csum_tree(trans, info->csum_root,
12375                                              init_extent_tree);
12376                         if (ret) {
12377                                 error("checksum tree refilling failed: %d", ret);
12378                                 return -EIO;
12379                         }
12380                 }
12381                 /*
12382                  * Ok now we commit and run the normal fsck, which will add
12383                  * extent entries for all of the items it finds.
12384                  */
12385                 ret = btrfs_commit_transaction(trans, info->extent_root);
12386                 if (ret)
12387                         goto close_out;
12388         }
12389         if (!extent_buffer_uptodate(info->extent_root->node)) {
12390                 error("critical: extent_root, unable to check the filesystem");
12391                 ret = -EIO;
12392                 goto close_out;
12393         }
12394         if (!extent_buffer_uptodate(info->csum_root->node)) {
12395                 error("critical: csum_root, unable to check the filesystem");
12396                 ret = -EIO;
12397                 goto close_out;
12398         }
12399
12400         if (!ctx.progress_enabled)
12401                 fprintf(stderr, "checking extents\n");
12402         if (check_mode == CHECK_MODE_LOWMEM)
12403                 ret = check_chunks_and_extents_v2(root);
12404         else
12405                 ret = check_chunks_and_extents(root);
12406         if (ret)
12407                 error(
12408                 "errors found in extent allocation tree or chunk allocation");
12409
12410         ret = repair_root_items(info);
12411         if (ret < 0)
12412                 goto close_out;
12413         if (repair) {
12414                 fprintf(stderr, "Fixed %d roots.\n", ret);
12415                 ret = 0;
12416         } else if (ret > 0) {
12417                 fprintf(stderr,
12418                        "Found %d roots with an outdated root item.\n",
12419                        ret);
12420                 fprintf(stderr,
12421                         "Please run a filesystem check with the option --repair to fix them.\n");
12422                 ret = 1;
12423                 goto close_out;
12424         }
12425
12426         if (!ctx.progress_enabled) {
12427                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12428                         fprintf(stderr, "checking free space tree\n");
12429                 else
12430                         fprintf(stderr, "checking free space cache\n");
12431         }
12432         ret = check_space_cache(root);
12433         if (ret)
12434                 goto out;
12435
12436         /*
12437          * We used to have to have these hole extents in between our real
12438          * extents so if we don't have this flag set we need to make sure there
12439          * are no gaps in the file extents for inodes, otherwise we can just
12440          * ignore it when this happens.
12441          */
12442         no_holes = btrfs_fs_incompat(root->fs_info,
12443                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12444         if (!ctx.progress_enabled)
12445                 fprintf(stderr, "checking fs roots\n");
12446         ret = check_fs_roots(root, &root_cache);
12447         if (ret)
12448                 goto out;
12449
12450         fprintf(stderr, "checking csums\n");
12451         ret = check_csums(root);
12452         if (ret)
12453                 goto out;
12454
12455         fprintf(stderr, "checking root refs\n");
12456         ret = check_root_refs(root, &root_cache);
12457         if (ret)
12458                 goto out;
12459
12460         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12461                 struct extent_buffer *eb;
12462
12463                 eb = list_first_entry(&root->fs_info->recow_ebs,
12464                                       struct extent_buffer, recow);
12465                 list_del_init(&eb->recow);
12466                 ret = recow_extent_buffer(root, eb);
12467                 if (ret)
12468                         break;
12469         }
12470
12471         while (!list_empty(&delete_items)) {
12472                 struct bad_item *bad;
12473
12474                 bad = list_first_entry(&delete_items, struct bad_item, list);
12475                 list_del_init(&bad->list);
12476                 if (repair)
12477                         ret = delete_bad_item(root, bad);
12478                 free(bad);
12479         }
12480
12481         if (info->quota_enabled) {
12482                 int err;
12483                 fprintf(stderr, "checking quota groups\n");
12484                 err = qgroup_verify_all(info);
12485                 if (err)
12486                         goto out;
12487                 report_qgroups(0);
12488                 err = repair_qgroups(info, &qgroups_repaired);
12489                 if (err)
12490                         goto out;
12491         }
12492
12493         if (!list_empty(&root->fs_info->recow_ebs)) {
12494                 error("transid errors in file system");
12495                 ret = 1;
12496         }
12497 out:
12498         /* Don't override original ret */
12499         if (!ret && qgroups_repaired)
12500                 ret = qgroups_repaired;
12501
12502         if (found_old_backref) { /*
12503                  * there was a disk format change when mixed
12504                  * backref was in testing tree. The old format
12505                  * existed about one week.
12506                  */
12507                 printf("\n * Found old mixed backref format. "
12508                        "The old format is not supported! *"
12509                        "\n * Please mount the FS in readonly mode, "
12510                        "backup data and re-format the FS. *\n\n");
12511                 ret = 1;
12512         }
12513         printf("found %llu bytes used err is %d\n",
12514                (unsigned long long)bytes_used, ret);
12515         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12516         printf("total tree bytes: %llu\n",
12517                (unsigned long long)total_btree_bytes);
12518         printf("total fs tree bytes: %llu\n",
12519                (unsigned long long)total_fs_tree_bytes);
12520         printf("total extent tree bytes: %llu\n",
12521                (unsigned long long)total_extent_tree_bytes);
12522         printf("btree space waste bytes: %llu\n",
12523                (unsigned long long)btree_space_waste);
12524         printf("file data blocks allocated: %llu\n referenced %llu\n",
12525                 (unsigned long long)data_bytes_allocated,
12526                 (unsigned long long)data_bytes_referenced);
12527
12528         free_qgroup_counts();
12529         free_root_recs_tree(&root_cache);
12530 close_out:
12531         close_ctree(root);
12532 err_out:
12533         if (ctx.progress_enabled)
12534                 task_deinit(ctx.info);
12535
12536         return ret;
12537 }