btrfs-progs: check: introduce function to check inode item
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phase shown by the progress indicator.  print_status_check() uses the value
 * as an index into its table of status strings.
 */
46 enum task_position {
47         TASK_EXTENTS,
48         TASK_FREE_SPACE,
49         TASK_FS_ROOTS,
50         TASK_NOTHING, /* must stay last; print_status_check() prints nothing for it */
51 };
52
/* Context handed to the progress callbacks (see print_status_check()). */
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp; /* selects the status string that is printed */
56
57         struct task_info *info; /* passed to task_period_start()/task_period_wait() */
58 };
59
/*
 * File-scope state shared by the checker.
 * NOTE(review): the code updating these is outside the visible part of the
 * file; judging by the names they are byte counters, option flags and work
 * lists -- confirm against the users before relying on that.
 */
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation selected by name; parse_check_mode() maps "lowmem",
 * "orig" and "original" to these values and everything else to
 * CHECK_MODE_UNKNOWN.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
83 };
84
/* Mode in effect for this run; starts out as the default (original) mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.  It is embedded as 'node' in both
 * struct data_backref and struct tree_backref and chained through 'list'.
 */
87 struct extent_backref {
88         struct list_head list; /* link; see to_extent_backref() */
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
/*
 * Back reference to a data extent.  Embeds the generic extent_backref as
 * 'node' so to_data_backref() can recover it from the generic pointer.
 */
101 struct data_backref {
102         struct extent_backref node;
        /* parent and root share storage; presumably node.full_backref
         * tells which one is meaningful -- TODO confirm */
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 {
118         return container_of(back, struct data_backref, node);
119 }
120
121 /*
122  * Much like data_backref, just removed the undetermined members
123  * and change it to use list_head.
124  * During extent scan, it is stored in root->orphan_data_extent.
125  * During fs tree scan, it is then moved to inode_record::orphan_extents.
126  */
127 struct orphan_data_extent {
128         struct list_head list; /* link, e.g. in inode_record::orphan_extents */
129         u64 root;
130         u64 objectid; /* printed as "inode" by print_orphan_data_extents() */
131         u64 offset;
132         u64 disk_bytenr;
133         u64 disk_len;
134 };
135
/*
 * Back reference to a tree block.  Embeds the generic extent_backref as
 * 'node' so to_tree_backref() can recover it from the generic pointer.
 */
136 struct tree_backref {
137         struct extent_backref node;
        /* parent and root share storage; presumably node.full_backref
         * tells which one is meaningful -- TODO confirm */
138         union {
139                 u64 parent;
140                 u64 root;
141         };
142 };
143
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 {
146         return container_of(back, struct tree_backref, node);
147 }
148
149 /* Explicit initialization value for extent_record::flag_block_full_backref, which is 2 bits wide so that it can hold this value */
150 enum { FLAG_UNSET = 2 };
151
/*
 * Bookkeeping record for one extent.
 * NOTE(review): the code filling it in is outside the visible part of the
 * file; field meanings beyond the inline notes are not documented here.
 */
152 struct extent_record {
153         struct list_head backrefs;
154         struct list_head dups;
155         struct list_head list; /* link used by to_extent_record() */
156         struct cache_extent cache;
157         struct btrfs_disk_key parent_key;
158         u64 start;
159         u64 max_size;
160         u64 nr;
161         u64 refs;
162         u64 extent_item_refs;
163         u64 generation;
164         u64 parent_generation;
165         u64 info_objectid;
166         u32 num_duplicates;
167         u8 info_level;
168         unsigned int flag_block_full_backref:2; /* 2 bits so FLAG_UNSET (2) fits */
169         unsigned int found_rec:1;
170         unsigned int content_checked:1;
171         unsigned int owner_ref_checked:1;
172         unsigned int is_root:1;
173         unsigned int metadata:1;
174         unsigned int bad_full_backref:1;
175         unsigned int crossing_stripes:1;
176         unsigned int wrong_chunk_type:1;
177 };
178
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 {
181         return container_of(entry, struct extent_record, list);
182 }
183
/*
 * One name referring to an inode, linked into inode_record::backrefs.
 * The name is stored inline after the struct: clone_inode_rec() copies
 * sizeof(struct) + namelen + 1 bytes per entry.
 */
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         u8 filetype; /* compared with imode_to_type(imode) of the inode */
190         u8 ref_type;
191         int errors; /* REF_ERR_* bits */
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0]; /* trailing storage for the name */
196 };
197
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 {
200         return list_entry(entry, struct inode_backref, list);
201 }
202
/*
 * Snapshot of selected root item values.
 * NOTE(review): not referenced in the visible part of the file; the field
 * names suggest it caches a root's location, level and drop progress --
 * confirm against the users.
 */
203 struct root_item_record {
204         struct list_head list;
205         u64 objectid;
206         u64 bytenr;
207         u64 last_snapshot;
208         u8 level;
209         u8 drop_level;
210         int level_size;
211         struct btrfs_key drop_key;
212 };
213
/*
 * Error bits kept in the 'errors' member of a back reference (for example
 * inode_backref::errors) and turned into text by print_ref_error().
 */
214 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
215 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
216 #define REF_ERR_NO_INODE_REF            (1 << 2)
217 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
218 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
219 #define REF_ERR_DUP_INODE_REF           (1 << 5)
220 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
221 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
222 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
223 #define REF_ERR_NO_ROOT_REF             (1 << 9)
224 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
225 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
226 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
227
/*
 * A hole covering [start, start + len) in a file, stored in an rb-tree
 * (inode_record::holes) that compare_hole() orders by start offset.
 */
228 struct file_extent_hole {
229         struct rb_node node;
230         u64 start;
231         u64 len;
232 };
233
/*
 * State collected for one inode.  Records are shared by reference count:
 * get_inode_rec() clones a record that has more than one reference before it
 * is modified, and free_inode_rec() releases it when the count reaches zero.
 */
234 struct inode_record {
235         struct list_head backrefs; /* list of struct inode_backref */
236         unsigned int checked:1;
237         unsigned int merging:1;
238         unsigned int found_inode_item:1;
239         unsigned int found_dir_item:1;
240         unsigned int found_file_extent:1;
241         unsigned int found_csum_item:1;
242         unsigned int some_csum_missing:1;
243         unsigned int nodatasum:1;
244         int errors; /* I_ERR_* bits, printed by print_inode_error() */
245
246         u64 ino;
247         u32 nlink;
248         u32 imode;
249         u64 isize;
250         u64 nbytes;
251
252         u32 found_link; /* must equal nlink before the record can be freed */
253         u64 found_size;
254         u64 extent_start; /* get_inode_rec() initialises this to (u64)-1 */
255         u64 extent_end;
256         struct rb_root holes; /* tree of struct file_extent_hole */
257         struct list_head orphan_extents; /* list of struct orphan_data_extent */
258
259         u32 refs; /* reference count */
260 };
261
/* Error bits for inode_record::errors, turned into text by print_inode_error(). */
262 #define I_ERR_NO_INODE_ITEM             (1 << 0)
263 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
264 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
265 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
266 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
267 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
268 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
269 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
270 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
271 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
272 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
273 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
274 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
275 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
276 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
277
/*
 * One reference to a root, with the name stored inline after the struct.
 * NOTE(review): the users are outside the visible part of the file; the
 * layout parallels struct inode_backref -- confirm the details there.
 */
278 struct root_backref {
279         struct list_head list; /* link; see to_root_backref() */
280         unsigned int found_dir_item:1;
281         unsigned int found_dir_index:1;
282         unsigned int found_back_ref:1;
283         unsigned int found_forward_ref:1;
284         unsigned int reachable:1;
285         int errors;
286         u64 ref_root;
287         u64 dir;
288         u64 index;
289         u16 namelen;
290         char name[0]; /* trailing storage for the name */
291 };
292
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 {
295         return list_entry(entry, struct root_backref, list);
296 }
297
/*
 * Per-root record that can be stored in a cache tree through 'cache'.
 * NOTE(review): users are outside the visible part of the file; 'backrefs'
 * presumably holds struct root_backref entries -- confirm.
 */
298 struct root_record {
299         struct list_head backrefs;
300         struct cache_extent cache;
301         unsigned int found_root_item:1;
302         u64 objectid;
303         u32 found_ref;
304 };
305
/*
 * Cache-tree node carrying an opaque pointer.  get_inode_rec() uses it with
 * cache.start = inode number, cache.size = 1 and data = the inode_record.
 */
306 struct ptr_node {
307         struct cache_extent cache;
308         void *data;
309 };
310
/*
 * Cache-tree node holding per-node root and inode caches.
 * NOTE(review): users are outside the visible part of the file; it is
 * referenced from struct walk_control -- confirm semantics there.
 */
311 struct shared_node {
312         struct cache_extent cache;
313         struct cache_tree root_cache;
314         struct cache_tree inode_cache;
315         struct inode_record *current;
316         u32 refs;
317 };
318
/*
 * A (start, size) pair.
 * NOTE(review): not referenced in the visible part of the file; presumably
 * describes a block by start byte and length -- confirm.
 */
319 struct block_info {
320         u64 start;
321         u32 size;
322 };
323
/*
 * State of a tree walk: one shared_node slot per tree level.
 * NOTE(review): users are outside the visible part of the file; active_node
 * and root_level are presumably level indexes into nodes[] -- confirm.
 */
324 struct walk_control {
325         struct cache_tree shared;
326         struct shared_node *nodes[BTRFS_MAX_LEVEL];
327         int active_node;
328         int root_level;
329 };
330
/*
 * A key plus the id of the root it belongs to, chained through 'list'.
 * NOTE(review): not referenced in the visible part of the file -- confirm
 * which list collects these.
 */
331 struct bad_item {
332         struct btrfs_key key;
333         u64 root_id;
334         struct list_head list;
335 };
336
/*
 * A (bytenr, bytes) candidate with a counter, chained through 'list'.
 * NOTE(review): not referenced in the visible part of the file -- confirm
 * the meaning of count and broken against the users.
 */
337 struct extent_entry {
338         u64 bytenr;
339         u64 bytes;
340         int count;
341         int broken;
342         struct list_head list;
343 };
344
/*
 * Information gathered about one root, stored in a cache tree through
 * 'cache_extent'.
 * NOTE(review): users are outside the visible part of the file (possibly
 * the roots_info_cache global) -- confirm.
 */
345 struct root_item_info {
346         /* level of the root */
347         u8 level;
348         /* number of nodes at this level, must be 1 for a root */
349         int node_count;
350         u64 bytenr;
351         u64 gen;
352         struct cache_extent cache_extent;
353 };
354
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values; they are only used
 * internally for error classification.  Every class owns a distinct bit so
 * that a combined result can still be told apart (CROSSING_STRIPE_BOUNDARY
 * used to share bit 4 with REFERENCER_MISMATCH).
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
371
372 static void *print_status_check(void *p)
373 {
374         struct task_ctx *priv = p;
375         const char work_indicator[] = { '.', 'o', 'O', 'o' };
376         uint32_t count = 0;
377         static char *task_position_string[] = {
378                 "checking extents",
379                 "checking free space cache",
380                 "checking fs roots",
381         };
382
383         task_period_start(priv->info, 1000 /* 1s */);
384
385         if (priv->tp == TASK_NOTHING)
386                 return NULL;
387
388         while (1) {
389                 printf("%s [%c]\r", task_position_string[priv->tp],
390                                 work_indicator[count % 4]);
391                 count++;
392                 fflush(stdout);
393                 task_period_wait(priv->info);
394         }
395         return NULL;
396 }
397
/*
 * Progress task epilogue: terminate the status line with a newline and flush
 * stdout.  The argument is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
405
406 static enum btrfs_check_mode parse_check_mode(const char *str)
407 {
408         if (strcmp(str, "lowmem") == 0)
409                 return CHECK_MODE_LOWMEM;
410         if (strcmp(str, "orig") == 0)
411                 return CHECK_MODE_ORIGINAL;
412         if (strcmp(str, "original") == 0)
413                 return CHECK_MODE_ORIGINAL;
414
415         return CHECK_MODE_UNKNOWN;
416 }
417
418 /* Compatible function to allow reuse of old codes */
419 static u64 first_extent_gap(struct rb_root *holes)
420 {
421         struct file_extent_hole *hole;
422
423         if (RB_EMPTY_ROOT(holes))
424                 return (u64)-1;
425
426         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
427         return hole->start;
428 }
429
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 {
432         struct file_extent_hole *hole1;
433         struct file_extent_hole *hole2;
434
435         hole1 = rb_entry(node1, struct file_extent_hole, node);
436         hole2 = rb_entry(node2, struct file_extent_hole, node);
437
438         if (hole1->start > hole2->start)
439                 return -1;
440         if (hole1->start < hole2->start)
441                 return 1;
442         /* Now hole1->start == hole2->start */
443         if (hole1->len >= hole2->len)
444                 /*
445                  * Hole 1 will be merge center
446                  * Same hole will be merged later
447                  */
448                 return -1;
449         /* Hole 2 will be merge center */
450         return 1;
451 }
452
453 /*
454  * Add a hole to the record
455  *
456  * This will do hole merge for copy_file_extent_holes(),
457  * which will ensure there won't be continuous holes.
458  */
459 static int add_file_extent_hole(struct rb_root *holes,
460                                 u64 start, u64 len)
461 {
462         struct file_extent_hole *hole;
463         struct file_extent_hole *prev = NULL;
464         struct file_extent_hole *next = NULL;
465
466         hole = malloc(sizeof(*hole));
467         if (!hole)
468                 return -ENOMEM;
469         hole->start = start;
470         hole->len = len;
471         /* Since compare will not return 0, no -EEXIST will happen */
472         rb_insert(holes, &hole->node, compare_hole);
473
474         /* simple merge with previous hole */
475         if (rb_prev(&hole->node))
476                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477                                 node);
478         if (prev && prev->start + prev->len >= hole->start) {
479                 hole->len = hole->start + hole->len - prev->start;
480                 hole->start = prev->start;
481                 rb_erase(&prev->node, holes);
482                 free(prev);
483                 prev = NULL;
484         }
485
486         /* iterate merge with next holes */
487         while (1) {
488                 if (!rb_next(&hole->node))
489                         break;
490                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491                                         node);
492                 if (hole->start + hole->len >= next->start) {
493                         if (hole->start + hole->len <= next->start + next->len)
494                                 hole->len = next->start + next->len -
495                                             hole->start;
496                         rb_erase(&next->node, holes);
497                         free(next);
498                         next = NULL;
499                 } else
500                         break;
501         }
502         return 0;
503 }
504
505 static int compare_hole_range(struct rb_node *node, void *data)
506 {
507         struct file_extent_hole *hole;
508         u64 start;
509
510         hole = (struct file_extent_hole *)data;
511         start = hole->start;
512
513         hole = rb_entry(node, struct file_extent_hole, node);
514         if (start < hole->start)
515                 return -1;
516         if (start >= hole->start && start < hole->start + hole->len)
517                 return 0;
518         return 1;
519 }
520
521 /*
522  * Delete a hole in the record
523  *
524  * This will do the hole split and is much restrict than add.
525  */
526 static int del_file_extent_hole(struct rb_root *holes,
527                                 u64 start, u64 len)
528 {
529         struct file_extent_hole *hole;
530         struct file_extent_hole tmp;
531         u64 prev_start = 0;
532         u64 prev_len = 0;
533         u64 next_start = 0;
534         u64 next_len = 0;
535         struct rb_node *node;
536         int have_prev = 0;
537         int have_next = 0;
538         int ret = 0;
539
540         tmp.start = start;
541         tmp.len = len;
542         node = rb_search(holes, &tmp, compare_hole_range, NULL);
543         if (!node)
544                 return -EEXIST;
545         hole = rb_entry(node, struct file_extent_hole, node);
546         if (start + len > hole->start + hole->len)
547                 return -EEXIST;
548
549         /*
550          * Now there will be no overlap, delete the hole and re-add the
551          * split(s) if they exists.
552          */
553         if (start > hole->start) {
554                 prev_start = hole->start;
555                 prev_len = start - hole->start;
556                 have_prev = 1;
557         }
558         if (hole->start + hole->len > start + len) {
559                 next_start = start + len;
560                 next_len = hole->start + hole->len - start - len;
561                 have_next = 1;
562         }
563         rb_erase(node, holes);
564         free(hole);
565         if (have_prev) {
566                 ret = add_file_extent_hole(holes, prev_start, prev_len);
567                 if (ret < 0)
568                         return ret;
569         }
570         if (have_next) {
571                 ret = add_file_extent_hole(holes, next_start, next_len);
572                 if (ret < 0)
573                         return ret;
574         }
575         return 0;
576 }
577
578 static int copy_file_extent_holes(struct rb_root *dst,
579                                   struct rb_root *src)
580 {
581         struct file_extent_hole *hole;
582         struct rb_node *node;
583         int ret = 0;
584
585         node = rb_first(src);
586         while (node) {
587                 hole = rb_entry(node, struct file_extent_hole, node);
588                 ret = add_file_extent_hole(dst, hole->start, hole->len);
589                 if (ret)
590                         break;
591                 node = rb_next(node);
592         }
593         return ret;
594 }
595
596 static void free_file_extent_holes(struct rb_root *holes)
597 {
598         struct rb_node *node;
599         struct file_extent_hole *hole;
600
601         node = rb_first(holes);
602         while (node) {
603                 hole = rb_entry(node, struct file_extent_hole, node);
604                 rb_erase(node, holes);
605                 free(hole);
606                 node = rb_first(holes);
607         }
608 }
609
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613                                  struct btrfs_root *root)
614 {
615         if (root->last_trans != trans->transid) {
616                 root->track_dirty = 1;
617                 root->last_trans = trans->transid;
618                 root->commit_root = root->node;
619                 extent_buffer_get(root->node);
620         }
621 }
622
623 static u8 imode_to_type(u32 imode)
624 {
625 #define S_SHIFT 12
626         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
628                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
629                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
630                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
631                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
632                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
633                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
634         };
635
636         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
637 #undef S_SHIFT
638 }
639
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 {
642         struct device_record *rec1;
643         struct device_record *rec2;
644
645         rec1 = rb_entry(node1, struct device_record, node);
646         rec2 = rb_entry(node2, struct device_record, node);
647         if (rec1->devid > rec2->devid)
648                 return -1;
649         else if (rec1->devid < rec2->devid)
650                 return 1;
651         else
652                 return 0;
653 }
654
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 {
657         struct inode_record *rec;
658         struct inode_backref *backref;
659         struct inode_backref *orig;
660         struct inode_backref *tmp;
661         struct orphan_data_extent *src_orphan;
662         struct orphan_data_extent *dst_orphan;
663         struct rb_node *rb;
664         size_t size;
665         int ret;
666
667         rec = malloc(sizeof(*rec));
668         if (!rec)
669                 return ERR_PTR(-ENOMEM);
670         memcpy(rec, orig_rec, sizeof(*rec));
671         rec->refs = 1;
672         INIT_LIST_HEAD(&rec->backrefs);
673         INIT_LIST_HEAD(&rec->orphan_extents);
674         rec->holes = RB_ROOT;
675
676         list_for_each_entry(orig, &orig_rec->backrefs, list) {
677                 size = sizeof(*orig) + orig->namelen + 1;
678                 backref = malloc(size);
679                 if (!backref) {
680                         ret = -ENOMEM;
681                         goto cleanup;
682                 }
683                 memcpy(backref, orig, size);
684                 list_add_tail(&backref->list, &rec->backrefs);
685         }
686         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687                 dst_orphan = malloc(sizeof(*dst_orphan));
688                 if (!dst_orphan) {
689                         ret = -ENOMEM;
690                         goto cleanup;
691                 }
692                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694         }
695         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
696         if (ret < 0)
697                 goto cleanup_rb;
698
699         return rec;
700
701 cleanup_rb:
702         rb = rb_first(&rec->holes);
703         while (rb) {
704                 struct file_extent_hole *hole;
705
706                 hole = rb_entry(rb, struct file_extent_hole, node);
707                 rb = rb_next(rb);
708                 free(hole);
709         }
710
711 cleanup:
712         if (!list_empty(&rec->backrefs))
713                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714                         list_del(&orig->list);
715                         free(orig);
716                 }
717
718         if (!list_empty(&rec->orphan_extents))
719                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720                         list_del(&orig->list);
721                         free(orig);
722                 }
723
724         free(rec);
725
726         return ERR_PTR(ret);
727 }
728
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
730                                       u64 objectid)
731 {
732         struct orphan_data_extent *orphan;
733
734         if (list_empty(orphan_extents))
735                 return;
736         printf("The following data extent is lost in tree %llu:\n",
737                objectid);
738         list_for_each_entry(orphan, orphan_extents, list) {
739                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
741                        orphan->disk_len);
742         }
743 }
744
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 {
747         u64 root_objectid = root->root_key.objectid;
748         int errors = rec->errors;
749
750         if (!errors)
751                 return;
752         /* reloc root errors, we print its corresponding fs root objectid*/
753         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754                 root_objectid = root->root_key.offset;
755                 fprintf(stderr, "reloc");
756         }
757         fprintf(stderr, "root %llu inode %llu errors %x",
758                 (unsigned long long) root_objectid,
759                 (unsigned long long) rec->ino, rec->errors);
760
761         if (errors & I_ERR_NO_INODE_ITEM)
762                 fprintf(stderr, ", no inode item");
763         if (errors & I_ERR_NO_ORPHAN_ITEM)
764                 fprintf(stderr, ", no orphan item");
765         if (errors & I_ERR_DUP_INODE_ITEM)
766                 fprintf(stderr, ", dup inode item");
767         if (errors & I_ERR_DUP_DIR_INDEX)
768                 fprintf(stderr, ", dup dir index");
769         if (errors & I_ERR_ODD_DIR_ITEM)
770                 fprintf(stderr, ", odd dir item");
771         if (errors & I_ERR_ODD_FILE_EXTENT)
772                 fprintf(stderr, ", odd file extent");
773         if (errors & I_ERR_BAD_FILE_EXTENT)
774                 fprintf(stderr, ", bad file extent");
775         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776                 fprintf(stderr, ", file extent overlap");
777         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778                 fprintf(stderr, ", file extent discount");
779         if (errors & I_ERR_DIR_ISIZE_WRONG)
780                 fprintf(stderr, ", dir isize wrong");
781         if (errors & I_ERR_FILE_NBYTES_WRONG)
782                 fprintf(stderr, ", nbytes wrong");
783         if (errors & I_ERR_ODD_CSUM_ITEM)
784                 fprintf(stderr, ", odd csum item");
785         if (errors & I_ERR_SOME_CSUM_MISSING)
786                 fprintf(stderr, ", some csum missing");
787         if (errors & I_ERR_LINK_COUNT_WRONG)
788                 fprintf(stderr, ", link count wrong");
789         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790                 fprintf(stderr, ", orphan file extent");
791         fprintf(stderr, "\n");
792         /* Print the orphan extents if needed */
793         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795
796         /* Print the holes if needed */
797         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798                 struct file_extent_hole *hole;
799                 struct rb_node *node;
800                 int found = 0;
801
802                 node = rb_first(&rec->holes);
803                 fprintf(stderr, "Found file extent holes:\n");
804                 while (node) {
805                         found = 1;
806                         hole = rb_entry(node, struct file_extent_hole, node);
807                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
808                                 hole->start, hole->len);
809                         node = rb_next(node);
810                 }
811                 if (!found)
812                         fprintf(stderr, "\tstart: 0, len: %llu\n",
813                                 round_up(rec->isize, root->sectorsize));
814         }
815 }
816
817 static void print_ref_error(int errors)
818 {
819         if (errors & REF_ERR_NO_DIR_ITEM)
820                 fprintf(stderr, ", no dir item");
821         if (errors & REF_ERR_NO_DIR_INDEX)
822                 fprintf(stderr, ", no dir index");
823         if (errors & REF_ERR_NO_INODE_REF)
824                 fprintf(stderr, ", no inode ref");
825         if (errors & REF_ERR_DUP_DIR_ITEM)
826                 fprintf(stderr, ", dup dir item");
827         if (errors & REF_ERR_DUP_DIR_INDEX)
828                 fprintf(stderr, ", dup dir index");
829         if (errors & REF_ERR_DUP_INODE_REF)
830                 fprintf(stderr, ", dup inode ref");
831         if (errors & REF_ERR_INDEX_UNMATCH)
832                 fprintf(stderr, ", index mismatch");
833         if (errors & REF_ERR_FILETYPE_UNMATCH)
834                 fprintf(stderr, ", filetype mismatch");
835         if (errors & REF_ERR_NAME_TOO_LONG)
836                 fprintf(stderr, ", name too long");
837         if (errors & REF_ERR_NO_ROOT_REF)
838                 fprintf(stderr, ", no root ref");
839         if (errors & REF_ERR_NO_ROOT_BACKREF)
840                 fprintf(stderr, ", no root backref");
841         if (errors & REF_ERR_DUP_ROOT_REF)
842                 fprintf(stderr, ", dup root ref");
843         if (errors & REF_ERR_DUP_ROOT_BACKREF)
844                 fprintf(stderr, ", dup root backref");
845         fprintf(stderr, "\n");
846 }
847
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
849                                           u64 ino, int mod)
850 {
851         struct ptr_node *node;
852         struct cache_extent *cache;
853         struct inode_record *rec = NULL;
854         int ret;
855
856         cache = lookup_cache_extent(inode_cache, ino, 1);
857         if (cache) {
858                 node = container_of(cache, struct ptr_node, cache);
859                 rec = node->data;
860                 if (mod && rec->refs > 1) {
861                         node->data = clone_inode_rec(rec);
862                         if (IS_ERR(node->data))
863                                 return node->data;
864                         rec->refs--;
865                         rec = node->data;
866                 }
867         } else if (mod) {
868                 rec = calloc(1, sizeof(*rec));
869                 if (!rec)
870                         return ERR_PTR(-ENOMEM);
871                 rec->ino = ino;
872                 rec->extent_start = (u64)-1;
873                 rec->refs = 1;
874                 INIT_LIST_HEAD(&rec->backrefs);
875                 INIT_LIST_HEAD(&rec->orphan_extents);
876                 rec->holes = RB_ROOT;
877
878                 node = malloc(sizeof(*node));
879                 if (!node) {
880                         free(rec);
881                         return ERR_PTR(-ENOMEM);
882                 }
883                 node->cache.start = ino;
884                 node->cache.size = 1;
885                 node->data = rec;
886
887                 if (ino == BTRFS_FREE_INO_OBJECTID)
888                         rec->found_link = 1;
889
890                 ret = insert_cache_extent(inode_cache, &node->cache);
891                 if (ret)
892                         return ERR_PTR(-EEXIST);
893         }
894         return rec;
895 }
896
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 {
899         struct orphan_data_extent *orphan;
900
901         while (!list_empty(orphan_extents)) {
902                 orphan = list_entry(orphan_extents->next,
903                                     struct orphan_data_extent, list);
904                 list_del(&orphan->list);
905                 free(orphan);
906         }
907 }
908
909 static void free_inode_rec(struct inode_record *rec)
910 {
911         struct inode_backref *backref;
912
913         if (--rec->refs > 0)
914                 return;
915
916         while (!list_empty(&rec->backrefs)) {
917                 backref = to_inode_backref(rec->backrefs.next);
918                 list_del(&backref->list);
919                 free(backref);
920         }
921         free_orphan_data_extents(&rec->orphan_extents);
922         free_file_extent_holes(&rec->holes);
923         free(rec);
924 }
925
926 static int can_free_inode_rec(struct inode_record *rec)
927 {
928         if (!rec->errors && rec->checked && rec->found_inode_item &&
929             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
930                 return 1;
931         return 0;
932 }
933
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935                                  struct inode_record *rec)
936 {
937         struct cache_extent *cache;
938         struct inode_backref *tmp, *backref;
939         struct ptr_node *node;
940         u8 filetype;
941
942         if (!rec->found_inode_item)
943                 return;
944
945         filetype = imode_to_type(rec->imode);
946         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947                 if (backref->found_dir_item && backref->found_dir_index) {
948                         if (backref->filetype != filetype)
949                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950                         if (!backref->errors && backref->found_inode_ref &&
951                             rec->nlink == rec->found_link) {
952                                 list_del(&backref->list);
953                                 free(backref);
954                         }
955                 }
956         }
957
958         if (!rec->checked || rec->merging)
959                 return;
960
961         if (S_ISDIR(rec->imode)) {
962                 if (rec->found_size != rec->isize)
963                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964                 if (rec->found_file_extent)
965                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
966         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967                 if (rec->found_dir_item)
968                         rec->errors |= I_ERR_ODD_DIR_ITEM;
969                 if (rec->found_size != rec->nbytes)
970                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971                 if (rec->nlink > 0 && !no_holes &&
972                     (rec->extent_end < rec->isize ||
973                      first_extent_gap(&rec->holes) < rec->isize))
974                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
975         }
976
977         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978                 if (rec->found_csum_item && rec->nodatasum)
979                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
980                 if (rec->some_csum_missing && !rec->nodatasum)
981                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
982         }
983
984         BUG_ON(rec->refs != 1);
985         if (can_free_inode_rec(rec)) {
986                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987                 node = container_of(cache, struct ptr_node, cache);
988                 BUG_ON(node->data != rec);
989                 remove_cache_extent(inode_cache, &node->cache);
990                 free(node);
991                 free_inode_rec(rec);
992         }
993 }
994
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 {
997         struct btrfs_path path;
998         struct btrfs_key key;
999         int ret;
1000
1001         key.objectid = BTRFS_ORPHAN_OBJECTID;
1002         key.type = BTRFS_ORPHAN_ITEM_KEY;
1003         key.offset = ino;
1004
1005         btrfs_init_path(&path);
1006         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007         btrfs_release_path(&path);
1008         if (ret > 0)
1009                 ret = -ENOENT;
1010         return ret;
1011 }
1012
1013 static int process_inode_item(struct extent_buffer *eb,
1014                               int slot, struct btrfs_key *key,
1015                               struct shared_node *active_node)
1016 {
1017         struct inode_record *rec;
1018         struct btrfs_inode_item *item;
1019
1020         rec = active_node->current;
1021         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022         if (rec->found_inode_item) {
1023                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1024                 return 1;
1025         }
1026         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027         rec->nlink = btrfs_inode_nlink(eb, item);
1028         rec->isize = btrfs_inode_size(eb, item);
1029         rec->nbytes = btrfs_inode_nbytes(eb, item);
1030         rec->imode = btrfs_inode_mode(eb, item);
1031         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032                 rec->nodatasum = 1;
1033         rec->found_inode_item = 1;
1034         if (rec->nlink == 0)
1035                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036         maybe_free_inode_rec(&active_node->inode_cache, rec);
1037         return 0;
1038 }
1039
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041                                                 const char *name,
1042                                                 int namelen, u64 dir)
1043 {
1044         struct inode_backref *backref;
1045
1046         list_for_each_entry(backref, &rec->backrefs, list) {
1047                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048                         break;
1049                 if (backref->dir != dir || backref->namelen != namelen)
1050                         continue;
1051                 if (memcmp(name, backref->name, namelen))
1052                         continue;
1053                 return backref;
1054         }
1055
1056         backref = malloc(sizeof(*backref) + namelen + 1);
1057         if (!backref)
1058                 return NULL;
1059         memset(backref, 0, sizeof(*backref));
1060         backref->dir = dir;
1061         backref->namelen = namelen;
1062         memcpy(backref->name, name, namelen);
1063         backref->name[namelen] = '\0';
1064         list_add_tail(&backref->list, &rec->backrefs);
1065         return backref;
1066 }
1067
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069                              u64 ino, u64 dir, u64 index,
1070                              const char *name, int namelen,
1071                              u8 filetype, u8 itemtype, int errors)
1072 {
1073         struct inode_record *rec;
1074         struct inode_backref *backref;
1075
1076         rec = get_inode_rec(inode_cache, ino, 1);
1077         BUG_ON(IS_ERR(rec));
1078         backref = get_inode_backref(rec, name, namelen, dir);
1079         BUG_ON(!backref);
1080         if (errors)
1081                 backref->errors |= errors;
1082         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083                 if (backref->found_dir_index)
1084                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085                 if (backref->found_inode_ref && backref->index != index)
1086                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1087                 if (backref->found_dir_item && backref->filetype != filetype)
1088                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089
1090                 backref->index = index;
1091                 backref->filetype = filetype;
1092                 backref->found_dir_index = 1;
1093         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094                 rec->found_link++;
1095                 if (backref->found_dir_item)
1096                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097                 if (backref->found_dir_index && backref->filetype != filetype)
1098                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099
1100                 backref->filetype = filetype;
1101                 backref->found_dir_item = 1;
1102         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104                 if (backref->found_inode_ref)
1105                         backref->errors |= REF_ERR_DUP_INODE_REF;
1106                 if (backref->found_dir_index && backref->index != index)
1107                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1108                 else
1109                         backref->index = index;
1110
1111                 backref->ref_type = itemtype;
1112                 backref->found_inode_ref = 1;
1113         } else {
1114                 BUG_ON(1);
1115         }
1116
1117         maybe_free_inode_rec(inode_cache, rec);
1118         return 0;
1119 }
1120
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122                             struct cache_tree *dst_cache)
1123 {
1124         struct inode_backref *backref;
1125         u32 dir_count = 0;
1126         int ret = 0;
1127
1128         dst->merging = 1;
1129         list_for_each_entry(backref, &src->backrefs, list) {
1130                 if (backref->found_dir_index) {
1131                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1132                                         backref->index, backref->name,
1133                                         backref->namelen, backref->filetype,
1134                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1135                 }
1136                 if (backref->found_dir_item) {
1137                         dir_count++;
1138                         add_inode_backref(dst_cache, dst->ino,
1139                                         backref->dir, 0, backref->name,
1140                                         backref->namelen, backref->filetype,
1141                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1142                 }
1143                 if (backref->found_inode_ref) {
1144                         add_inode_backref(dst_cache, dst->ino,
1145                                         backref->dir, backref->index,
1146                                         backref->name, backref->namelen, 0,
1147                                         backref->ref_type, backref->errors);
1148                 }
1149         }
1150
1151         if (src->found_dir_item)
1152                 dst->found_dir_item = 1;
1153         if (src->found_file_extent)
1154                 dst->found_file_extent = 1;
1155         if (src->found_csum_item)
1156                 dst->found_csum_item = 1;
1157         if (src->some_csum_missing)
1158                 dst->some_csum_missing = 1;
1159         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1161                 if (ret < 0)
1162                         return ret;
1163         }
1164
1165         BUG_ON(src->found_link < dir_count);
1166         dst->found_link += src->found_link - dir_count;
1167         dst->found_size += src->found_size;
1168         if (src->extent_start != (u64)-1) {
1169                 if (dst->extent_start == (u64)-1) {
1170                         dst->extent_start = src->extent_start;
1171                         dst->extent_end = src->extent_end;
1172                 } else {
1173                         if (dst->extent_end > src->extent_start)
1174                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175                         else if (dst->extent_end < src->extent_start) {
1176                                 ret = add_file_extent_hole(&dst->holes,
1177                                         dst->extent_end,
1178                                         src->extent_start - dst->extent_end);
1179                         }
1180                         if (dst->extent_end < src->extent_end)
1181                                 dst->extent_end = src->extent_end;
1182                 }
1183         }
1184
1185         dst->errors |= src->errors;
1186         if (src->found_inode_item) {
1187                 if (!dst->found_inode_item) {
1188                         dst->nlink = src->nlink;
1189                         dst->isize = src->isize;
1190                         dst->nbytes = src->nbytes;
1191                         dst->imode = src->imode;
1192                         dst->nodatasum = src->nodatasum;
1193                         dst->found_inode_item = 1;
1194                 } else {
1195                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1196                 }
1197         }
1198         dst->merging = 0;
1199
1200         return 0;
1201 }
1202
1203 static int splice_shared_node(struct shared_node *src_node,
1204                               struct shared_node *dst_node)
1205 {
1206         struct cache_extent *cache;
1207         struct ptr_node *node, *ins;
1208         struct cache_tree *src, *dst;
1209         struct inode_record *rec, *conflict;
1210         u64 current_ino = 0;
1211         int splice = 0;
1212         int ret;
1213
1214         if (--src_node->refs == 0)
1215                 splice = 1;
1216         if (src_node->current)
1217                 current_ino = src_node->current->ino;
1218
1219         src = &src_node->root_cache;
1220         dst = &dst_node->root_cache;
1221 again:
1222         cache = search_cache_extent(src, 0);
1223         while (cache) {
1224                 node = container_of(cache, struct ptr_node, cache);
1225                 rec = node->data;
1226                 cache = next_cache_extent(cache);
1227
1228                 if (splice) {
1229                         remove_cache_extent(src, &node->cache);
1230                         ins = node;
1231                 } else {
1232                         ins = malloc(sizeof(*ins));
1233                         BUG_ON(!ins);
1234                         ins->cache.start = node->cache.start;
1235                         ins->cache.size = node->cache.size;
1236                         ins->data = rec;
1237                         rec->refs++;
1238                 }
1239                 ret = insert_cache_extent(dst, &ins->cache);
1240                 if (ret == -EEXIST) {
1241                         conflict = get_inode_rec(dst, rec->ino, 1);
1242                         BUG_ON(IS_ERR(conflict));
1243                         merge_inode_recs(rec, conflict, dst);
1244                         if (rec->checked) {
1245                                 conflict->checked = 1;
1246                                 if (dst_node->current == conflict)
1247                                         dst_node->current = NULL;
1248                         }
1249                         maybe_free_inode_rec(dst, conflict);
1250                         free_inode_rec(rec);
1251                         free(ins);
1252                 } else {
1253                         BUG_ON(ret);
1254                 }
1255         }
1256
1257         if (src == &src_node->root_cache) {
1258                 src = &src_node->inode_cache;
1259                 dst = &dst_node->inode_cache;
1260                 goto again;
1261         }
1262
1263         if (current_ino > 0 && (!dst_node->current ||
1264             current_ino > dst_node->current->ino)) {
1265                 if (dst_node->current) {
1266                         dst_node->current->checked = 1;
1267                         maybe_free_inode_rec(dst, dst_node->current);
1268                 }
1269                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270                 BUG_ON(IS_ERR(dst_node->current));
1271         }
1272         return 0;
1273 }
1274
1275 static void free_inode_ptr(struct cache_extent *cache)
1276 {
1277         struct ptr_node *node;
1278         struct inode_record *rec;
1279
1280         node = container_of(cache, struct ptr_node, cache);
1281         rec = node->data;
1282         free_inode_rec(rec);
1283         free(node);
1284 }
1285
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1289                                             u64 bytenr)
1290 {
1291         struct cache_extent *cache;
1292         struct shared_node *node;
1293
1294         cache = lookup_cache_extent(shared, bytenr, 1);
1295         if (cache) {
1296                 node = container_of(cache, struct shared_node, cache);
1297                 return node;
1298         }
1299         return NULL;
1300 }
1301
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1303 {
1304         int ret;
1305         struct shared_node *node;
1306
1307         node = calloc(1, sizeof(*node));
1308         if (!node)
1309                 return -ENOMEM;
1310         node->cache.start = bytenr;
1311         node->cache.size = 1;
1312         cache_tree_init(&node->root_cache);
1313         cache_tree_init(&node->inode_cache);
1314         node->refs = refs;
1315
1316         ret = insert_cache_extent(shared, &node->cache);
1317
1318         return ret;
1319 }
1320
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322                              struct walk_control *wc, int level)
1323 {
1324         struct shared_node *node;
1325         struct shared_node *dest;
1326         int ret;
1327
1328         if (level == wc->active_node)
1329                 return 0;
1330
1331         BUG_ON(wc->active_node <= level);
1332         node = find_shared_node(&wc->shared, bytenr);
1333         if (!node) {
1334                 ret = add_shared_node(&wc->shared, bytenr, refs);
1335                 BUG_ON(ret);
1336                 node = find_shared_node(&wc->shared, bytenr);
1337                 wc->nodes[level] = node;
1338                 wc->active_node = level;
1339                 return 0;
1340         }
1341
1342         if (wc->root_level == wc->active_node &&
1343             btrfs_root_refs(&root->root_item) == 0) {
1344                 if (--node->refs == 0) {
1345                         free_inode_recs_tree(&node->root_cache);
1346                         free_inode_recs_tree(&node->inode_cache);
1347                         remove_cache_extent(&wc->shared, &node->cache);
1348                         free(node);
1349                 }
1350                 return 1;
1351         }
1352
1353         dest = wc->nodes[wc->active_node];
1354         splice_shared_node(node, dest);
1355         if (node->refs == 0) {
1356                 remove_cache_extent(&wc->shared, &node->cache);
1357                 free(node);
1358         }
1359         return 1;
1360 }
1361
1362 static int leave_shared_node(struct btrfs_root *root,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int i;
1368
1369         if (level == wc->root_level)
1370                 return 0;
1371
1372         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1373                 if (wc->nodes[i])
1374                         break;
1375         }
1376         BUG_ON(i >= BTRFS_MAX_LEVEL);
1377
1378         node = wc->nodes[wc->active_node];
1379         wc->nodes[wc->active_node] = NULL;
1380         wc->active_node = i;
1381
1382         dest = wc->nodes[wc->active_node];
1383         if (wc->active_node < wc->root_level ||
1384             btrfs_root_refs(&root->root_item) > 0) {
1385                 BUG_ON(node->refs <= 1);
1386                 splice_shared_node(node, dest);
1387         } else {
1388                 BUG_ON(node->refs < 2);
1389                 node->refs--;
1390         }
1391         return 0;
1392 }
1393
1394 /*
1395  * Returns:
1396  * < 0 - on error
1397  * 1   - if the root with id child_root_id is a child of root parent_root_id
1398  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1399  *       has other root(s) as parent(s)
1400  * 2   - if the root child_root_id doesn't have any parent roots
1401  */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1403                          u64 child_root_id)
1404 {
1405         struct btrfs_path path;
1406         struct btrfs_key key;
1407         struct extent_buffer *leaf;
1408         int has_parent = 0;
1409         int ret;
1410
1411         btrfs_init_path(&path);
1412
1413         key.objectid = parent_root_id;
1414         key.type = BTRFS_ROOT_REF_KEY;
1415         key.offset = child_root_id;
1416         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1417                                 0, 0);
1418         if (ret < 0)
1419                 return ret;
1420         btrfs_release_path(&path);
1421         if (!ret)
1422                 return 1;
1423
1424         key.objectid = child_root_id;
1425         key.type = BTRFS_ROOT_BACKREF_KEY;
1426         key.offset = 0;
1427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1428                                 0, 0);
1429         if (ret < 0)
1430                 goto out;
1431
1432         while (1) {
1433                 leaf = path.nodes[0];
1434                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1436                         if (ret)
1437                                 break;
1438                         leaf = path.nodes[0];
1439                 }
1440
1441                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1442                 if (key.objectid != child_root_id ||
1443                     key.type != BTRFS_ROOT_BACKREF_KEY)
1444                         break;
1445
1446                 has_parent = 1;
1447
1448                 if (key.offset == parent_root_id) {
1449                         btrfs_release_path(&path);
1450                         return 1;
1451                 }
1452
1453                 path.slots[0]++;
1454         }
1455 out:
1456         btrfs_release_path(&path);
1457         if (ret < 0)
1458                 return ret;
1459         return has_parent ? 0 : 2;
1460 }
1461
1462 static int process_dir_item(struct btrfs_root *root,
1463                             struct extent_buffer *eb,
1464                             int slot, struct btrfs_key *key,
1465                             struct shared_node *active_node)
1466 {
1467         u32 total;
1468         u32 cur = 0;
1469         u32 len;
1470         u32 name_len;
1471         u32 data_len;
1472         int error;
1473         int nritems = 0;
1474         u8 filetype;
1475         struct btrfs_dir_item *di;
1476         struct inode_record *rec;
1477         struct cache_tree *root_cache;
1478         struct cache_tree *inode_cache;
1479         struct btrfs_key location;
1480         char namebuf[BTRFS_NAME_LEN];
1481
1482         root_cache = &active_node->root_cache;
1483         inode_cache = &active_node->inode_cache;
1484         rec = active_node->current;
1485         rec->found_dir_item = 1;
1486
1487         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 nritems++;
1491                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492                 name_len = btrfs_dir_name_len(eb, di);
1493                 data_len = btrfs_dir_data_len(eb, di);
1494                 filetype = btrfs_dir_type(eb, di);
1495
1496                 rec->found_size += name_len;
1497                 if (name_len <= BTRFS_NAME_LEN) {
1498                         len = name_len;
1499                         error = 0;
1500                 } else {
1501                         len = BTRFS_NAME_LEN;
1502                         error = REF_ERR_NAME_TOO_LONG;
1503                 }
1504                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505
1506                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507                         add_inode_backref(inode_cache, location.objectid,
1508                                           key->objectid, key->offset, namebuf,
1509                                           len, filetype, key->type, error);
1510                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511                         add_inode_backref(root_cache, location.objectid,
1512                                           key->objectid, key->offset,
1513                                           namebuf, len, filetype,
1514                                           key->type, error);
1515                 } else {
1516                         fprintf(stderr, "invalid location in dir item %u\n",
1517                                 location.type);
1518                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519                                           key->objectid, key->offset, namebuf,
1520                                           len, filetype, key->type, error);
1521                 }
1522
1523                 len = sizeof(*di) + name_len + data_len;
1524                 di = (struct btrfs_dir_item *)((char *)di + len);
1525                 cur += len;
1526         }
1527         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1529
1530         return 0;
1531 }
1532
1533 static int process_inode_ref(struct extent_buffer *eb,
1534                              int slot, struct btrfs_key *key,
1535                              struct shared_node *active_node)
1536 {
1537         u32 total;
1538         u32 cur = 0;
1539         u32 len;
1540         u32 name_len;
1541         u64 index;
1542         int error;
1543         struct cache_tree *inode_cache;
1544         struct btrfs_inode_ref *ref;
1545         char namebuf[BTRFS_NAME_LEN];
1546
1547         inode_cache = &active_node->inode_cache;
1548
1549         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550         total = btrfs_item_size_nr(eb, slot);
1551         while (cur < total) {
1552                 name_len = btrfs_inode_ref_name_len(eb, ref);
1553                 index = btrfs_inode_ref_index(eb, ref);
1554                 if (name_len <= BTRFS_NAME_LEN) {
1555                         len = name_len;
1556                         error = 0;
1557                 } else {
1558                         len = BTRFS_NAME_LEN;
1559                         error = REF_ERR_NAME_TOO_LONG;
1560                 }
1561                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562                 add_inode_backref(inode_cache, key->objectid, key->offset,
1563                                   index, namebuf, len, 0, key->type, error);
1564
1565                 len = sizeof(*ref) + name_len;
1566                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1567                 cur += len;
1568         }
1569         return 0;
1570 }
1571
1572 static int process_inode_extref(struct extent_buffer *eb,
1573                                 int slot, struct btrfs_key *key,
1574                                 struct shared_node *active_node)
1575 {
1576         u32 total;
1577         u32 cur = 0;
1578         u32 len;
1579         u32 name_len;
1580         u64 index;
1581         u64 parent;
1582         int error;
1583         struct cache_tree *inode_cache;
1584         struct btrfs_inode_extref *extref;
1585         char namebuf[BTRFS_NAME_LEN];
1586
1587         inode_cache = &active_node->inode_cache;
1588
1589         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590         total = btrfs_item_size_nr(eb, slot);
1591         while (cur < total) {
1592                 name_len = btrfs_inode_extref_name_len(eb, extref);
1593                 index = btrfs_inode_extref_index(eb, extref);
1594                 parent = btrfs_inode_extref_parent(eb, extref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf,
1603                                    (unsigned long)(extref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, parent,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*extref) + name_len;
1608                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612
1613 }
1614
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616                             u64 len, u64 *found)
1617 {
1618         struct btrfs_key key;
1619         struct btrfs_path path;
1620         struct extent_buffer *leaf;
1621         int ret;
1622         size_t size;
1623         *found = 0;
1624         u64 csum_end;
1625         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626
1627         btrfs_init_path(&path);
1628
1629         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630         key.offset = start;
1631         key.type = BTRFS_EXTENT_CSUM_KEY;
1632
1633         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1634                                 &key, &path, 0, 0);
1635         if (ret < 0)
1636                 goto out;
1637         if (ret > 0 && path.slots[0] > 0) {
1638                 leaf = path.nodes[0];
1639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641                     key.type == BTRFS_EXTENT_CSUM_KEY)
1642                         path.slots[0]--;
1643         }
1644
1645         while (len > 0) {
1646                 leaf = path.nodes[0];
1647                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1649                         if (ret > 0)
1650                                 break;
1651                         else if (ret < 0)
1652                                 goto out;
1653                         leaf = path.nodes[0];
1654                 }
1655
1656                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658                     key.type != BTRFS_EXTENT_CSUM_KEY)
1659                         break;
1660
1661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1662                 if (key.offset >= start + len)
1663                         break;
1664
1665                 if (key.offset > start)
1666                         start = key.offset;
1667
1668                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670                 if (csum_end > start) {
1671                         size = min(csum_end - start, len);
1672                         len -= size;
1673                         start += size;
1674                         *found += size;
1675                 }
1676
1677                 path.slots[0]++;
1678         }
1679 out:
1680         btrfs_release_path(&path);
1681         if (ret < 0)
1682                 return ret;
1683         return 0;
1684 }
1685
1686 static int process_file_extent(struct btrfs_root *root,
1687                                 struct extent_buffer *eb,
1688                                 int slot, struct btrfs_key *key,
1689                                 struct shared_node *active_node)
1690 {
1691         struct inode_record *rec;
1692         struct btrfs_file_extent_item *fi;
1693         u64 num_bytes = 0;
1694         u64 disk_bytenr = 0;
1695         u64 extent_offset = 0;
1696         u64 mask = root->sectorsize - 1;
1697         int extent_type;
1698         int ret;
1699
1700         rec = active_node->current;
1701         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702         rec->found_file_extent = 1;
1703
1704         if (rec->extent_start == (u64)-1) {
1705                 rec->extent_start = key->offset;
1706                 rec->extent_end = key->offset;
1707         }
1708
1709         if (rec->extent_end > key->offset)
1710                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711         else if (rec->extent_end < key->offset) {
1712                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713                                            key->offset - rec->extent_end);
1714                 if (ret < 0)
1715                         return ret;
1716         }
1717
1718         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719         extent_type = btrfs_file_extent_type(eb, fi);
1720
1721         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723                 if (num_bytes == 0)
1724                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725                 rec->found_size += num_bytes;
1726                 num_bytes = (num_bytes + mask) & ~mask;
1727         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731                 extent_offset = btrfs_file_extent_offset(eb, fi);
1732                 if (num_bytes == 0 || (num_bytes & mask))
1733                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734                 if (num_bytes + extent_offset >
1735                     btrfs_file_extent_ram_bytes(eb, fi))
1736                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738                     (btrfs_file_extent_compression(eb, fi) ||
1739                      btrfs_file_extent_encryption(eb, fi) ||
1740                      btrfs_file_extent_other_encoding(eb, fi)))
1741                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742                 if (disk_bytenr > 0)
1743                         rec->found_size += num_bytes;
1744         } else {
1745                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746         }
1747         rec->extent_end = key->offset + num_bytes;
1748
1749         /*
1750          * The data reloc tree will copy full extents into its inode and then
1751          * copy the corresponding csums.  Because the extent it copied could be
1752          * a preallocated extent that hasn't been written to yet there may be no
1753          * csums to copy, ergo we won't have csums for our file extent.  This is
1754          * ok so just don't bother checking csums if the inode belongs to the
1755          * data reloc tree.
1756          */
1757         if (disk_bytenr > 0 &&
1758             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759                 u64 found;
1760                 if (btrfs_file_extent_compression(eb, fi))
1761                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762                 else
1763                         disk_bytenr += extent_offset;
1764
1765                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1766                 if (ret < 0)
1767                         return ret;
1768                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769                         if (found > 0)
1770                                 rec->found_csum_item = 1;
1771                         if (found < num_bytes)
1772                                 rec->some_csum_missing = 1;
1773                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774                         if (found > 0)
1775                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782                             struct walk_control *wc)
1783 {
1784         struct btrfs_key key;
1785         u32 nritems;
1786         int i;
1787         int ret = 0;
1788         struct cache_tree *inode_cache;
1789         struct shared_node *active_node;
1790
1791         if (wc->root_level == wc->active_node &&
1792             btrfs_root_refs(&root->root_item) == 0)
1793                 return 0;
1794
1795         active_node = wc->nodes[wc->active_node];
1796         inode_cache = &active_node->inode_cache;
1797         nritems = btrfs_header_nritems(eb);
1798         for (i = 0; i < nritems; i++) {
1799                 btrfs_item_key_to_cpu(eb, &key, i);
1800
1801                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802                         continue;
1803                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1804                         continue;
1805
1806                 if (active_node->current == NULL ||
1807                     active_node->current->ino < key.objectid) {
1808                         if (active_node->current) {
1809                                 active_node->current->checked = 1;
1810                                 maybe_free_inode_rec(inode_cache,
1811                                                      active_node->current);
1812                         }
1813                         active_node->current = get_inode_rec(inode_cache,
1814                                                              key.objectid, 1);
1815                         BUG_ON(IS_ERR(active_node->current));
1816                 }
1817                 switch (key.type) {
1818                 case BTRFS_DIR_ITEM_KEY:
1819                 case BTRFS_DIR_INDEX_KEY:
1820                         ret = process_dir_item(root, eb, i, &key, active_node);
1821                         break;
1822                 case BTRFS_INODE_REF_KEY:
1823                         ret = process_inode_ref(eb, i, &key, active_node);
1824                         break;
1825                 case BTRFS_INODE_EXTREF_KEY:
1826                         ret = process_inode_extref(eb, i, &key, active_node);
1827                         break;
1828                 case BTRFS_INODE_ITEM_KEY:
1829                         ret = process_inode_item(eb, i, &key, active_node);
1830                         break;
1831                 case BTRFS_EXTENT_DATA_KEY:
1832                         ret = process_file_extent(root, eb, i, &key,
1833                                                   active_node);
1834                         break;
1835                 default:
1836                         break;
1837                 };
1838         }
1839         return ret;
1840 }
1841
1842 static void reada_walk_down(struct btrfs_root *root,
1843                             struct extent_buffer *node, int slot)
1844 {
1845         u64 bytenr;
1846         u64 ptr_gen;
1847         u32 nritems;
1848         u32 blocksize;
1849         int i;
1850         int level;
1851
1852         level = btrfs_header_level(node);
1853         if (level != 1)
1854                 return;
1855
1856         nritems = btrfs_header_nritems(node);
1857         blocksize = root->nodesize;
1858         for (i = slot; i < nritems; i++) {
1859                 bytenr = btrfs_node_blockptr(node, i);
1860                 ptr_gen = btrfs_node_ptr_generation(node, i);
1861                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1862         }
1863 }
1864
1865 /*
1866  * Check the child node/leaf by the following condition:
1867  * 1. the first item key of the node/leaf should be the same with the one
1868  *    in parent.
1869  * 2. block in parent node should match the child node/leaf.
1870  * 3. generation of parent node and child's header should be consistent.
1871  *
1872  * Or the child node/leaf pointed by the key in parent is not valid.
1873  *
1874  * We hope to check leaf owner too, but since subvol may share leaves,
1875  * which makes leaf owner check not so strong, key check should be
1876  * sufficient enough for that case.
1877  */
1878 static int check_child_node(struct btrfs_root *root,
1879                             struct extent_buffer *parent, int slot,
1880                             struct extent_buffer *child)
1881 {
1882         struct btrfs_key parent_key;
1883         struct btrfs_key child_key;
1884         int ret = 0;
1885
1886         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887         if (btrfs_header_level(child) == 0)
1888                 btrfs_item_key_to_cpu(child, &child_key, 0);
1889         else
1890                 btrfs_node_key_to_cpu(child, &child_key, 0);
1891
1892         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1893                 ret = -EINVAL;
1894                 fprintf(stderr,
1895                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896                         parent_key.objectid, parent_key.type, parent_key.offset,
1897                         child_key.objectid, child_key.type, child_key.offset);
1898         }
1899         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900                 ret = -EINVAL;
1901                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902                         btrfs_node_blockptr(parent, slot),
1903                         btrfs_header_bytenr(child));
1904         }
1905         if (btrfs_node_ptr_generation(parent, slot) !=
1906             btrfs_header_generation(child)) {
1907                 ret = -EINVAL;
1908                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909                         btrfs_header_generation(child),
1910                         btrfs_node_ptr_generation(parent, slot));
1911         }
1912         return ret;
1913 }
1914
/*
 * Per-level cache of the last extent reference count looked up while walking
 * a tree: walk_down_tree() stores the start of a tree block in bytenr[level]
 * and its reference count in refs[level], and reuses refs[level] when the
 * same block is seen again instead of repeating the lookup.
 */
struct node_refs {
        /* start (bytenr) of the tree block cached for each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count obtained for the block in bytenr[] at that level */
        u64 refs[BTRFS_MAX_LEVEL];
};
1919
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                           struct walk_control *wc, int *level,
1922                           struct node_refs *nrefs)
1923 {
1924         enum btrfs_tree_block_status status;
1925         u64 bytenr;
1926         u64 ptr_gen;
1927         struct extent_buffer *next;
1928         struct extent_buffer *cur;
1929         u32 blocksize;
1930         int ret, err = 0;
1931         u64 refs;
1932
1933         WARN_ON(*level < 0);
1934         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935
1936         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937                 refs = nrefs->refs[*level];
1938                 ret = 0;
1939         } else {
1940                 ret = btrfs_lookup_extent_info(NULL, root,
1941                                        path->nodes[*level]->start,
1942                                        *level, 1, &refs, NULL);
1943                 if (ret < 0) {
1944                         err = ret;
1945                         goto out;
1946                 }
1947                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948                 nrefs->refs[*level] = refs;
1949         }
1950
1951         if (refs > 1) {
1952                 ret = enter_shared_node(root, path->nodes[*level]->start,
1953                                         refs, wc, *level);
1954                 if (ret > 0) {
1955                         err = ret;
1956                         goto out;
1957                 }
1958         }
1959
1960         while (*level >= 0) {
1961                 WARN_ON(*level < 0);
1962                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963                 cur = path->nodes[*level];
1964
1965                 if (btrfs_header_level(cur) != *level)
1966                         WARN_ON(1);
1967
1968                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1969                         break;
1970                 if (*level == 0) {
1971                         ret = process_one_leaf(root, cur, wc);
1972                         if (ret < 0)
1973                                 err = ret;
1974                         break;
1975                 }
1976                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978                 blocksize = root->nodesize;
1979
1980                 if (bytenr == nrefs->bytenr[*level - 1]) {
1981                         refs = nrefs->refs[*level - 1];
1982                 } else {
1983                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984                                         *level - 1, 1, &refs, NULL);
1985                         if (ret < 0) {
1986                                 refs = 0;
1987                         } else {
1988                                 nrefs->bytenr[*level - 1] = bytenr;
1989                                 nrefs->refs[*level - 1] = refs;
1990                         }
1991                 }
1992
1993                 if (refs > 1) {
1994                         ret = enter_shared_node(root, bytenr, refs,
1995                                                 wc, *level - 1);
1996                         if (ret > 0) {
1997                                 path->slots[*level]++;
1998                                 continue;
1999                         }
2000                 }
2001
2002                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004                         free_extent_buffer(next);
2005                         reada_walk_down(root, cur, path->slots[*level]);
2006                         next = read_tree_block(root, bytenr, blocksize,
2007                                                ptr_gen);
2008                         if (!extent_buffer_uptodate(next)) {
2009                                 struct btrfs_key node_key;
2010
2011                                 btrfs_node_key_to_cpu(path->nodes[*level],
2012                                                       &node_key,
2013                                                       path->slots[*level]);
2014                                 btrfs_add_corrupt_extent_record(root->fs_info,
2015                                                 &node_key,
2016                                                 path->nodes[*level]->start,
2017                                                 root->nodesize, *level);
2018                                 err = -EIO;
2019                                 goto out;
2020                         }
2021                 }
2022
2023                 ret = check_child_node(root, cur, path->slots[*level], next);
2024                 if (ret) {
2025                         err = ret;
2026                         goto out;
2027                 }
2028
2029                 if (btrfs_is_leaf(next))
2030                         status = btrfs_check_leaf(root, NULL, next);
2031                 else
2032                         status = btrfs_check_node(root, NULL, next);
2033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034                         free_extent_buffer(next);
2035                         err = -EIO;
2036                         goto out;
2037                 }
2038
2039                 *level = *level - 1;
2040                 free_extent_buffer(path->nodes[*level]);
2041                 path->nodes[*level] = next;
2042                 path->slots[*level] = 0;
2043         }
2044 out:
2045         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2046         return err;
2047 }
2048
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050                         struct walk_control *wc, int *level)
2051 {
2052         int i;
2053         struct extent_buffer *leaf;
2054
2055         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056                 leaf = path->nodes[i];
2057                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2058                         path->slots[i]++;
2059                         *level = i;
2060                         return 0;
2061                 } else {
2062                         free_extent_buffer(path->nodes[*level]);
2063                         path->nodes[*level] = NULL;
2064                         BUG_ON(*level > wc->active_node);
2065                         if (*level == wc->active_node)
2066                                 leave_shared_node(root, wc, *level);
2067                         *level = i + 1;
2068                 }
2069         }
2070         return 1;
2071 }
2072
2073 static int check_root_dir(struct inode_record *rec)
2074 {
2075         struct inode_backref *backref;
2076         int ret = -1;
2077
2078         if (!rec->found_inode_item || rec->errors)
2079                 goto out;
2080         if (rec->nlink != 1 || rec->found_link != 0)
2081                 goto out;
2082         if (list_empty(&rec->backrefs))
2083                 goto out;
2084         backref = to_inode_backref(rec->backrefs.next);
2085         if (!backref->found_inode_ref)
2086                 goto out;
2087         if (backref->index != 0 || backref->namelen != 2 ||
2088             memcmp(backref->name, "..", 2))
2089                 goto out;
2090         if (backref->found_dir_index || backref->found_dir_item)
2091                 goto out;
2092         ret = 0;
2093 out:
2094         return ret;
2095 }
2096
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098                               struct btrfs_root *root, struct btrfs_path *path,
2099                               struct inode_record *rec)
2100 {
2101         struct btrfs_inode_item *ei;
2102         struct btrfs_key key;
2103         int ret;
2104
2105         key.objectid = rec->ino;
2106         key.type = BTRFS_INODE_ITEM_KEY;
2107         key.offset = (u64)-1;
2108
2109         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2110         if (ret < 0)
2111                 goto out;
2112         if (ret) {
2113                 if (!path->slots[0]) {
2114                         ret = -ENOENT;
2115                         goto out;
2116                 }
2117                 path->slots[0]--;
2118                 ret = 0;
2119         }
2120         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121         if (key.objectid != rec->ino) {
2122                 ret = -ENOENT;
2123                 goto out;
2124         }
2125
2126         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127                             struct btrfs_inode_item);
2128         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129         btrfs_mark_buffer_dirty(path->nodes[0]);
2130         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132                root->root_key.objectid);
2133 out:
2134         btrfs_release_path(path);
2135         return ret;
2136 }
2137
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139                                     struct btrfs_root *root,
2140                                     struct btrfs_path *path,
2141                                     struct inode_record *rec)
2142 {
2143         int ret;
2144
2145         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146         btrfs_release_path(path);
2147         if (!ret)
2148                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2149         return ret;
2150 }
2151
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153                                struct btrfs_root *root,
2154                                struct btrfs_path *path,
2155                                struct inode_record *rec)
2156 {
2157         struct btrfs_inode_item *ei;
2158         struct btrfs_key key;
2159         int ret = 0;
2160
2161         key.objectid = rec->ino;
2162         key.type = BTRFS_INODE_ITEM_KEY;
2163         key.offset = 0;
2164
2165         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2166         if (ret) {
2167                 if (ret > 0)
2168                         ret = -ENOENT;
2169                 goto out;
2170         }
2171
2172         /* Since ret == 0, no need to check anything */
2173         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174                             struct btrfs_inode_item);
2175         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176         btrfs_mark_buffer_dirty(path->nodes[0]);
2177         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178         printf("reset nbytes for ino %llu root %llu\n",
2179                rec->ino, root->root_key.objectid);
2180 out:
2181         btrfs_release_path(path);
2182         return ret;
2183 }
2184
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186                                  struct cache_tree *inode_cache,
2187                                  struct inode_record *rec,
2188                                  struct inode_backref *backref)
2189 {
2190         struct btrfs_path path;
2191         struct btrfs_trans_handle *trans;
2192         struct btrfs_dir_item *dir_item;
2193         struct extent_buffer *leaf;
2194         struct btrfs_key key;
2195         struct btrfs_disk_key disk_key;
2196         struct inode_record *dir_rec;
2197         unsigned long name_ptr;
2198         u32 data_size = sizeof(*dir_item) + backref->namelen;
2199         int ret;
2200
2201         trans = btrfs_start_transaction(root, 1);
2202         if (IS_ERR(trans))
2203                 return PTR_ERR(trans);
2204
2205         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206                 (unsigned long long)rec->ino);
2207
2208         btrfs_init_path(&path);
2209         key.objectid = backref->dir;
2210         key.type = BTRFS_DIR_INDEX_KEY;
2211         key.offset = backref->index;
2212         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2213         BUG_ON(ret);
2214
2215         leaf = path.nodes[0];
2216         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217
2218         disk_key.objectid = cpu_to_le64(rec->ino);
2219         disk_key.type = BTRFS_INODE_ITEM_KEY;
2220         disk_key.offset = 0;
2221
2222         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224         btrfs_set_dir_data_len(leaf, dir_item, 0);
2225         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226         name_ptr = (unsigned long)(dir_item + 1);
2227         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228         btrfs_mark_buffer_dirty(leaf);
2229         btrfs_release_path(&path);
2230         btrfs_commit_transaction(trans, root);
2231
2232         backref->found_dir_index = 1;
2233         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234         BUG_ON(IS_ERR(dir_rec));
2235         if (!dir_rec)
2236                 return 0;
2237         dir_rec->found_size += backref->namelen;
2238         if (dir_rec->found_size == dir_rec->isize &&
2239             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241         if (dir_rec->found_size != dir_rec->isize)
2242                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2243
2244         return 0;
2245 }
2246
2247 static int delete_dir_index(struct btrfs_root *root,
2248                             struct cache_tree *inode_cache,
2249                             struct inode_record *rec,
2250                             struct inode_backref *backref)
2251 {
2252         struct btrfs_trans_handle *trans;
2253         struct btrfs_dir_item *di;
2254         struct btrfs_path path;
2255         int ret = 0;
2256
2257         trans = btrfs_start_transaction(root, 1);
2258         if (IS_ERR(trans))
2259                 return PTR_ERR(trans);
2260
2261         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262                 (unsigned long long)backref->dir,
2263                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264                 (unsigned long long)root->objectid);
2265
2266         btrfs_init_path(&path);
2267         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268                                     backref->name, backref->namelen,
2269                                     backref->index, -1);
2270         if (IS_ERR(di)) {
2271                 ret = PTR_ERR(di);
2272                 btrfs_release_path(&path);
2273                 btrfs_commit_transaction(trans, root);
2274                 if (ret == -ENOENT)
2275                         return 0;
2276                 return ret;
2277         }
2278
2279         if (!di)
2280                 ret = btrfs_del_item(trans, root, &path);
2281         else
2282                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283         BUG_ON(ret);
2284         btrfs_release_path(&path);
2285         btrfs_commit_transaction(trans, root);
2286         return ret;
2287 }
2288
2289 static int create_inode_item(struct btrfs_root *root,
2290                              struct inode_record *rec,
2291                              struct inode_backref *backref, int root_dir)
2292 {
2293         struct btrfs_trans_handle *trans;
2294         struct btrfs_inode_item inode_item;
2295         time_t now = time(NULL);
2296         int ret;
2297
2298         trans = btrfs_start_transaction(root, 1);
2299         if (IS_ERR(trans)) {
2300                 ret = PTR_ERR(trans);
2301                 return ret;
2302         }
2303
2304         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305                 "be incomplete, please check permissions and content after "
2306                 "the fsck completes.\n", (unsigned long long)root->objectid,
2307                 (unsigned long long)rec->ino);
2308
2309         memset(&inode_item, 0, sizeof(inode_item));
2310         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311         if (root_dir)
2312                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313         else
2314                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316         if (rec->found_dir_item) {
2317                 if (rec->found_file_extent)
2318                         fprintf(stderr, "root %llu inode %llu has both a dir "
2319                                 "item and extents, unsure if it is a dir or a "
2320                                 "regular file so setting it as a directory\n",
2321                                 (unsigned long long)root->objectid,
2322                                 (unsigned long long)rec->ino);
2323                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325         } else if (!rec->found_dir_item) {
2326                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328         }
2329         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337
2338         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339         BUG_ON(ret);
2340         btrfs_commit_transaction(trans, root);
2341         return 0;
2342 }
2343
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345                                  struct inode_record *rec,
2346                                  struct cache_tree *inode_cache,
2347                                  int delete)
2348 {
2349         struct inode_backref *tmp, *backref;
2350         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2351         int ret = 0;
2352         int repaired = 0;
2353
2354         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355                 if (!delete && rec->ino == root_dirid) {
2356                         if (!rec->found_inode_item) {
2357                                 ret = create_inode_item(root, rec, backref, 1);
2358                                 if (ret)
2359                                         break;
2360                                 repaired++;
2361                         }
2362                 }
2363
2364                 /* Index 0 for root dir's are special, don't mess with it */
2365                 if (rec->ino == root_dirid && backref->index == 0)
2366                         continue;
2367
2368                 if (delete &&
2369                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2370                      (backref->found_dir_index && backref->found_inode_ref &&
2371                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372                         ret = delete_dir_index(root, inode_cache, rec, backref);
2373                         if (ret)
2374                                 break;
2375                         repaired++;
2376                         list_del(&backref->list);
2377                         free(backref);
2378                 }
2379
2380                 if (!delete && !backref->found_dir_index &&
2381                     backref->found_dir_item && backref->found_inode_ref) {
2382                         ret = add_missing_dir_index(root, inode_cache, rec,
2383                                                     backref);
2384                         if (ret)
2385                                 break;
2386                         repaired++;
2387                         if (backref->found_dir_item &&
2388                             backref->found_dir_index &&
2389                             backref->found_dir_index) {
2390                                 if (!backref->errors &&
2391                                     backref->found_inode_ref) {
2392                                         list_del(&backref->list);
2393                                         free(backref);
2394                                 }
2395                         }
2396                 }
2397
2398                 if (!delete && (!backref->found_dir_index &&
2399                                 !backref->found_dir_item &&
2400                                 backref->found_inode_ref)) {
2401                         struct btrfs_trans_handle *trans;
2402                         struct btrfs_key location;
2403
2404                         ret = check_dir_conflict(root, backref->name,
2405                                                  backref->namelen,
2406                                                  backref->dir,
2407                                                  backref->index);
2408                         if (ret) {
2409                                 /*
2410                                  * let nlink fixing routine to handle it,
2411                                  * which can do it better.
2412                                  */
2413                                 ret = 0;
2414                                 break;
2415                         }
2416                         location.objectid = rec->ino;
2417                         location.type = BTRFS_INODE_ITEM_KEY;
2418                         location.offset = 0;
2419
2420                         trans = btrfs_start_transaction(root, 1);
2421                         if (IS_ERR(trans)) {
2422                                 ret = PTR_ERR(trans);
2423                                 break;
2424                         }
2425                         fprintf(stderr, "adding missing dir index/item pair "
2426                                 "for inode %llu\n",
2427                                 (unsigned long long)rec->ino);
2428                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2429                                                     backref->namelen,
2430                                                     backref->dir, &location,
2431                                                     imode_to_type(rec->imode),
2432                                                     backref->index);
2433                         BUG_ON(ret);
2434                         btrfs_commit_transaction(trans, root);
2435                         repaired++;
2436                 }
2437
2438                 if (!delete && (backref->found_inode_ref &&
2439                                 backref->found_dir_index &&
2440                                 backref->found_dir_item &&
2441                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442                                 !rec->found_inode_item)) {
2443                         ret = create_inode_item(root, rec, backref, 0);
2444                         if (ret)
2445                                 break;
2446                         repaired++;
2447                 }
2448
2449         }
2450         return ret ? ret : repaired;
2451 }
2452
2453 /*
2454  * To determine the file type for nlink/inode_item repair
2455  *
2456  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457  * Return -ENOENT if file type is not found.
2458  */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2460 {
2461         struct inode_backref *backref;
2462
2463         /* For inode item recovered case */
2464         if (rec->found_inode_item) {
2465                 *type = imode_to_type(rec->imode);
2466                 return 0;
2467         }
2468
2469         list_for_each_entry(backref, &rec->backrefs, list) {
2470                 if (backref->found_dir_index || backref->found_dir_item) {
2471                         *type = backref->filetype;
2472                         return 0;
2473                 }
2474         }
2475         return -ENOENT;
2476 }
2477
2478 /*
2479  * To determine the file name for nlink repair
2480  *
2481  * Return 0 if file name is found, set name and namelen.
2482  * Return -ENOENT if file name is not found.
2483  */
2484 static int find_file_name(struct inode_record *rec,
2485                           char *name, int *namelen)
2486 {
2487         struct inode_backref *backref;
2488
2489         list_for_each_entry(backref, &rec->backrefs, list) {
2490                 if (backref->found_dir_index || backref->found_dir_item ||
2491                     backref->found_inode_ref) {
2492                         memcpy(name, backref->name, backref->namelen);
2493                         *namelen = backref->namelen;
2494                         return 0;
2495                 }
2496         }
2497         return -ENOENT;
2498 }
2499
2500 /* Reset the nlink of the inode to the correct one */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502                        struct btrfs_root *root,
2503                        struct btrfs_path *path,
2504                        struct inode_record *rec)
2505 {
2506         struct inode_backref *backref;
2507         struct inode_backref *tmp;
2508         struct btrfs_key key;
2509         struct btrfs_inode_item *inode_item;
2510         int ret = 0;
2511
2512         /* We don't believe this either, reset it and iterate backref */
2513         rec->found_link = 0;
2514
2515         /* Remove all backref including the valid ones */
2516         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518                                    backref->index, backref->name,
2519                                    backref->namelen, 0);
2520                 if (ret < 0)
2521                         goto out;
2522
2523                 /* remove invalid backref, so it won't be added back */
2524                 if (!(backref->found_dir_index &&
2525                       backref->found_dir_item &&
2526                       backref->found_inode_ref)) {
2527                         list_del(&backref->list);
2528                         free(backref);
2529                 } else {
2530                         rec->found_link++;
2531                 }
2532         }
2533
2534         /* Set nlink to 0 */
2535         key.objectid = rec->ino;
2536         key.type = BTRFS_INODE_ITEM_KEY;
2537         key.offset = 0;
2538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2539         if (ret < 0)
2540                 goto out;
2541         if (ret > 0) {
2542                 ret = -ENOENT;
2543                 goto out;
2544         }
2545         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546                                     struct btrfs_inode_item);
2547         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548         btrfs_mark_buffer_dirty(path->nodes[0]);
2549         btrfs_release_path(path);
2550
2551         /*
2552          * Add back valid inode_ref/dir_item/dir_index,
2553          * add_link() will handle the nlink inc, so new nlink must be correct
2554          */
2555         list_for_each_entry(backref, &rec->backrefs, list) {
2556                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557                                      backref->name, backref->namelen,
2558                                      backref->filetype, &backref->index, 1);
2559                 if (ret < 0)
2560                         goto out;
2561         }
2562 out:
2563         btrfs_release_path(path);
2564         return ret;
2565 }
2566
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568                                struct btrfs_root *root,
2569                                struct btrfs_path *path,
2570                                struct inode_record *rec)
2571 {
2572         char *dir_name = "lost+found";
2573         char namebuf[BTRFS_NAME_LEN] = {0};
2574         u64 lost_found_ino;
2575         u32 mode = 0700;
2576         u8 type = 0;
2577         int namelen = 0;
2578         int name_recovered = 0;
2579         int type_recovered = 0;
2580         int ret = 0;
2581
2582         /*
2583          * Get file name and type first before these invalid inode ref
2584          * are deleted by remove_all_invalid_backref()
2585          */
2586         name_recovered = !find_file_name(rec, namebuf, &namelen);
2587         type_recovered = !find_file_type(rec, &type);
2588
2589         if (!name_recovered) {
2590                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591                        rec->ino, rec->ino);
2592                 namelen = count_digits(rec->ino);
2593                 sprintf(namebuf, "%llu", rec->ino);
2594                 name_recovered = 1;
2595         }
2596         if (!type_recovered) {
2597                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598                        rec->ino);
2599                 type = BTRFS_FT_REG_FILE;
2600                 type_recovered = 1;
2601         }
2602
2603         ret = reset_nlink(trans, root, path, rec);
2604         if (ret < 0) {
2605                 fprintf(stderr,
2606                         "Failed to reset nlink for inode %llu: %s\n",
2607                         rec->ino, strerror(-ret));
2608                 goto out;
2609         }
2610
2611         if (rec->found_link == 0) {
2612                 lost_found_ino = root->highest_inode;
2613                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2614                         ret = -EOVERFLOW;
2615                         goto out;
2616                 }
2617                 lost_found_ino++;
2618                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2620                                   mode);
2621                 if (ret < 0) {
2622                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623                                 dir_name, strerror(-ret));
2624                         goto out;
2625                 }
2626                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627                                      namebuf, namelen, type, NULL, 1);
2628                 /*
2629                  * Add ".INO" suffix several times to handle case where
2630                  * "FILENAME.INO" is already taken by another file.
2631                  */
2632                 while (ret == -EEXIST) {
2633                         /*
2634                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635                          */
2636                         if (namelen + count_digits(rec->ino) + 1 >
2637                             BTRFS_NAME_LEN) {
2638                                 ret = -EFBIG;
2639                                 goto out;
2640                         }
2641                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642                                  ".%llu", rec->ino);
2643                         namelen += count_digits(rec->ino) + 1;
2644                         ret = btrfs_add_link(trans, root, rec->ino,
2645                                              lost_found_ino, namebuf,
2646                                              namelen, type, NULL, 1);
2647                 }
2648                 if (ret < 0) {
2649                         fprintf(stderr,
2650                                 "Failed to link the inode %llu to %s dir: %s\n",
2651                                 rec->ino, dir_name, strerror(-ret));
2652                         goto out;
2653                 }
2654                 /*
2655                  * Just increase the found_link, don't actually add the
2656                  * backref. This will make things easier and this inode
2657                  * record will be freed after the repair is done.
2658                  * So fsck will not report problem about this inode.
2659                  */
2660                 rec->found_link++;
2661                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662                        namelen, namebuf, dir_name);
2663         }
2664         printf("Fixed the nlink of inode %llu\n", rec->ino);
2665 out:
2666         /*
2667          * Clear the flag anyway, or we will loop forever for the same inode
2668          * as it will not be removed from the bad inode list and the dead loop
2669          * happens.
2670          */
2671         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672         btrfs_release_path(path);
2673         return ret;
2674 }
2675
2676 /*
2677  * Check if there is any normal(reg or prealloc) file extent for given
2678  * ino.
2679  * This is used to determine the file type when neither its dir_index/item or
2680  * inode_item exists.
2681  *
2682  * This will *NOT* report error, if any error happens, just consider it does
2683  * not have any normal file extent.
2684  */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 {
2687         struct btrfs_path path;
2688         struct btrfs_key key;
2689         struct btrfs_key found_key;
2690         struct btrfs_file_extent_item *fi;
2691         u8 type;
2692         int ret = 0;
2693
2694         btrfs_init_path(&path);
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705                 ret = btrfs_next_leaf(root, &path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713                                       path.slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path.nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_release_path(&path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         /*
2930          * For nlink repair, it may create a dir and add link, so
2931          * 2 for parent(256)'s dir_index and dir_item
2932          * 2 for lost+found dir's inode_item and inode_ref
2933          * 1 for the new inode_ref of the file
2934          * 2 for lost+found dir's dir_index and dir_item for the file
2935          */
2936         trans = btrfs_start_transaction(root, 7);
2937         if (IS_ERR(trans))
2938                 return PTR_ERR(trans);
2939
2940         btrfs_init_path(&path);
2941         if (rec->errors & I_ERR_NO_INODE_ITEM)
2942                 ret = repair_inode_no_item(trans, root, &path, rec);
2943         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948                 ret = repair_inode_isize(trans, root, &path, rec);
2949         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952                 ret = repair_inode_nlinks(trans, root, &path, rec);
2953         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954                 ret = repair_inode_nbytes(trans, root, &path, rec);
2955         btrfs_commit_transaction(trans, root);
2956         btrfs_release_path(&path);
2957         return ret;
2958 }
2959
2960 static int check_inode_recs(struct btrfs_root *root,
2961                             struct cache_tree *inode_cache)
2962 {
2963         struct cache_extent *cache;
2964         struct ptr_node *node;
2965         struct inode_record *rec;
2966         struct inode_backref *backref;
2967         int stage = 0;
2968         int ret = 0;
2969         int err = 0;
2970         u64 error = 0;
2971         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2972
2973         if (btrfs_root_refs(&root->root_item) == 0) {
2974                 if (!cache_tree_empty(inode_cache))
2975                         fprintf(stderr, "warning line %d\n", __LINE__);
2976                 return 0;
2977         }
2978
2979         /*
2980          * We need to record the highest inode number for later 'lost+found'
2981          * dir creation.
2982          * We must select an ino not used/referred by any existing inode, or
2983          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984          * this may cause 'lost+found' dir has wrong nlinks.
2985          */
2986         cache = last_cache_extent(inode_cache);
2987         if (cache) {
2988                 node = container_of(cache, struct ptr_node, cache);
2989                 rec = node->data;
2990                 if (rec->ino > root->highest_inode)
2991                         root->highest_inode = rec->ino;
2992         }
2993
2994         /*
2995          * We need to repair backrefs first because we could change some of the
2996          * errors in the inode recs.
2997          *
2998          * We also need to go through and delete invalid backrefs first and then
2999          * add the correct ones second.  We do this because we may get EEXIST
3000          * when adding back the correct index because we hadn't yet deleted the
3001          * invalid index.
3002          *
3003          * For example, if we were missing a dir index then the directories
3004          * isize would be wrong, so if we fixed the isize to what we thought it
3005          * would be and then fixed the backref we'd still have a invalid fs, so
3006          * we need to add back the dir index and then check to see if the isize
3007          * is still wrong.
3008          */
3009         while (stage < 3) {
3010                 stage++;
3011                 if (stage == 3 && !err)
3012                         break;
3013
3014                 cache = search_cache_extent(inode_cache, 0);
3015                 while (repair && cache) {
3016                         node = container_of(cache, struct ptr_node, cache);
3017                         rec = node->data;
3018                         cache = next_cache_extent(cache);
3019
3020                         /* Need to free everything up and rescan */
3021                         if (stage == 3) {
3022                                 remove_cache_extent(inode_cache, &node->cache);
3023                                 free(node);
3024                                 free_inode_rec(rec);
3025                                 continue;
3026                         }
3027
3028                         if (list_empty(&rec->backrefs))
3029                                 continue;
3030
3031                         ret = repair_inode_backrefs(root, rec, inode_cache,
3032                                                     stage == 1);
3033                         if (ret < 0) {
3034                                 err = ret;
3035                                 stage = 2;
3036                                 break;
3037                         } if (ret > 0) {
3038                                 err = -EAGAIN;
3039                         }
3040                 }
3041         }
3042         if (err)
3043                 return err;
3044
3045         rec = get_inode_rec(inode_cache, root_dirid, 0);
3046         BUG_ON(IS_ERR(rec));
3047         if (rec) {
3048                 ret = check_root_dir(rec);
3049                 if (ret) {
3050                         fprintf(stderr, "root %llu root dir %llu error\n",
3051                                 (unsigned long long)root->root_key.objectid,
3052                                 (unsigned long long)root_dirid);
3053                         print_inode_error(root, rec);
3054                         error++;
3055                 }
3056         } else {
3057                 if (repair) {
3058                         struct btrfs_trans_handle *trans;
3059
3060                         trans = btrfs_start_transaction(root, 1);
3061                         if (IS_ERR(trans)) {
3062                                 err = PTR_ERR(trans);
3063                                 return err;
3064                         }
3065
3066                         fprintf(stderr,
3067                                 "root %llu missing its root dir, recreating\n",
3068                                 (unsigned long long)root->objectid);
3069
3070                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3071                         BUG_ON(ret);
3072
3073                         btrfs_commit_transaction(trans, root);
3074                         return -EAGAIN;
3075                 }
3076
3077                 fprintf(stderr, "root %llu root dir %llu not found\n",
3078                         (unsigned long long)root->root_key.objectid,
3079                         (unsigned long long)root_dirid);
3080         }
3081
3082         while (1) {
3083                 cache = search_cache_extent(inode_cache, 0);
3084                 if (!cache)
3085                         break;
3086                 node = container_of(cache, struct ptr_node, cache);
3087                 rec = node->data;
3088                 remove_cache_extent(inode_cache, &node->cache);
3089                 free(node);
3090                 if (rec->ino == root_dirid ||
3091                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092                         free_inode_rec(rec);
3093                         continue;
3094                 }
3095
3096                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097                         ret = check_orphan_item(root, rec->ino);
3098                         if (ret == 0)
3099                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100                         if (can_free_inode_rec(rec)) {
3101                                 free_inode_rec(rec);
3102                                 continue;
3103                         }
3104                 }
3105
3106                 if (!rec->found_inode_item)
3107                         rec->errors |= I_ERR_NO_INODE_ITEM;
3108                 if (rec->found_link != rec->nlink)
3109                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110                 if (repair) {
3111                         ret = try_repair_inode(root, rec);
3112                         if (ret == 0 && can_free_inode_rec(rec)) {
3113                                 free_inode_rec(rec);
3114                                 continue;
3115                         }
3116                         ret = 0;
3117                 }
3118
3119                 if (!(repair && ret == 0))
3120                         error++;
3121                 print_inode_error(root, rec);
3122                 list_for_each_entry(backref, &rec->backrefs, list) {
3123                         if (!backref->found_dir_item)
3124                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125                         if (!backref->found_dir_index)
3126                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127                         if (!backref->found_inode_ref)
3128                                 backref->errors |= REF_ERR_NO_INODE_REF;
3129                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130                                 " namelen %u name %s filetype %d errors %x",
3131                                 (unsigned long long)backref->dir,
3132                                 (unsigned long long)backref->index,
3133                                 backref->namelen, backref->name,
3134                                 backref->filetype, backref->errors);
3135                         print_ref_error(backref->errors);
3136                 }
3137                 free_inode_rec(rec);
3138         }
3139         return (error > 0) ? -1 : 0;
3140 }
3141
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3143                                         u64 objectid)
3144 {
3145         struct cache_extent *cache;
3146         struct root_record *rec = NULL;
3147         int ret;
3148
3149         cache = lookup_cache_extent(root_cache, objectid, 1);
3150         if (cache) {
3151                 rec = container_of(cache, struct root_record, cache);
3152         } else {
3153                 rec = calloc(1, sizeof(*rec));
3154                 if (!rec)
3155                         return ERR_PTR(-ENOMEM);
3156                 rec->objectid = objectid;
3157                 INIT_LIST_HEAD(&rec->backrefs);
3158                 rec->cache.start = objectid;
3159                 rec->cache.size = 1;
3160
3161                 ret = insert_cache_extent(root_cache, &rec->cache);
3162                 if (ret)
3163                         return ERR_PTR(-EEXIST);
3164         }
3165         return rec;
3166 }
3167
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169                                              u64 ref_root, u64 dir, u64 index,
3170                                              const char *name, int namelen)
3171 {
3172         struct root_backref *backref;
3173
3174         list_for_each_entry(backref, &rec->backrefs, list) {
3175                 if (backref->ref_root != ref_root || backref->dir != dir ||
3176                     backref->namelen != namelen)
3177                         continue;
3178                 if (memcmp(name, backref->name, namelen))
3179                         continue;
3180                 return backref;
3181         }
3182
3183         backref = calloc(1, sizeof(*backref) + namelen + 1);
3184         if (!backref)
3185                 return NULL;
3186         backref->ref_root = ref_root;
3187         backref->dir = dir;
3188         backref->index = index;
3189         backref->namelen = namelen;
3190         memcpy(backref->name, name, namelen);
3191         backref->name[namelen] = '\0';
3192         list_add_tail(&backref->list, &rec->backrefs);
3193         return backref;
3194 }
3195
3196 static void free_root_record(struct cache_extent *cache)
3197 {
3198         struct root_record *rec;
3199         struct root_backref *backref;
3200
3201         rec = container_of(cache, struct root_record, cache);
3202         while (!list_empty(&rec->backrefs)) {
3203                 backref = to_root_backref(rec->backrefs.next);
3204                 list_del(&backref->list);
3205                 free(backref);
3206         }
3207
3208         free(rec);
3209 }
3210
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3212
3213 static int add_root_backref(struct cache_tree *root_cache,
3214                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3215                             const char *name, int namelen,
3216                             int item_type, int errors)
3217 {
3218         struct root_record *rec;
3219         struct root_backref *backref;
3220
3221         rec = get_root_rec(root_cache, root_id);
3222         BUG_ON(IS_ERR(rec));
3223         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3224         BUG_ON(!backref);
3225
3226         backref->errors |= errors;
3227
3228         if (item_type != BTRFS_DIR_ITEM_KEY) {
3229                 if (backref->found_dir_index || backref->found_back_ref ||
3230                     backref->found_forward_ref) {
3231                         if (backref->index != index)
3232                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233                 } else {
3234                         backref->index = index;
3235                 }
3236         }
3237
3238         if (item_type == BTRFS_DIR_ITEM_KEY) {
3239                 if (backref->found_forward_ref)
3240                         rec->found_ref++;
3241                 backref->found_dir_item = 1;
3242         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243                 backref->found_dir_index = 1;
3244         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245                 if (backref->found_forward_ref)
3246                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3247                 else if (backref->found_dir_item)
3248                         rec->found_ref++;
3249                 backref->found_forward_ref = 1;
3250         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251                 if (backref->found_back_ref)
3252                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253                 backref->found_back_ref = 1;
3254         } else {
3255                 BUG_ON(1);
3256         }
3257
3258         if (backref->found_forward_ref && backref->found_dir_item)
3259                 backref->reachable = 1;
3260         return 0;
3261 }
3262
3263 static int merge_root_recs(struct btrfs_root *root,
3264                            struct cache_tree *src_cache,
3265                            struct cache_tree *dst_cache)
3266 {
3267         struct cache_extent *cache;
3268         struct ptr_node *node;
3269         struct inode_record *rec;
3270         struct inode_backref *backref;
3271         int ret = 0;
3272
3273         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274                 free_inode_recs_tree(src_cache);
3275                 return 0;
3276         }
3277
3278         while (1) {
3279                 cache = search_cache_extent(src_cache, 0);
3280                 if (!cache)
3281                         break;
3282                 node = container_of(cache, struct ptr_node, cache);
3283                 rec = node->data;
3284                 remove_cache_extent(src_cache, &node->cache);
3285                 free(node);
3286
3287                 ret = is_child_root(root, root->objectid, rec->ino);
3288                 if (ret < 0)
3289                         break;
3290                 else if (ret == 0)
3291                         goto skip;
3292
3293                 list_for_each_entry(backref, &rec->backrefs, list) {
3294                         BUG_ON(backref->found_inode_ref);
3295                         if (backref->found_dir_item)
3296                                 add_root_backref(dst_cache, rec->ino,
3297                                         root->root_key.objectid, backref->dir,
3298                                         backref->index, backref->name,
3299                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3300                                         backref->errors);
3301                         if (backref->found_dir_index)
3302                                 add_root_backref(dst_cache, rec->ino,
3303                                         root->root_key.objectid, backref->dir,
3304                                         backref->index, backref->name,
3305                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3306                                         backref->errors);
3307                 }
3308 skip:
3309                 free_inode_rec(rec);
3310         }
3311         if (ret < 0)
3312                 return ret;
3313         return 0;
3314 }
3315
3316 static int check_root_refs(struct btrfs_root *root,
3317                            struct cache_tree *root_cache)
3318 {
3319         struct root_record *rec;
3320         struct root_record *ref_root;
3321         struct root_backref *backref;
3322         struct cache_extent *cache;
3323         int loop = 1;
3324         int ret;
3325         int error;
3326         int errors = 0;
3327
3328         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329         BUG_ON(IS_ERR(rec));
3330         rec->found_ref = 1;
3331
3332         /* fixme: this can not detect circular references */
3333         while (loop) {
3334                 loop = 0;
3335                 cache = search_cache_extent(root_cache, 0);
3336                 while (1) {
3337                         if (!cache)
3338                                 break;
3339                         rec = container_of(cache, struct root_record, cache);
3340                         cache = next_cache_extent(cache);
3341
3342                         if (rec->found_ref == 0)
3343                                 continue;
3344
3345                         list_for_each_entry(backref, &rec->backrefs, list) {
3346                                 if (!backref->reachable)
3347                                         continue;
3348
3349                                 ref_root = get_root_rec(root_cache,
3350                                                         backref->ref_root);
3351                                 BUG_ON(IS_ERR(ref_root));
3352                                 if (ref_root->found_ref > 0)
3353                                         continue;
3354
3355                                 backref->reachable = 0;
3356                                 rec->found_ref--;
3357                                 if (rec->found_ref == 0)
3358                                         loop = 1;
3359                         }
3360                 }
3361         }
3362
3363         cache = search_cache_extent(root_cache, 0);
3364         while (1) {
3365                 if (!cache)
3366                         break;
3367                 rec = container_of(cache, struct root_record, cache);
3368                 cache = next_cache_extent(cache);
3369
3370                 if (rec->found_ref == 0 &&
3371                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373                         ret = check_orphan_item(root->fs_info->tree_root,
3374                                                 rec->objectid);
3375                         if (ret == 0)
3376                                 continue;
3377
3378                         /*
3379                          * If we don't have a root item then we likely just have
3380                          * a dir item in a snapshot for this root but no actual
3381                          * ref key or anything so it's meaningless.
3382                          */
3383                         if (!rec->found_root_item)
3384                                 continue;
3385                         errors++;
3386                         fprintf(stderr, "fs tree %llu not referenced\n",
3387                                 (unsigned long long)rec->objectid);
3388                 }
3389
3390                 error = 0;
3391                 if (rec->found_ref > 0 && !rec->found_root_item)
3392                         error = 1;
3393                 list_for_each_entry(backref, &rec->backrefs, list) {
3394                         if (!backref->found_dir_item)
3395                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396                         if (!backref->found_dir_index)
3397                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398                         if (!backref->found_back_ref)
3399                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400                         if (!backref->found_forward_ref)
3401                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3402                         if (backref->reachable && backref->errors)
3403                                 error = 1;
3404                 }
3405                 if (!error)
3406                         continue;
3407
3408                 errors++;
3409                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410                         (unsigned long long)rec->objectid, rec->found_ref,
3411                          rec->found_root_item ? "" : "not found");
3412
3413                 list_for_each_entry(backref, &rec->backrefs, list) {
3414                         if (!backref->reachable)
3415                                 continue;
3416                         if (!backref->errors && rec->found_root_item)
3417                                 continue;
3418                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419                                 " index %llu namelen %u name %s errors %x\n",
3420                                 (unsigned long long)backref->ref_root,
3421                                 (unsigned long long)backref->dir,
3422                                 (unsigned long long)backref->index,
3423                                 backref->namelen, backref->name,
3424                                 backref->errors);
3425                         print_ref_error(backref->errors);
3426                 }
3427         }
3428         return errors > 0 ? 1 : 0;
3429 }
3430
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432                             struct btrfs_key *key,
3433                             struct cache_tree *root_cache)
3434 {
3435         u64 dirid;
3436         u64 index;
3437         u32 len;
3438         u32 name_len;
3439         struct btrfs_root_ref *ref;
3440         char namebuf[BTRFS_NAME_LEN];
3441         int error;
3442
3443         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444
3445         dirid = btrfs_root_ref_dirid(eb, ref);
3446         index = btrfs_root_ref_sequence(eb, ref);
3447         name_len = btrfs_root_ref_name_len(eb, ref);
3448
3449         if (name_len <= BTRFS_NAME_LEN) {
3450                 len = name_len;
3451                 error = 0;
3452         } else {
3453                 len = BTRFS_NAME_LEN;
3454                 error = REF_ERR_NAME_TOO_LONG;
3455         }
3456         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457
3458         if (key->type == BTRFS_ROOT_REF_KEY) {
3459                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460                                  index, namebuf, len, key->type, error);
3461         } else {
3462                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463                                  index, namebuf, len, key->type, error);
3464         }
3465         return 0;
3466 }
3467
3468 static void free_corrupt_block(struct cache_extent *cache)
3469 {
3470         struct btrfs_corrupt_block *corrupt;
3471
3472         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3473         free(corrupt);
3474 }
3475
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3477
3478 /*
3479  * Repair the btree of the given root.
3480  *
3481  * The fix is to remove the node key in corrupt_blocks cache_tree.
3482  * and rebalance the tree.
3483  * After the fix, the btree should be writeable.
3484  */
3485 static int repair_btree(struct btrfs_root *root,
3486                         struct cache_tree *corrupt_blocks)
3487 {
3488         struct btrfs_trans_handle *trans;
3489         struct btrfs_path path;
3490         struct btrfs_corrupt_block *corrupt;
3491         struct cache_extent *cache;
3492         struct btrfs_key key;
3493         u64 offset;
3494         int level;
3495         int ret = 0;
3496
3497         if (cache_tree_empty(corrupt_blocks))
3498                 return 0;
3499
3500         trans = btrfs_start_transaction(root, 1);
3501         if (IS_ERR(trans)) {
3502                 ret = PTR_ERR(trans);
3503                 fprintf(stderr, "Error starting transaction: %s\n",
3504                         strerror(-ret));
3505                 return ret;
3506         }
3507         btrfs_init_path(&path);
3508         cache = first_cache_extent(corrupt_blocks);
3509         while (cache) {
3510                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511                                        cache);
3512                 level = corrupt->level;
3513                 path.lowest_level = level;
3514                 key.objectid = corrupt->key.objectid;
3515                 key.type = corrupt->key.type;
3516                 key.offset = corrupt->key.offset;
3517
3518                 /*
3519                  * Here we don't want to do any tree balance, since it may
3520                  * cause a balance with corrupted brother leaf/node,
3521                  * so ins_len set to 0 here.
3522                  * Balance will be done after all corrupt node/leaf is deleted.
3523                  */
3524                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3525                 if (ret < 0)
3526                         goto out;
3527                 offset = btrfs_node_blockptr(path.nodes[level],
3528                                              path.slots[level]);
3529
3530                 /* Remove the ptr */
3531                 ret = btrfs_del_ptr(trans, root, &path, level,
3532                                     path.slots[level]);
3533                 if (ret < 0)
3534                         goto out;
3535                 /*
3536                  * Remove the corresponding extent
3537                  * return value is not concerned.
3538                  */
3539                 btrfs_release_path(&path);
3540                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541                                         0, root->root_key.objectid,
3542                                         level - 1, 0);
3543                 cache = next_cache_extent(cache);
3544         }
3545
3546         /* Balance the btree using btrfs_search_slot() */
3547         cache = first_cache_extent(corrupt_blocks);
3548         while (cache) {
3549                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550                                        cache);
3551                 memcpy(&key, &corrupt->key, sizeof(key));
3552                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3553                 if (ret < 0)
3554                         goto out;
3555                 /* return will always >0 since it won't find the item */
3556                 ret = 0;
3557                 btrfs_release_path(&path);
3558                 cache = next_cache_extent(cache);
3559         }
3560 out:
3561         btrfs_commit_transaction(trans, root);
3562         btrfs_release_path(&path);
3563         return ret;
3564 }
3565
3566 static int check_fs_root(struct btrfs_root *root,
3567                          struct cache_tree *root_cache,
3568                          struct walk_control *wc)
3569 {
3570         int ret = 0;
3571         int err = 0;
3572         int wret;
3573         int level;
3574         struct btrfs_path path;
3575         struct shared_node root_node;
3576         struct root_record *rec;
3577         struct btrfs_root_item *root_item = &root->root_item;
3578         struct cache_tree corrupt_blocks;
3579         struct orphan_data_extent *orphan;
3580         struct orphan_data_extent *tmp;
3581         enum btrfs_tree_block_status status;
3582         struct node_refs nrefs;
3583
3584         /*
3585          * Reuse the corrupt_block cache tree to record corrupted tree block
3586          *
3587          * Unlike the usage in extent tree check, here we do it in a per
3588          * fs/subvol tree base.
3589          */
3590         cache_tree_init(&corrupt_blocks);
3591         root->fs_info->corrupt_blocks = &corrupt_blocks;
3592
3593         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594                 rec = get_root_rec(root_cache, root->root_key.objectid);
3595                 BUG_ON(IS_ERR(rec));
3596                 if (btrfs_root_refs(root_item) > 0)
3597                         rec->found_root_item = 1;
3598         }
3599
3600         btrfs_init_path(&path);
3601         memset(&root_node, 0, sizeof(root_node));
3602         cache_tree_init(&root_node.root_cache);
3603         cache_tree_init(&root_node.inode_cache);
3604         memset(&nrefs, 0, sizeof(nrefs));
3605
3606         /* Move the orphan extent record to corresponding inode_record */
3607         list_for_each_entry_safe(orphan, tmp,
3608                                  &root->orphan_data_extents, list) {
3609                 struct inode_record *inode;
3610
3611                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612                                       1);
3613                 BUG_ON(IS_ERR(inode));
3614                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615                 list_move(&orphan->list, &inode->orphan_extents);
3616         }
3617
3618         level = btrfs_header_level(root->node);
3619         memset(wc->nodes, 0, sizeof(wc->nodes));
3620         wc->nodes[level] = &root_node;
3621         wc->active_node = level;
3622         wc->root_level = level;
3623
3624         /* We may not have checked the root block, lets do that now */
3625         if (btrfs_is_leaf(root->node))
3626                 status = btrfs_check_leaf(root, NULL, root->node);
3627         else
3628                 status = btrfs_check_node(root, NULL, root->node);
3629         if (status != BTRFS_TREE_BLOCK_CLEAN)
3630                 return -EIO;
3631
3632         if (btrfs_root_refs(root_item) > 0 ||
3633             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634                 path.nodes[level] = root->node;
3635                 extent_buffer_get(root->node);
3636                 path.slots[level] = 0;
3637         } else {
3638                 struct btrfs_key key;
3639                 struct btrfs_disk_key found_key;
3640
3641                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642                 level = root_item->drop_level;
3643                 path.lowest_level = level;
3644                 if (level > btrfs_header_level(root->node) ||
3645                     level >= BTRFS_MAX_LEVEL) {
3646                         error("ignoring invalid drop level: %u", level);
3647                         goto skip_walking;
3648                 }
3649                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3650                 if (wret < 0)
3651                         goto skip_walking;
3652                 btrfs_node_key(path.nodes[level], &found_key,
3653                                 path.slots[level]);
3654                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655                                         sizeof(found_key)));
3656         }
3657
3658         while (1) {
3659                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3660                 if (wret < 0)
3661                         ret = wret;
3662                 if (wret != 0)
3663                         break;
3664
3665                 wret = walk_up_tree(root, &path, wc, &level);
3666                 if (wret < 0)
3667                         ret = wret;
3668                 if (wret != 0)
3669                         break;
3670         }
3671 skip_walking:
3672         btrfs_release_path(&path);
3673
3674         if (!cache_tree_empty(&corrupt_blocks)) {
3675                 struct cache_extent *cache;
3676                 struct btrfs_corrupt_block *corrupt;
3677
3678                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679                        root->root_key.objectid);
3680                 cache = first_cache_extent(&corrupt_blocks);
3681                 while (cache) {
3682                         corrupt = container_of(cache,
3683                                                struct btrfs_corrupt_block,
3684                                                cache);
3685                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686                                cache->start, corrupt->level,
3687                                corrupt->key.objectid, corrupt->key.type,
3688                                corrupt->key.offset);
3689                         cache = next_cache_extent(cache);
3690                 }
3691                 if (repair) {
3692                         printf("Try to repair the btree for root %llu\n",
3693                                root->root_key.objectid);
3694                         ret = repair_btree(root, &corrupt_blocks);
3695                         if (ret < 0)
3696                                 fprintf(stderr, "Failed to repair btree: %s\n",
3697                                         strerror(-ret));
3698                         if (!ret)
3699                                 printf("Btree for root %llu is fixed\n",
3700                                        root->root_key.objectid);
3701                 }
3702         }
3703
3704         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3705         if (err < 0)
3706                 ret = err;
3707
3708         if (root_node.current) {
3709                 root_node.current->checked = 1;
3710                 maybe_free_inode_rec(&root_node.inode_cache,
3711                                 root_node.current);
3712         }
3713
3714         err = check_inode_recs(root, &root_node.inode_cache);
3715         if (!ret)
3716                 ret = err;
3717
3718         free_corrupt_blocks_tree(&corrupt_blocks);
3719         root->fs_info->corrupt_blocks = NULL;
3720         free_orphan_data_extents(&root->orphan_data_extents);
3721         return ret;
3722 }
3723
3724 static int fs_root_objectid(u64 objectid)
3725 {
3726         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728                 return 1;
3729         return is_fstree(objectid);
3730 }
3731
3732 static int check_fs_roots(struct btrfs_root *root,
3733                           struct cache_tree *root_cache)
3734 {
3735         struct btrfs_path path;
3736         struct btrfs_key key;
3737         struct walk_control wc;
3738         struct extent_buffer *leaf, *tree_node;
3739         struct btrfs_root *tmp_root;
3740         struct btrfs_root *tree_root = root->fs_info->tree_root;
3741         int ret;
3742         int err = 0;
3743
3744         if (ctx.progress_enabled) {
3745                 ctx.tp = TASK_FS_ROOTS;
3746                 task_start(ctx.info);
3747         }
3748
3749         /*
3750          * Just in case we made any changes to the extent tree that weren't
3751          * reflected into the free space cache yet.
3752          */
3753         if (repair)
3754                 reset_cached_block_groups(root->fs_info);
3755         memset(&wc, 0, sizeof(wc));
3756         cache_tree_init(&wc.shared);
3757         btrfs_init_path(&path);
3758
3759 again:
3760         key.offset = 0;
3761         key.objectid = 0;
3762         key.type = BTRFS_ROOT_ITEM_KEY;
3763         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3764         if (ret < 0) {
3765                 err = 1;
3766                 goto out;
3767         }
3768         tree_node = tree_root->node;
3769         while (1) {
3770                 if (tree_node != tree_root->node) {
3771                         free_root_recs_tree(root_cache);
3772                         btrfs_release_path(&path);
3773                         goto again;
3774                 }
3775                 leaf = path.nodes[0];
3776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777                         ret = btrfs_next_leaf(tree_root, &path);
3778                         if (ret) {
3779                                 if (ret < 0)
3780                                         err = 1;
3781                                 break;
3782                         }
3783                         leaf = path.nodes[0];
3784                 }
3785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787                     fs_root_objectid(key.objectid)) {
3788                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789                                 tmp_root = btrfs_read_fs_root_no_cache(
3790                                                 root->fs_info, &key);
3791                         } else {
3792                                 key.offset = (u64)-1;
3793                                 tmp_root = btrfs_read_fs_root(
3794                                                 root->fs_info, &key);
3795                         }
3796                         if (IS_ERR(tmp_root)) {
3797                                 err = 1;
3798                                 goto next;
3799                         }
3800                         ret = check_fs_root(tmp_root, root_cache, &wc);
3801                         if (ret == -EAGAIN) {
3802                                 free_root_recs_tree(root_cache);
3803                                 btrfs_release_path(&path);
3804                                 goto again;
3805                         }
3806                         if (ret)
3807                                 err = 1;
3808                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809                                 btrfs_free_fs_root(tmp_root);
3810                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3812                         process_root_ref(leaf, path.slots[0], &key,
3813                                          root_cache);
3814                 }
3815 next:
3816                 path.slots[0]++;
3817         }
3818 out:
3819         btrfs_release_path(&path);
3820         if (err)
3821                 free_extent_cache_tree(&wc.shared);
3822         if (!cache_tree_empty(&wc.shared))
3823                 fprintf(stderr, "warning line %d\n", __LINE__);
3824
3825         task_stop(ctx.info);
3826
3827         return err;
3828 }
3829
/*
 * Error bits for the item checkers that follow.  Each flag occupies its own
 * bit so several of them can be OR-ed into a single int result.
 * Bits 0 and 14 are not assigned here.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
3844
3845 /*
3846  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3847  * INODE_REF/INODE_EXTREF match.
3848  *
3849  * @root:       the root of the fs/file tree
3850  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
3851  * @key:        the key of the DIR_ITEM/DIR_INDEX
3852  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
3853  *              distinguish root_dir between normal dir/file
3854  * @name:       the name in the INODE_REF/INODE_EXTREF
3855  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
3856  * @mode:       the st_mode of INODE_ITEM
3857  *
3858  * Return 0 if no error occurred.
3859  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3860  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3861  * dir/file.
3862  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3863  * not match for normal dir/file.
3864  */
3865 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3866                          struct btrfs_key *key, u64 index, char *name,
3867                          u32 namelen, u32 mode)
3868 {
3869         struct btrfs_path path;
3870         struct extent_buffer *node;
3871         struct btrfs_dir_item *di;
3872         struct btrfs_key location;
3873         char namebuf[BTRFS_NAME_LEN] = {0};
3874         u32 total;
3875         u32 cur = 0;
3876         u32 len;
3877         u32 name_len;
3878         u32 data_len;
3879         u8 filetype;
3880         int slot;
3881         int ret;
3882
3883         btrfs_init_path(&path);
3884         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3885         if (ret < 0) {
3886                 ret = DIR_ITEM_MISSING;
3887                 goto out;
3888         }
3889
3890         /* Process root dir and goto out*/
3891         if (index == 0) {
3892                 if (ret == 0) {
3893                         ret = ROOT_DIR_ERROR;
3894                         error(
3895                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3896                                 root->objectid,
3897                                 ref_key->type == BTRFS_INODE_REF_KEY ?
3898                                         "REF" : "EXTREF",
3899                                 ref_key->objectid, ref_key->offset,
3900                                 key->type == BTRFS_DIR_ITEM_KEY ?
3901                                         "DIR_ITEM" : "DIR_INDEX");
3902                 } else {
3903                         ret = 0;
3904                 }
3905
3906                 goto out;
3907         }
3908
3909         /* Process normal file/dir */
3910         if (ret > 0) {
3911                 ret = DIR_ITEM_MISSING;
3912                 error(
3913                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3914                         root->objectid,
3915                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3916                         ref_key->objectid, ref_key->offset,
3917                         key->type == BTRFS_DIR_ITEM_KEY ?
3918                                 "DIR_ITEM" : "DIR_INDEX",
3919                         key->objectid, key->offset, namelen, name,
3920                         imode_to_type(mode));
3921                 goto out;
3922         }
3923
3924         /* Check whether inode_id/filetype/name match */
3925         node = path.nodes[0];
3926         slot = path.slots[0];
3927         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3928         total = btrfs_item_size_nr(node, slot);
3929         while (cur < total) {
3930                 ret = DIR_ITEM_MISMATCH;
3931                 name_len = btrfs_dir_name_len(node, di);
3932                 data_len = btrfs_dir_data_len(node, di);
3933
3934                 btrfs_dir_item_key_to_cpu(node, di, &location);
3935                 if (location.objectid != ref_key->objectid ||
3936                     location.type !=  BTRFS_INODE_ITEM_KEY ||
3937                     location.offset != 0)
3938                         goto next;
3939
3940                 filetype = btrfs_dir_type(node, di);
3941                 if (imode_to_type(mode) != filetype)
3942                         goto next;
3943
3944                 if (name_len <= BTRFS_NAME_LEN) {
3945                         len = name_len;
3946                 } else {
3947                         len = BTRFS_NAME_LEN;
3948                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3949                         root->objectid,
3950                         key->type == BTRFS_DIR_ITEM_KEY ?
3951                         "DIR_ITEM" : "DIR_INDEX",
3952                         key->objectid, key->offset, name_len);
3953                 }
3954                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3955                 if (len != namelen || strncmp(namebuf, name, len))
3956                         goto next;
3957
3958                 ret = 0;
3959                 goto out;
3960 next:
3961                 len = sizeof(*di) + name_len + data_len;
3962                 di = (struct btrfs_dir_item *)((char *)di + len);
3963                 cur += len;
3964         }
3965         if (ret == DIR_ITEM_MISMATCH)
3966                 error(
3967                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3968                         root->objectid,
3969                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3970                         ref_key->objectid, ref_key->offset,
3971                         key->type == BTRFS_DIR_ITEM_KEY ?
3972                                 "DIR_ITEM" : "DIR_INDEX",
3973                         key->objectid, key->offset, namelen, name,
3974                         imode_to_type(mode));
3975 out:
3976         btrfs_release_path(&path);
3977         return ret;
3978 }
3979
3980 /*
3981  * Traverse the given INODE_REF and call find_dir_item() to find related
3982  * DIR_ITEM/DIR_INDEX.
3983  *
3984  * @root:       the root of the fs/file tree
3985  * @ref_key:    the key of the INODE_REF
3986  * @refs:       the count of INODE_REF
3987  * @mode:       the st_mode of INODE_ITEM
3988  *
3989  * Return 0 if no error occurred.
3990  */
3991 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3992                            struct extent_buffer *node, int slot, u64 *refs,
3993                            int mode)
3994 {
3995         struct btrfs_key key;
3996         struct btrfs_inode_ref *ref;
3997         char namebuf[BTRFS_NAME_LEN] = {0};
3998         u32 total;
3999         u32 cur = 0;
4000         u32 len;
4001         u32 name_len;
4002         u64 index;
4003         int ret, err = 0;
4004
4005         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4006         total = btrfs_item_size_nr(node, slot);
4007
4008 next:
4009         /* Update inode ref count */
4010         (*refs)++;
4011
4012         index = btrfs_inode_ref_index(node, ref);
4013         name_len = btrfs_inode_ref_name_len(node, ref);
4014         if (name_len <= BTRFS_NAME_LEN) {
4015                 len = name_len;
4016         } else {
4017                 len = BTRFS_NAME_LEN;
4018                 warning("root %llu INODE_REF[%llu %llu] name too long",
4019                         root->objectid, ref_key->objectid, ref_key->offset);
4020         }
4021
4022         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4023
4024         /* Check root dir ref name */
4025         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4026                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4027                       root->objectid, ref_key->objectid, ref_key->offset,
4028                       namebuf);
4029                 err |= ROOT_DIR_ERROR;
4030         }
4031
4032         /* Find related DIR_INDEX */
4033         key.objectid = ref_key->offset;
4034         key.type = BTRFS_DIR_INDEX_KEY;
4035         key.offset = index;
4036         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4037         err |= ret;
4038
4039         /* Find related dir_item */
4040         key.objectid = ref_key->offset;
4041         key.type = BTRFS_DIR_ITEM_KEY;
4042         key.offset = btrfs_name_hash(namebuf, len);
4043         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4044         err |= ret;
4045
4046         len = sizeof(*ref) + name_len;
4047         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4048         cur += len;
4049         if (cur < total)
4050                 goto next;
4051
4052         return err;
4053 }
4054
4055 /*
4056  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4057  * DIR_ITEM/DIR_INDEX.
4058  *
4059  * @root:       the root of the fs/file tree
4060  * @ref_key:    the key of the INODE_EXTREF
4061  * @refs:       the count of INODE_EXTREF
4062  * @mode:       the st_mode of INODE_ITEM
4063  *
4064  * Return 0 if no error occurred.
4065  */
4066 static int check_inode_extref(struct btrfs_root *root,
4067                               struct btrfs_key *ref_key,
4068                               struct extent_buffer *node, int slot, u64 *refs,
4069                               int mode)
4070 {
4071         struct btrfs_key key;
4072         struct btrfs_inode_extref *extref;
4073         char namebuf[BTRFS_NAME_LEN] = {0};
4074         u32 total;
4075         u32 cur = 0;
4076         u32 len;
4077         u32 name_len;
4078         u64 index;
4079         u64 parent;
4080         int ret;
4081         int err = 0;
4082
4083         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4084         total = btrfs_item_size_nr(node, slot);
4085
4086 next:
4087         /* update inode ref count */
4088         (*refs)++;
4089         name_len = btrfs_inode_extref_name_len(node, extref);
4090         index = btrfs_inode_extref_index(node, extref);
4091         parent = btrfs_inode_extref_parent(node, extref);
4092         if (name_len <= BTRFS_NAME_LEN) {
4093                 len = name_len;
4094         } else {
4095                 len = BTRFS_NAME_LEN;
4096                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4097                         root->objectid, ref_key->objectid, ref_key->offset);
4098         }
4099         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4100
4101         /* Check root dir ref name */
4102         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4103                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4104                       root->objectid, ref_key->objectid, ref_key->offset,
4105                       namebuf);
4106                 err |= ROOT_DIR_ERROR;
4107         }
4108
4109         /* find related dir_index */
4110         key.objectid = parent;
4111         key.type = BTRFS_DIR_INDEX_KEY;
4112         key.offset = index;
4113         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4114         err |= ret;
4115
4116         /* find related dir_item */
4117         key.objectid = parent;
4118         key.type = BTRFS_DIR_ITEM_KEY;
4119         key.offset = btrfs_name_hash(namebuf, len);
4120         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4121         err |= ret;
4122
4123         len = sizeof(*extref) + name_len;
4124         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4125         cur += len;
4126
4127         if (cur < total)
4128                 goto next;
4129
4130         return err;
4131 }
4132
4133 /*
4134  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4135  * DIR_ITEM/DIR_INDEX match.
4136  *
4137  * @root:       the root of the fs/file tree
4138  * @key:        the key of the INODE_REF/INODE_EXTREF
4139  * @name:       the name in the INODE_REF/INODE_EXTREF
4140  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4141  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4142  * to (u64)-1
4143  * @ext_ref:    the EXTENDED_IREF feature
4144  *
4145  * Return 0 if no error occurred.
4146  * Return >0 for error bitmap
4147  */
4148 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4149                           char *name, int namelen, u64 index,
4150                           unsigned int ext_ref)
4151 {
4152         struct btrfs_path path;
4153         struct btrfs_inode_ref *ref;
4154         struct btrfs_inode_extref *extref;
4155         struct extent_buffer *node;
4156         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4157         u32 total;
4158         u32 cur = 0;
4159         u32 len;
4160         u32 ref_namelen;
4161         u64 ref_index;
4162         u64 parent;
4163         u64 dir_id;
4164         int slot;
4165         int ret;
4166
4167         btrfs_init_path(&path);
4168         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4169         if (ret) {
4170                 ret = INODE_REF_MISSING;
4171                 goto extref;
4172         }
4173
4174         node = path.nodes[0];
4175         slot = path.slots[0];
4176
4177         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4178         total = btrfs_item_size_nr(node, slot);
4179
4180         /* Iterate all entry of INODE_REF */
4181         while (cur < total) {
4182                 ret = INODE_REF_MISSING;
4183
4184                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4185                 ref_index = btrfs_inode_ref_index(node, ref);
4186                 if (index != (u64)-1 && index != ref_index)
4187                         goto next_ref;
4188
4189                 if (ref_namelen <= BTRFS_NAME_LEN) {
4190                         len = ref_namelen;
4191                 } else {
4192                         len = BTRFS_NAME_LEN;
4193                         warning("root %llu INODE %s[%llu %llu] name too long",
4194                                 root->objectid,
4195                                 key->type == BTRFS_INODE_REF_KEY ?
4196                                         "REF" : "EXTREF",
4197                                 key->objectid, key->offset);
4198                 }
4199                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4200                                    len);
4201
4202                 if (len != namelen || strncmp(ref_namebuf, name, len))
4203                         goto next_ref;
4204
4205                 ret = 0;
4206                 goto out;
4207 next_ref:
4208                 len = sizeof(*ref) + ref_namelen;
4209                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4210                 cur += len;
4211         }
4212
4213 extref:
4214         /* Skip if not support EXTENDED_IREF feature */
4215         if (!ext_ref)
4216                 goto out;
4217
4218         btrfs_release_path(&path);
4219         btrfs_init_path(&path);
4220
4221         dir_id = key->offset;
4222         key->type = BTRFS_INODE_EXTREF_KEY;
4223         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4224
4225         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4226         if (ret) {
4227                 ret = INODE_REF_MISSING;
4228                 goto out;
4229         }
4230
4231         node = path.nodes[0];
4232         slot = path.slots[0];
4233
4234         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4235         cur = 0;
4236         total = btrfs_item_size_nr(node, slot);
4237
4238         /* Iterate all entry of INODE_EXTREF */
4239         while (cur < total) {
4240                 ret = INODE_REF_MISSING;
4241
4242                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4243                 ref_index = btrfs_inode_extref_index(node, extref);
4244                 parent = btrfs_inode_extref_parent(node, extref);
4245                 if (index != (u64)-1 && index != ref_index)
4246                         goto next_extref;
4247
4248                 if (parent != dir_id)
4249                         goto next_extref;
4250
4251                 if (ref_namelen <= BTRFS_NAME_LEN) {
4252                         len = ref_namelen;
4253                 } else {
4254                         len = BTRFS_NAME_LEN;
4255                         warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4256                                 root->objectid,
4257                                 key->type == BTRFS_INODE_REF_KEY ?
4258                                         "REF" : "EXTREF",
4259                                 key->objectid, key->offset);
4260                 }
4261                 read_extent_buffer(node, ref_namebuf,
4262                                    (unsigned long)(extref + 1), len);
4263
4264                 if (len != namelen || strncmp(ref_namebuf, name, len))
4265                         goto next_extref;
4266
4267                 ret = 0;
4268                 goto out;
4269
4270 next_extref:
4271                 len = sizeof(*extref) + ref_namelen;
4272                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4273                 cur += len;
4274
4275         }
4276 out:
4277         btrfs_release_path(&path);
4278         return ret;
4279 }
4280
4281 /*
4282  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4283  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4284  *
4285  * @root:       the root of the fs/file tree
4286  * @key:        the key of the INODE_REF/INODE_EXTREF
4287  * @size:       the st_size of the INODE_ITEM
4288  * @ext_ref:    the EXTENDED_IREF feature
4289  *
4290  * Return 0 if no error occurred.
4291  */
4292 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4293                           struct extent_buffer *node, int slot, u64 *size,
4294                           unsigned int ext_ref)
4295 {
4296         struct btrfs_dir_item *di;
4297         struct btrfs_inode_item *ii;
4298         struct btrfs_path path;
4299         struct btrfs_key location;
4300         char namebuf[BTRFS_NAME_LEN] = {0};
4301         u32 total;
4302         u32 cur = 0;
4303         u32 len;
4304         u32 name_len;
4305         u32 data_len;
4306         u8 filetype;
4307         u32 mode;
4308         u64 index;
4309         int ret;
4310         int err = 0;
4311
4312         /*
4313          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4314          * ignore index check.
4315          */
4316         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4317
4318         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4319         total = btrfs_item_size_nr(node, slot);
4320
4321         while (cur < total) {
4322                 data_len = btrfs_dir_data_len(node, di);
4323                 if (data_len)
4324                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4325                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4326                               "DIR_ITEM" : "DIR_INDEX",
4327                               key->objectid, key->offset, data_len);
4328
4329                 name_len = btrfs_dir_name_len(node, di);
4330                 if (name_len <= BTRFS_NAME_LEN) {
4331                         len = name_len;
4332                 } else {
4333                         len = BTRFS_NAME_LEN;
4334                         warning("root %llu %s[%llu %llu] name too long",
4335                                 root->objectid,
4336                                 key->type == BTRFS_DIR_ITEM_KEY ?
4337                                 "DIR_ITEM" : "DIR_INDEX",
4338                                 key->objectid, key->offset);
4339                 }
4340                 (*size) += name_len;
4341
4342                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4343                 filetype = btrfs_dir_type(node, di);
4344
4345                 btrfs_init_path(&path);
4346                 btrfs_dir_item_key_to_cpu(node, di, &location);
4347
4348                 /* Ignore related ROOT_ITEM check */
4349                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4350                         goto next;
4351
4352                 /* Check relative INODE_ITEM(existence/filetype) */
4353                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4354                 if (ret) {
4355                         err |= INODE_ITEM_MISSING;
4356                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4357                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4358                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4359                               key->offset, location.objectid, name_len,
4360                               namebuf, filetype);
4361                         goto next;
4362                 }
4363
4364                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4365                                     struct btrfs_inode_item);
4366                 mode = btrfs_inode_mode(path.nodes[0], ii);
4367
4368                 if (imode_to_type(mode) != filetype) {
4369                         err |= INODE_ITEM_MISMATCH;
4370                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4371                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4372                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4373                               key->offset, name_len, namebuf, filetype);
4374                 }
4375
4376                 /* Check relative INODE_REF/INODE_EXTREF */
4377                 location.type = BTRFS_INODE_REF_KEY;
4378                 location.offset = key->objectid;
4379                 ret = find_inode_ref(root, &location, namebuf, len,
4380                                        index, ext_ref);
4381                 err |= ret;
4382                 if (ret & INODE_REF_MISSING)
4383                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4384                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4385                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4386                               key->offset, name_len, namebuf, filetype);
4387
4388 next:
4389                 btrfs_release_path(&path);
4390                 len = sizeof(*di) + name_len + data_len;
4391                 di = (struct btrfs_dir_item *)((char *)di + len);
4392                 cur += len;
4393
4394                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4395                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4396                               root->objectid, key->objectid, key->offset);
4397                         break;
4398                 }
4399         }
4400
4401         return err;
4402 }
4403
4404 /*
4405  * Check file extent datasum/hole, update the size of the file extents,
4406  * check and update the last offset of the file extent.
4407  *
4408  * @root:       the root of fs/file tree.
4409  * @fkey:       the key of the file extent.
4410  * @nodatasum:  INODE_NODATASUM feature.
4411  * @size:       the sum of all EXTENT_DATA items size for this inode.
4412  * @end:        the offset of the last extent.
4413  *
4414  * Return 0 if no error occurred.
4415  */
4416 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4417                              struct extent_buffer *node, int slot,
4418                              unsigned int nodatasum, u64 *size, u64 *end)
4419 {
4420         struct btrfs_file_extent_item *fi;
4421         u64 disk_bytenr;
4422         u64 disk_num_bytes;
4423         u64 extent_num_bytes;
4424         u64 found;
4425         unsigned int extent_type;
4426         unsigned int is_hole;
4427         int ret;
4428         int err = 0;
4429
4430         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4431
4432         extent_type = btrfs_file_extent_type(node, fi);
4433         /* Skip if file extent is inline */
4434         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4435                 struct btrfs_item *e = btrfs_item_nr(slot);
4436                 u32 item_inline_len;
4437
4438                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4439                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4440                 if (extent_num_bytes == 0 ||
4441                     extent_num_bytes != item_inline_len)
4442                         err |= FILE_EXTENT_ERROR;
4443                 *size += extent_num_bytes;
4444                 return err;
4445         }
4446
4447         /* Check extent type */
4448         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4449                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4450                 err |= FILE_EXTENT_ERROR;
4451                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4452                       root->objectid, fkey->objectid, fkey->offset);
4453                 return err;
4454         }
4455
4456         /* Check REG_EXTENT/PREALLOC_EXTENT */
4457         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4458         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4459         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4460         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4461
4462         /* Check EXTENT_DATA datasum */
4463         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4464         if (found > 0 && nodatasum) {
4465                 err |= ODD_CSUM_ITEM;
4466                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4467                       root->objectid, fkey->objectid, fkey->offset);
4468         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4469                    !is_hole &&
4470                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4471                 err |= CSUM_ITEM_MISSING;
4472                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4473                       root->objectid, fkey->objectid, fkey->offset);
4474         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4475                 err |= ODD_CSUM_ITEM;
4476                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4477                       root->objectid, fkey->objectid, fkey->offset);
4478         }
4479
4480         /* Check EXTENT_DATA hole */
4481         if (no_holes && is_hole) {
4482                 err |= FILE_EXTENT_ERROR;
4483                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4484                       root->objectid, fkey->objectid, fkey->offset);
4485         } else if (!no_holes && *end != fkey->offset) {
4486                 err |= FILE_EXTENT_ERROR;
4487                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4488                       root->objectid, fkey->objectid, fkey->offset);
4489         }
4490
4491         *end += extent_num_bytes;
4492         if (!is_hole)
4493                 *size += extent_num_bytes;
4494
4495         return err;
4496 }
4497
4498 /*
4499  * Check INODE_ITEM and related ITEMs (the same inode number)
4500  * 1. check link count
4501  * 2. check inode ref/extref
4502  * 3. check dir item/index
4503  *
4504  * @ext_ref:    the EXTENDED_IREF feature
4505  *
4506  * Return 0 if no error occurred.
4507  * Return >0 for error or hit the traversal is done(by error bitmap)
4508  */
4509 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4510                             unsigned int ext_ref)
4511 {
4512         struct extent_buffer *node;
4513         struct btrfs_inode_item *ii;
4514         struct btrfs_key key;
4515         u64 inode_id;
4516         u32 mode;
4517         u64 nlink;
4518         u64 nbytes;
4519         u64 isize;
4520         u64 size = 0;
4521         u64 refs = 0;
4522         u64 extent_end = 0;
4523         u64 extent_size = 0;
4524         unsigned int dir;
4525         unsigned int nodatasum;
4526         int slot;
4527         int ret;
4528         int err = 0;
4529
4530         node = path->nodes[0];
4531         slot = path->slots[0];
4532
4533         btrfs_item_key_to_cpu(node, &key, slot);
4534         inode_id = key.objectid;
4535
4536         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4537                 ret = btrfs_next_item(root, path);
4538                 if (ret > 0)
4539                         err |= LAST_ITEM;
4540                 return err;
4541         }
4542
4543         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4544         isize = btrfs_inode_size(node, ii);
4545         nbytes = btrfs_inode_nbytes(node, ii);
4546         mode = btrfs_inode_mode(node, ii);
4547         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4548         nlink = btrfs_inode_nlink(node, ii);
4549         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4550
4551         while (1) {
4552                 ret = btrfs_next_item(root, path);
4553                 if (ret < 0) {
4554                         /* out will fill 'err' rusing current statistics */
4555                         goto out;
4556                 } else if (ret > 0) {
4557                         err |= LAST_ITEM;
4558                         goto out;
4559                 }
4560
4561                 node = path->nodes[0];
4562                 slot = path->slots[0];
4563                 btrfs_item_key_to_cpu(node, &key, slot);
4564                 if (key.objectid != inode_id)
4565                         goto out;
4566
4567                 switch (key.type) {
4568                 case BTRFS_INODE_REF_KEY:
4569                         ret = check_inode_ref(root, &key, node, slot, &refs,
4570                                               mode);
4571                         err |= ret;
4572                         break;
4573                 case BTRFS_INODE_EXTREF_KEY:
4574                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4575                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4576                                         root->objectid, key.objectid,
4577                                         key.offset);
4578                         ret = check_inode_extref(root, &key, node, slot, &refs,
4579                                                  mode);
4580                         err |= ret;
4581                         break;
4582                 case BTRFS_DIR_ITEM_KEY:
4583                 case BTRFS_DIR_INDEX_KEY:
4584                         if (!dir) {
4585                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4586                                         root->objectid, inode_id,
4587                                         imode_to_type(mode), key.objectid,
4588                                         key.offset);
4589                         }
4590                         ret = check_dir_item(root, &key, node, slot, &size,
4591                                              ext_ref);
4592                         err |= ret;
4593                         break;
4594                 case BTRFS_EXTENT_DATA_KEY:
4595                         if (dir) {
4596                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4597                                         root->objectid, inode_id, key.objectid,
4598                                         key.offset);
4599                         }
4600                         ret = check_file_extent(root, &key, node, slot,
4601                                                 nodatasum, &extent_size,
4602                                                 &extent_end);
4603                         err |= ret;
4604                         break;
4605                 case BTRFS_XATTR_ITEM_KEY:
4606                         break;
4607                 default:
4608                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4609                               key.objectid, key.type, key.offset);
4610                 }
4611         }
4612
4613 out:
4614         /* verify INODE_ITEM nlink/isize/nbytes */
4615         if (dir) {
4616                 if (nlink != 1) {
4617                         err |= LINK_COUNT_ERROR;
4618                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4619                               root->objectid, inode_id, nlink);
4620                 }
4621
4622                 /*
4623                  * Just a warning, as dir inode nbytes is just an
4624                  * instructive value.
4625                  */
4626                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4627                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4628                                 root->objectid, inode_id, root->nodesize);
4629                 }
4630
4631                 if (isize != size) {
4632                         err |= ISIZE_ERROR;
4633                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4634                               root->objectid, inode_id, isize, size);
4635                 }
4636         } else {
4637                 if (nlink != refs) {
4638                         err |= LINK_COUNT_ERROR;
4639                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4640                               root->objectid, inode_id, nlink, refs);
4641                 } else if (!nlink) {
4642                         err |= ORPHAN_ITEM;
4643                 }
4644
4645                 if (!nbytes && !no_holes && extent_end < isize) {
4646                         err |= NBYTES_ERROR;
4647                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4648                               root->objectid, inode_id, isize);
4649                 }
4650
4651                 if (nbytes != extent_size) {
4652                         err |= NBYTES_ERROR;
4653                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4654                               root->objectid, inode_id, nbytes, extent_size);
4655                 }
4656         }
4657
4658         return err;
4659 }
4660
4661 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4662 {
4663         struct list_head *cur = rec->backrefs.next;
4664         struct extent_backref *back;
4665         struct tree_backref *tback;
4666         struct data_backref *dback;
4667         u64 found = 0;
4668         int err = 0;
4669
4670         while(cur != &rec->backrefs) {
4671                 back = to_extent_backref(cur);
4672                 cur = cur->next;
4673                 if (!back->found_extent_tree) {
4674                         err = 1;
4675                         if (!print_errs)
4676                                 goto out;
4677                         if (back->is_data) {
4678                                 dback = to_data_backref(back);
4679                                 fprintf(stderr, "Backref %llu %s %llu"
4680                                         " owner %llu offset %llu num_refs %lu"
4681                                         " not found in extent tree\n",
4682                                         (unsigned long long)rec->start,
4683                                         back->full_backref ?
4684                                         "parent" : "root",
4685                                         back->full_backref ?
4686                                         (unsigned long long)dback->parent:
4687                                         (unsigned long long)dback->root,
4688                                         (unsigned long long)dback->owner,
4689                                         (unsigned long long)dback->offset,
4690                                         (unsigned long)dback->num_refs);
4691                         } else {
4692                                 tback = to_tree_backref(back);
4693                                 fprintf(stderr, "Backref %llu parent %llu"
4694                                         " root %llu not found in extent tree\n",
4695                                         (unsigned long long)rec->start,
4696                                         (unsigned long long)tback->parent,
4697                                         (unsigned long long)tback->root);
4698                         }
4699                 }
4700                 if (!back->is_data && !back->found_ref) {
4701                         err = 1;
4702                         if (!print_errs)
4703                                 goto out;
4704                         tback = to_tree_backref(back);
4705                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4706                                 (unsigned long long)rec->start,
4707                                 back->full_backref ? "parent" : "root",
4708                                 back->full_backref ?
4709                                 (unsigned long long)tback->parent :
4710                                 (unsigned long long)tback->root, back);
4711                 }
4712                 if (back->is_data) {
4713                         dback = to_data_backref(back);
4714                         if (dback->found_ref != dback->num_refs) {
4715                                 err = 1;
4716                                 if (!print_errs)
4717                                         goto out;
4718                                 fprintf(stderr, "Incorrect local backref count"
4719                                         " on %llu %s %llu owner %llu"
4720                                         " offset %llu found %u wanted %u back %p\n",
4721                                         (unsigned long long)rec->start,
4722                                         back->full_backref ?
4723                                         "parent" : "root",
4724                                         back->full_backref ?
4725                                         (unsigned long long)dback->parent:
4726                                         (unsigned long long)dback->root,
4727                                         (unsigned long long)dback->owner,
4728                                         (unsigned long long)dback->offset,
4729                                         dback->found_ref, dback->num_refs, back);
4730                         }
4731                         if (dback->disk_bytenr != rec->start) {
4732                                 err = 1;
4733                                 if (!print_errs)
4734                                         goto out;
4735                                 fprintf(stderr, "Backref disk bytenr does not"
4736                                         " match extent record, bytenr=%llu, "
4737                                         "ref bytenr=%llu\n",
4738                                         (unsigned long long)rec->start,
4739                                         (unsigned long long)dback->disk_bytenr);
4740                         }
4741
4742                         if (dback->bytes != rec->nr) {
4743                                 err = 1;
4744                                 if (!print_errs)
4745                                         goto out;
4746                                 fprintf(stderr, "Backref bytes do not match "
4747                                         "extent backref, bytenr=%llu, ref "
4748                                         "bytes=%llu, backref bytes=%llu\n",
4749                                         (unsigned long long)rec->start,
4750                                         (unsigned long long)rec->nr,
4751                                         (unsigned long long)dback->bytes);
4752                         }
4753                 }
4754                 if (!back->is_data) {
4755                         found += 1;
4756                 } else {
4757                         dback = to_data_backref(back);
4758                         found += dback->found_ref;
4759                 }
4760         }
4761         if (found != rec->refs) {
4762                 err = 1;
4763                 if (!print_errs)
4764                         goto out;
4765                 fprintf(stderr, "Incorrect global backref count "
4766                         "on %llu found %llu wanted %llu\n",
4767                         (unsigned long long)rec->start,
4768                         (unsigned long long)found,
4769                         (unsigned long long)rec->refs);
4770         }
4771 out:
4772         return err;
4773 }
4774
4775 static int free_all_extent_backrefs(struct extent_record *rec)
4776 {
4777         struct extent_backref *back;
4778         struct list_head *cur;
4779         while (!list_empty(&rec->backrefs)) {
4780                 cur = rec->backrefs.next;
4781                 back = to_extent_backref(cur);
4782                 list_del(cur);
4783                 free(back);
4784         }
4785         return 0;
4786 }
4787
4788 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4789                                      struct cache_tree *extent_cache)
4790 {
4791         struct cache_extent *cache;
4792         struct extent_record *rec;
4793
4794         while (1) {
4795                 cache = first_cache_extent(extent_cache);
4796                 if (!cache)
4797                         break;
4798                 rec = container_of(cache, struct extent_record, cache);
4799                 remove_cache_extent(extent_cache, cache);
4800                 free_all_extent_backrefs(rec);
4801                 free(rec);
4802         }
4803 }
4804
4805 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4806                                  struct extent_record *rec)
4807 {
4808         if (rec->content_checked && rec->owner_ref_checked &&
4809             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4810             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4811             !rec->bad_full_backref && !rec->crossing_stripes &&
4812             !rec->wrong_chunk_type) {
4813                 remove_cache_extent(extent_cache, &rec->cache);
4814                 free_all_extent_backrefs(rec);
4815                 list_del_init(&rec->list);
4816                 free(rec);
4817         }
4818         return 0;
4819 }
4820
4821 static int check_owner_ref(struct btrfs_root *root,
4822                             struct extent_record *rec,
4823                             struct extent_buffer *buf)
4824 {
4825         struct extent_backref *node;
4826         struct tree_backref *back;
4827         struct btrfs_root *ref_root;
4828         struct btrfs_key key;
4829         struct btrfs_path path;
4830         struct extent_buffer *parent;
4831         int level;
4832         int found = 0;
4833         int ret;
4834
4835         list_for_each_entry(node, &rec->backrefs, list) {
4836                 if (node->is_data)
4837                         continue;
4838                 if (!node->found_ref)
4839                         continue;
4840                 if (node->full_backref)
4841                         continue;
4842                 back = to_tree_backref(node);
4843                 if (btrfs_header_owner(buf) == back->root)
4844                         return 0;
4845         }
4846         BUG_ON(rec->is_root);
4847
4848         /* try to find the block by search corresponding fs tree */
4849         key.objectid = btrfs_header_owner(buf);
4850         key.type = BTRFS_ROOT_ITEM_KEY;
4851         key.offset = (u64)-1;
4852
4853         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4854         if (IS_ERR(ref_root))
4855                 return 1;
4856
4857         level = btrfs_header_level(buf);
4858         if (level == 0)
4859                 btrfs_item_key_to_cpu(buf, &key, 0);
4860         else
4861                 btrfs_node_key_to_cpu(buf, &key, 0);
4862
4863         btrfs_init_path(&path);
4864         path.lowest_level = level + 1;
4865         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4866         if (ret < 0)
4867                 return 0;
4868
4869         parent = path.nodes[level + 1];
4870         if (parent && buf->start == btrfs_node_blockptr(parent,
4871                                                         path.slots[level + 1]))
4872                 found = 1;
4873
4874         btrfs_release_path(&path);
4875         return found ? 0 : 1;
4876 }
4877
4878 static int is_extent_tree_record(struct extent_record *rec)
4879 {
4880         struct list_head *cur = rec->backrefs.next;
4881         struct extent_backref *node;
4882         struct tree_backref *back;
4883         int is_extent = 0;
4884
4885         while(cur != &rec->backrefs) {
4886                 node = to_extent_backref(cur);
4887                 cur = cur->next;
4888                 if (node->is_data)
4889                         return 0;
4890                 back = to_tree_backref(node);
4891                 if (node->full_backref)
4892                         return 0;
4893                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4894                         is_extent = 1;
4895         }
4896         return is_extent;
4897 }
4898
4899
4900 static int record_bad_block_io(struct btrfs_fs_info *info,
4901                                struct cache_tree *extent_cache,
4902                                u64 start, u64 len)
4903 {
4904         struct extent_record *rec;
4905         struct cache_extent *cache;
4906         struct btrfs_key key;
4907
4908         cache = lookup_cache_extent(extent_cache, start, len);
4909         if (!cache)
4910                 return 0;
4911
4912         rec = container_of(cache, struct extent_record, cache);
4913         if (!is_extent_tree_record(rec))
4914                 return 0;
4915
4916         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4917         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4918 }
4919
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two key pointers are swapped.
 * For a leaf the item data, item offsets, item sizes and keys are all
 * exchanged between the two slots.
 *
 * Returns 0 on success, -ENOMEM if a temporary item buffer cannot be
 * allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Node: read both key pointers, then write them back crossed. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        /*
                         * The first key of the node changed, so pass the new
                         * first key to btrfs_fixup_low_keys() starting one
                         * level above this node.
                         */
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                /* Leaf: capture keys, offsets and sizes of both items. */
                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each write copies from one item's buffer
                 * using the OTHER item's size. When item1_size != item2_size
                 * one of these lengths exceeds the size its source buffer
                 * was allocated with - confirm this is intended/safe.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Exchange offset and size between the two item headers. */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /*
                 * Exchange the keys. path->slots[0] is repointed before each
                 * call and is left at slot + 1 afterwards.
                 */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
4987
4988 static int fix_key_order(struct btrfs_trans_handle *trans,
4989                          struct btrfs_root *root,
4990                          struct btrfs_path *path)
4991 {
4992         struct extent_buffer *buf;
4993         struct btrfs_key k1, k2;
4994         int i;
4995         int level = path->lowest_level;
4996         int ret = -EIO;
4997
4998         buf = path->nodes[level];
4999         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5000                 if (level) {
5001                         btrfs_node_key_to_cpu(buf, &k1, i);
5002                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5003                 } else {
5004                         btrfs_item_key_to_cpu(buf, &k1, i);
5005                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5006                 }
5007                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5008                         continue;
5009                 ret = swap_values(root, path, buf, i);
5010                 if (ret)
5011                         break;
5012                 btrfs_mark_buffer_dirty(buf);
5013                 i = 0;
5014         }
5015         return ret;
5016 }
5017
5018 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5019                              struct btrfs_root *root,
5020                              struct btrfs_path *path,
5021                              struct extent_buffer *buf, int slot)
5022 {
5023         struct btrfs_key key;
5024         int nritems = btrfs_header_nritems(buf);
5025
5026         btrfs_item_key_to_cpu(buf, &key, slot);
5027
5028         /* These are all the keys we can deal with missing. */
5029         if (key.type != BTRFS_DIR_INDEX_KEY &&
5030             key.type != BTRFS_EXTENT_ITEM_KEY &&
5031             key.type != BTRFS_METADATA_ITEM_KEY &&
5032             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5033             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5034                 return -1;
5035
5036         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5037                (unsigned long long)key.objectid, key.type,
5038                (unsigned long long)key.offset, slot, buf->start);
5039         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5040                               btrfs_item_nr_offset(slot + 1),
5041                               sizeof(struct btrfs_item) *
5042                               (nritems - slot - 1));
5043         btrfs_set_header_nritems(buf, nritems - 1);
5044         if (slot == 0) {
5045                 struct btrfs_disk_key disk_key;
5046
5047                 btrfs_item_key(buf, &disk_key, 0);
5048                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5049         }
5050         btrfs_mark_buffer_dirty(buf);
5051         return 0;
5052 }
5053
/*
 * Repair the item data offsets of the leaf at path->nodes[0].
 *
 * Each item's end is compared against where it is expected to end:
 * BTRFS_LEAF_DATA_SIZE(root) for item 0, the offset of the previous item
 * for every other item. If an item ends beyond that point,
 * delete_bogus_item() is tried and the scan restarts; if the item cannot
 * be deleted the function fails. If an item ends before that point, its
 * data is moved up by the size of the gap and its offset is updated.
 *
 * Returns 0 on success. Any failure sets ret to -EIO, which then trips
 * the BUG_ON() at the end of the function.
 */
static int fix_item_offset(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_path *path)
{
        struct extent_buffer *buf;
        int i;
        int ret = 0;

        /* We should only get this for leaves */
        BUG_ON(path->lowest_level);
        buf = path->nodes[0];
again:
        for (i = 0; i < btrfs_header_nritems(buf); i++) {
                unsigned int shift = 0, offset;

                /* First item: its data is expected to end at the leaf data size. */
                if (i == 0 && btrfs_item_end_nr(buf, i) !=
                    BTRFS_LEAF_DATA_SIZE(root)) {
                        if (btrfs_item_end_nr(buf, i) >
                            BTRFS_LEAF_DATA_SIZE(root)) {
                                /* Ends past the limit: try dropping it and rescan. */
                                ret = delete_bogus_item(trans, root, path,
                                                        buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "item is off the end of the "
                                        "leaf, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = BTRFS_LEAF_DATA_SIZE(root) -
                                btrfs_item_end_nr(buf, i);
                /* Later items: data is expected to end where the previous item starts. */
                } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
                           btrfs_item_offset_nr(buf, i - 1)) {
                        if (btrfs_item_end_nr(buf, i) >
                            btrfs_item_offset_nr(buf, i - 1)) {
                                /* Ends past the previous item's start: drop it and rescan. */
                                ret = delete_bogus_item(trans, root, path,
                                                        buf, i);
                                if (!ret)
                                        goto again;
                                fprintf(stderr, "items overlap, can't fix\n");
                                ret = -EIO;
                                break;
                        }
                        shift = btrfs_item_offset_nr(buf, i - 1) -
                                btrfs_item_end_nr(buf, i);
                }
                /* No gap in front of this item: nothing to move. */
                if (!shift)
                        continue;

                printf("Shifting item nr %d by %u bytes in block %llu\n",
                       i, shift, (unsigned long long)buf->start);
                /* Move the item's data up by the gap and record the new offset. */
                offset = btrfs_item_offset_nr(buf, i);
                memmove_extent_buffer(buf,
                                      btrfs_leaf_data(buf) + offset + shift,
                                      btrfs_leaf_data(buf) + offset,
                                      btrfs_item_size_nr(buf, i));
                btrfs_set_item_offset(buf, btrfs_item_nr(i),
                                      offset + shift);
                btrfs_mark_buffer_dirty(buf);
        }

        /*
         * We may have moved things, in which case we want to exit so we don't
         * write those changes out.  Once we have proper abort functionality in
         * progs this can be changed to something nicer.
         */
        BUG_ON(ret);
        return ret;
}
5122
5123 /*
5124  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5125  * then just return -EIO.
5126  */
5127 static int try_to_fix_bad_block(struct btrfs_root *root,
5128                                 struct extent_buffer *buf,
5129                                 enum btrfs_tree_block_status status)
5130 {
5131         struct btrfs_trans_handle *trans;
5132         struct ulist *roots;
5133         struct ulist_node *node;
5134         struct btrfs_root *search_root;
5135         struct btrfs_path path;
5136         struct ulist_iterator iter;
5137         struct btrfs_key root_key, key;
5138         int ret;
5139
5140         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5141             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5142                 return -EIO;
5143
5144         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5145         if (ret)
5146                 return -EIO;
5147
5148         btrfs_init_path(&path);
5149         ULIST_ITER_INIT(&iter);
5150         while ((node = ulist_next(roots, &iter))) {
5151                 root_key.objectid = node->val;
5152                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5153                 root_key.offset = (u64)-1;
5154
5155                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5156                 if (IS_ERR(root)) {
5157                         ret = -EIO;
5158                         break;
5159                 }
5160
5161
5162                 trans = btrfs_start_transaction(search_root, 0);
5163                 if (IS_ERR(trans)) {
5164                         ret = PTR_ERR(trans);
5165                         break;
5166                 }
5167
5168                 path.lowest_level = btrfs_header_level(buf);
5169                 path.skip_check_block = 1;
5170                 if (path.lowest_level)
5171                         btrfs_node_key_to_cpu(buf, &key, 0);
5172                 else
5173                         btrfs_item_key_to_cpu(buf, &key, 0);
5174                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5175                 if (ret) {
5176                         ret = -EIO;
5177                         btrfs_commit_transaction(trans, search_root);
5178                         break;
5179                 }
5180                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5181                         ret = fix_key_order(trans, search_root, &path);
5182                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5183                         ret = fix_item_offset(trans, search_root, &path);
5184                 if (ret) {
5185                         btrfs_commit_transaction(trans, search_root);
5186                         break;
5187                 }
5188                 btrfs_release_path(&path);
5189                 btrfs_commit_transaction(trans, search_root);
5190         }
5191         ulist_free(roots);
5192         btrfs_release_path(&path);
5193         return ret;
5194 }
5195
5196 static int check_block(struct btrfs_root *root,
5197                        struct cache_tree *extent_cache,
5198                        struct extent_buffer *buf, u64 flags)
5199 {
5200         struct extent_record *rec;
5201         struct cache_extent *cache;
5202         struct btrfs_key key;
5203         enum btrfs_tree_block_status status;
5204         int ret = 0;
5205         int level;
5206
5207         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5208         if (!cache)
5209                 return 1;
5210         rec = container_of(cache, struct extent_record, cache);
5211         rec->generation = btrfs_header_generation(buf);
5212
5213         level = btrfs_header_level(buf);
5214         if (btrfs_header_nritems(buf) > 0) {
5215
5216                 if (level == 0)
5217                         btrfs_item_key_to_cpu(buf, &key, 0);
5218                 else
5219                         btrfs_node_key_to_cpu(buf, &key, 0);
5220
5221                 rec->info_objectid = key.objectid;
5222         }
5223         rec->info_level = level;
5224
5225         if (btrfs_is_leaf(buf))
5226                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5227         else
5228                 status = btrfs_check_node(root, &rec->parent_key, buf);
5229
5230         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5231                 if (repair)
5232                         status = try_to_fix_bad_block(root, buf, status);
5233                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5234                         ret = -EIO;
5235                         fprintf(stderr, "bad block %llu\n",
5236                                 (unsigned long long)buf->start);
5237                 } else {
5238                         /*
5239                          * Signal to callers we need to start the scan over
5240                          * again since we'll have cowed blocks.
5241                          */
5242                         ret = -EAGAIN;
5243                 }
5244         } else {
5245                 rec->content_checked = 1;
5246                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5247                         rec->owner_ref_checked = 1;
5248                 else {
5249                         ret = check_owner_ref(root, rec, buf);
5250                         if (!ret)
5251                                 rec->owner_ref_checked = 1;
5252                 }
5253         }
5254         if (!ret)
5255                 maybe_free_extent_rec(extent_cache, rec);
5256         return ret;
5257 }
5258
5259 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5260                                                 u64 parent, u64 root)
5261 {
5262         struct list_head *cur = rec->backrefs.next;
5263         struct extent_backref *node;
5264         struct tree_backref *back;
5265
5266         while(cur != &rec->backrefs) {
5267                 node = to_extent_backref(cur);
5268                 cur = cur->next;
5269                 if (node->is_data)
5270                         continue;
5271                 back = to_tree_backref(node);
5272                 if (parent > 0) {
5273                         if (!node->full_backref)
5274                                 continue;
5275                         if (parent == back->parent)
5276                                 return back;
5277                 } else {
5278                         if (node->full_backref)
5279                                 continue;
5280                         if (back->root == root)
5281                                 return back;
5282                 }
5283         }
5284         return NULL;
5285 }
5286
5287 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5288                                                 u64 parent, u64 root)
5289 {
5290         struct tree_backref *ref = malloc(sizeof(*ref));
5291
5292         if (!ref)
5293                 return NULL;
5294         memset(&ref->node, 0, sizeof(ref->node));
5295         if (parent > 0) {
5296                 ref->parent = parent;
5297                 ref->node.full_backref = 1;
5298         } else {
5299                 ref->root = root;
5300                 ref->node.full_backref = 0;
5301         }
5302         list_add_tail(&ref->node.list, &rec->backrefs);
5303
5304         return ref;
5305 }
5306
5307 static struct data_backref *find_data_backref(struct extent_record *rec,
5308                                                 u64 parent, u64 root,
5309                                                 u64 owner, u64 offset,
5310                                                 int found_ref,
5311                                                 u64 disk_bytenr, u64 bytes)
5312 {
5313         struct list_head *cur = rec->backrefs.next;
5314         struct extent_backref *node;
5315         struct data_backref *back;
5316
5317         while(cur != &rec->backrefs) {
5318                 node = to_extent_backref(cur);
5319                 cur = cur->next;
5320                 if (!node->is_data)
5321                         continue;
5322                 back = to_data_backref(node);
5323                 if (parent > 0) {
5324                         if (!node->full_backref)
5325                                 continue;
5326                         if (parent == back->parent)
5327                                 return back;
5328                 } else {
5329                         if (node->full_backref)
5330                                 continue;
5331                         if (back->root == root && back->owner == owner &&
5332                             back->offset == offset) {
5333                                 if (found_ref && node->found_ref &&
5334                                     (back->bytes != bytes ||
5335                                     back->disk_bytenr != disk_bytenr))
5336                                         continue;
5337                                 return back;
5338                         }
5339                 }
5340         }
5341         return NULL;
5342 }
5343
5344 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5345                                                 u64 parent, u64 root,
5346                                                 u64 owner, u64 offset,
5347                                                 u64 max_size)
5348 {
5349         struct data_backref *ref = malloc(sizeof(*ref));
5350
5351         if (!ref)
5352                 return NULL;
5353         memset(&ref->node, 0, sizeof(ref->node));
5354         ref->node.is_data = 1;
5355
5356         if (parent > 0) {
5357                 ref->parent = parent;
5358                 ref->owner = 0;
5359                 ref->offset = 0;
5360                 ref->node.full_backref = 1;
5361         } else {
5362                 ref->root = root;
5363                 ref->owner = owner;
5364                 ref->offset = offset;
5365                 ref->node.full_backref = 0;
5366         }
5367         ref->bytes = max_size;
5368         ref->found_ref = 0;
5369         ref->num_refs = 0;
5370         list_add_tail(&ref->node.list, &rec->backrefs);
5371         if (max_size > rec->max_size)
5372                 rec->max_size = max_size;
5373         return ref;
5374 }
5375
5376 /* Check if the type of extent matches with its chunk */
5377 static void check_extent_type(struct extent_record *rec)
5378 {
5379         struct btrfs_block_group_cache *bg_cache;
5380
5381         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5382         if (!bg_cache)
5383                 return;
5384
5385         /* data extent, check chunk directly*/
5386         if (!rec->metadata) {
5387                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5388                         rec->wrong_chunk_type = 1;
5389                 return;
5390         }
5391
5392         /* metadata extent, check the obvious case first */
5393         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5394                                  BTRFS_BLOCK_GROUP_METADATA))) {
5395                 rec->wrong_chunk_type = 1;
5396                 return;
5397         }
5398
5399         /*
5400          * Check SYSTEM extent, as it's also marked as metadata, we can only
5401          * make sure it's a SYSTEM extent by its backref
5402          */
5403         if (!list_empty(&rec->backrefs)) {
5404                 struct extent_backref *node;
5405                 struct tree_backref *tback;
5406                 u64 bg_type;
5407
5408                 node = to_extent_backref(rec->backrefs.next);
5409                 if (node->is_data) {
5410                         /* tree block shouldn't have data backref */
5411                         rec->wrong_chunk_type = 1;
5412                         return;
5413                 }
5414                 tback = container_of(node, struct tree_backref, node);
5415
5416                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5417                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5418                 else
5419                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5420                 if (!(bg_cache->flags & bg_type))
5421                         rec->wrong_chunk_type = 1;
5422         }
5423 }
5424
5425 /*
5426  * Allocate a new extent record, fill default values from @tmpl and insert int
5427  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5428  * the cache, otherwise it fails.
5429  */
5430 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5431                 struct extent_record *tmpl)
5432 {
5433         struct extent_record *rec;
5434         int ret = 0;
5435
5436         rec = malloc(sizeof(*rec));
5437         if (!rec)
5438                 return -ENOMEM;
5439         rec->start = tmpl->start;
5440         rec->max_size = tmpl->max_size;
5441         rec->nr = max(tmpl->nr, tmpl->max_size);
5442         rec->found_rec = tmpl->found_rec;
5443         rec->content_checked = tmpl->content_checked;
5444         rec->owner_ref_checked = tmpl->owner_ref_checked;
5445         rec->num_duplicates = 0;
5446         rec->metadata = tmpl->metadata;
5447         rec->flag_block_full_backref = FLAG_UNSET;
5448         rec->bad_full_backref = 0;
5449         rec->crossing_stripes = 0;
5450         rec->wrong_chunk_type = 0;
5451         rec->is_root = tmpl->is_root;
5452         rec->refs = tmpl->refs;
5453         rec->extent_item_refs = tmpl->extent_item_refs;
5454         rec->parent_generation = tmpl->parent_generation;
5455         INIT_LIST_HEAD(&rec->backrefs);
5456         INIT_LIST_HEAD(&rec->dups);
5457         INIT_LIST_HEAD(&rec->list);
5458         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5459         rec->cache.start = tmpl->start;
5460         rec->cache.size = tmpl->nr;
5461         ret = insert_cache_extent(extent_cache, &rec->cache);
5462         if (ret) {
5463                 free(rec);
5464                 return ret;
5465         }
5466         bytes_used += rec->nr;
5467
5468         if (tmpl->metadata)
5469                 rec->crossing_stripes = check_crossing_stripes(global_info,
5470                                 rec->start, global_info->tree_root->nodesize);
5471         check_extent_type(rec);
5472         return ret;
5473 }
5474
5475 /*
5476  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5477  * some are hints:
5478  * - refs              - if found, increase refs
5479  * - is_root           - if found, set
5480  * - content_checked   - if found, set
5481  * - owner_ref_checked - if found, set
5482  *
5483  * If not found, create a new one, initialize and insert.
5484  */
5485 static int add_extent_rec(struct cache_tree *extent_cache,
5486                 struct extent_record *tmpl)
5487 {
5488         struct extent_record *rec;
5489         struct cache_extent *cache;
5490         int ret = 0;
5491         int dup = 0;
5492
5493         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5494         if (cache) {
5495                 rec = container_of(cache, struct extent_record, cache);
5496                 if (tmpl->refs)
5497                         rec->refs++;
5498                 if (rec->nr == 1)
5499                         rec->nr = max(tmpl->nr, tmpl->max_size);
5500
5501                 /*
5502                  * We need to make sure to reset nr to whatever the extent
5503                  * record says was the real size, this way we can compare it to
5504                  * the backrefs.
5505                  */
5506                 if (tmpl->found_rec) {
5507                         if (tmpl->start != rec->start || rec->found_rec) {
5508                                 struct extent_record *tmp;
5509
5510                                 dup = 1;
5511                                 if (list_empty(&rec->list))
5512                                         list_add_tail(&rec->list,
5513                                                       &duplicate_extents);
5514
5515                                 /*
5516                                  * We have to do this song and dance in case we
5517                                  * find an extent record that falls inside of
5518                                  * our current extent record but does not have
5519                                  * the same objectid.
5520                                  */
5521                                 tmp = malloc(sizeof(*tmp));
5522                                 if (!tmp)
5523                                         return -ENOMEM;
5524                                 tmp->start = tmpl->start;
5525                                 tmp->max_size = tmpl->max_size;
5526                                 tmp->nr = tmpl->nr;
5527                                 tmp->found_rec = 1;
5528                                 tmp->metadata = tmpl->metadata;
5529                                 tmp->extent_item_refs = tmpl->extent_item_refs;
5530                                 INIT_LIST_HEAD(&tmp->list);
5531                                 list_add_tail(&tmp->list, &rec->dups);
5532                                 rec->num_duplicates++;
5533                         } else {
5534                                 rec->nr = tmpl->nr;
5535                                 rec->found_rec = 1;
5536                         }
5537                 }
5538
5539                 if (tmpl->extent_item_refs && !dup) {
5540                         if (rec->extent_item_refs) {
5541                                 fprintf(stderr, "block %llu rec "
5542                                         "extent_item_refs %llu, passed %llu\n",
5543                                         (unsigned long long)tmpl->start,
5544                                         (unsigned long long)
5545                                                         rec->extent_item_refs,
5546                                         (unsigned long long)tmpl->extent_item_refs);
5547                         }
5548                         rec->extent_item_refs = tmpl->extent_item_refs;
5549                 }
5550                 if (tmpl->is_root)
5551                         rec->is_root = 1;
5552                 if (tmpl->content_checked)
5553                         rec->content_checked = 1;
5554                 if (tmpl->owner_ref_checked)
5555                         rec->owner_ref_checked = 1;
5556                 memcpy(&rec->parent_key, &tmpl->parent_key,
5557                                 sizeof(tmpl->parent_key));
5558                 if (tmpl->parent_generation)
5559                         rec->parent_generation = tmpl->parent_generation;
5560                 if (rec->max_size < tmpl->max_size)
5561                         rec->max_size = tmpl->max_size;
5562
5563                 /*
5564                  * A metadata extent can't cross stripe_len boundary, otherwise
5565                  * kernel scrub won't be able to handle it.
5566                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5567                  * it.
5568                  */
5569                 if (tmpl->metadata)
5570                         rec->crossing_stripes = check_crossing_stripes(
5571                                         global_info, rec->start,
5572                                         global_info->tree_root->nodesize);
5573                 check_extent_type(rec);
5574                 maybe_free_extent_rec(extent_cache, rec);
5575                 return ret;
5576         }
5577
5578         ret = add_extent_rec_nolookup(extent_cache, tmpl);
5579
5580         return ret;
5581 }
5582
5583 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5584                             u64 parent, u64 root, int found_ref)
5585 {
5586         struct extent_record *rec;
5587         struct tree_backref *back;
5588         struct cache_extent *cache;
5589         int ret;
5590
5591         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5592         if (!cache) {
5593                 struct extent_record tmpl;
5594
5595                 memset(&tmpl, 0, sizeof(tmpl));
5596                 tmpl.start = bytenr;
5597                 tmpl.nr = 1;
5598                 tmpl.metadata = 1;
5599
5600                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5601                 if (ret)
5602                         return ret;
5603
5604                 /* really a bug in cache_extent implement now */
5605                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5606                 if (!cache)
5607                         return -ENOENT;
5608         }
5609
5610         rec = container_of(cache, struct extent_record, cache);
5611         if (rec->start != bytenr) {
5612                 /*
5613                  * Several cause, from unaligned bytenr to over lapping extents
5614                  */
5615                 return -EEXIST;
5616         }
5617
5618         back = find_tree_backref(rec, parent, root);
5619         if (!back) {
5620                 back = alloc_tree_backref(rec, parent, root);
5621                 if (!back)
5622                         return -ENOMEM;
5623         }
5624
5625         if (found_ref) {
5626                 if (back->node.found_ref) {
5627                         fprintf(stderr, "Extent back ref already exists "
5628                                 "for %llu parent %llu root %llu \n",
5629                                 (unsigned long long)bytenr,
5630                                 (unsigned long long)parent,
5631                                 (unsigned long long)root);
5632                 }
5633                 back->node.found_ref = 1;
5634         } else {
5635                 if (back->node.found_extent_tree) {
5636                         fprintf(stderr, "Extent back ref already exists "
5637                                 "for %llu parent %llu root %llu \n",
5638                                 (unsigned long long)bytenr,
5639                                 (unsigned long long)parent,
5640                                 (unsigned long long)root);
5641                 }
5642                 back->node.found_extent_tree = 1;
5643         }
5644         check_extent_type(rec);
5645         maybe_free_extent_rec(extent_cache, rec);
5646         return 0;
5647 }
5648
5649 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5650                             u64 parent, u64 root, u64 owner, u64 offset,
5651                             u32 num_refs, int found_ref, u64 max_size)
5652 {
5653         struct extent_record *rec;
5654         struct data_backref *back;
5655         struct cache_extent *cache;
5656         int ret;
5657
5658         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5659         if (!cache) {
5660                 struct extent_record tmpl;
5661
5662                 memset(&tmpl, 0, sizeof(tmpl));
5663                 tmpl.start = bytenr;
5664                 tmpl.nr = 1;
5665                 tmpl.max_size = max_size;
5666
5667                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5668                 if (ret)
5669                         return ret;
5670
5671                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5672                 if (!cache)
5673                         abort();
5674         }
5675
5676         rec = container_of(cache, struct extent_record, cache);
5677         if (rec->max_size < max_size)
5678                 rec->max_size = max_size;
5679
5680         /*
5681          * If found_ref is set then max_size is the real size and must match the
5682          * existing refs.  So if we have already found a ref then we need to
5683          * make sure that this ref matches the existing one, otherwise we need
5684          * to add a new backref so we can notice that the backrefs don't match
5685          * and we need to figure out who is telling the truth.  This is to
5686          * account for that awful fsync bug I introduced where we'd end up with
5687          * a btrfs_file_extent_item that would have its length include multiple
5688          * prealloc extents or point inside of a prealloc extent.
5689          */
5690         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5691                                  bytenr, max_size);
5692         if (!back) {
5693                 back = alloc_data_backref(rec, parent, root, owner, offset,
5694                                           max_size);
5695                 BUG_ON(!back);
5696         }
5697
5698         if (found_ref) {
5699                 BUG_ON(num_refs != 1);
5700                 if (back->node.found_ref)
5701                         BUG_ON(back->bytes != max_size);
5702                 back->node.found_ref = 1;
5703                 back->found_ref += 1;
5704                 back->bytes = max_size;
5705                 back->disk_bytenr = bytenr;
5706                 rec->refs += 1;
5707                 rec->content_checked = 1;
5708                 rec->owner_ref_checked = 1;
5709         } else {
5710                 if (back->node.found_extent_tree) {
5711                         fprintf(stderr, "Extent back ref already exists "
5712                                 "for %llu parent %llu root %llu "
5713                                 "owner %llu offset %llu num_refs %lu\n",
5714                                 (unsigned long long)bytenr,
5715                                 (unsigned long long)parent,
5716                                 (unsigned long long)root,
5717                                 (unsigned long long)owner,
5718                                 (unsigned long long)offset,
5719                                 (unsigned long)num_refs);
5720                 }
5721                 back->num_refs = num_refs;
5722                 back->node.found_extent_tree = 1;
5723         }
5724         maybe_free_extent_rec(extent_cache, rec);
5725         return 0;
5726 }
5727
5728 static int add_pending(struct cache_tree *pending,
5729                        struct cache_tree *seen, u64 bytenr, u32 size)
5730 {
5731         int ret;
5732         ret = add_cache_extent(seen, bytenr, size);
5733         if (ret)
5734                 return ret;
5735         add_cache_extent(pending, bytenr, size);
5736         return 0;
5737 }
5738
5739 static int pick_next_pending(struct cache_tree *pending,
5740                         struct cache_tree *reada,
5741                         struct cache_tree *nodes,
5742                         u64 last, struct block_info *bits, int bits_nr,
5743                         int *reada_bits)
5744 {
5745         unsigned long node_start = last;
5746         struct cache_extent *cache;
5747         int ret;
5748
5749         cache = search_cache_extent(reada, 0);
5750         if (cache) {
5751                 bits[0].start = cache->start;
5752                 bits[0].size = cache->size;
5753                 *reada_bits = 1;
5754                 return 1;
5755         }
5756         *reada_bits = 0;
5757         if (node_start > 32768)
5758                 node_start -= 32768;
5759
5760         cache = search_cache_extent(nodes, node_start);
5761         if (!cache)
5762                 cache = search_cache_extent(nodes, 0);
5763
5764         if (!cache) {
5765                  cache = search_cache_extent(pending, 0);
5766                  if (!cache)
5767                          return 0;
5768                  ret = 0;
5769                  do {
5770                          bits[ret].start = cache->start;
5771                          bits[ret].size = cache->size;
5772                          cache = next_cache_extent(cache);
5773                          ret++;
5774                  } while (cache && ret < bits_nr);
5775                  return ret;
5776         }
5777
5778         ret = 0;
5779         do {
5780                 bits[ret].start = cache->start;
5781                 bits[ret].size = cache->size;
5782                 cache = next_cache_extent(cache);
5783                 ret++;
5784         } while (cache && ret < bits_nr);
5785
5786         if (bits_nr - ret > 8) {
5787                 u64 lookup = bits[0].start + bits[0].size;
5788                 struct cache_extent *next;
5789                 next = search_cache_extent(pending, lookup);
5790                 while(next) {
5791                         if (next->start - lookup > 32768)
5792                                 break;
5793                         bits[ret].start = next->start;
5794                         bits[ret].size = next->size;
5795                         lookup = next->start + next->size;
5796                         ret++;
5797                         if (ret == bits_nr)
5798                                 break;
5799                         next = next_cache_extent(next);
5800                         if (!next)
5801                                 break;
5802                 }
5803         }
5804         return ret;
5805 }
5806
5807 static void free_chunk_record(struct cache_extent *cache)
5808 {
5809         struct chunk_record *rec;
5810
5811         rec = container_of(cache, struct chunk_record, cache);
5812         list_del_init(&rec->list);
5813         list_del_init(&rec->dextents);
5814         free(rec);
5815 }
5816
5817 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5818 {
5819         cache_tree_free_extents(chunk_cache, free_chunk_record);
5820 }
5821
5822 static void free_device_record(struct rb_node *node)
5823 {
5824         struct device_record *rec;
5825
5826         rec = container_of(node, struct device_record, node);
5827         free(rec);
5828 }
5829
5830 FREE_RB_BASED_TREE(device_cache, free_device_record);
5831
5832 int insert_block_group_record(struct block_group_tree *tree,
5833                               struct block_group_record *bg_rec)
5834 {
5835         int ret;
5836
5837         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5838         if (ret)
5839                 return ret;
5840
5841         list_add_tail(&bg_rec->list, &tree->block_groups);
5842         return 0;
5843 }
5844
5845 static void free_block_group_record(struct cache_extent *cache)
5846 {
5847         struct block_group_record *rec;
5848
5849         rec = container_of(cache, struct block_group_record, cache);
5850         list_del_init(&rec->list);
5851         free(rec);
5852 }
5853
5854 void free_block_group_tree(struct block_group_tree *tree)
5855 {
5856         cache_tree_free_extents(&tree->tree, free_block_group_record);
5857 }
5858
5859 int insert_device_extent_record(struct device_extent_tree *tree,
5860                                 struct device_extent_record *de_rec)
5861 {
5862         int ret;
5863
5864         /*
5865          * Device extent is a bit different from the other extents, because
5866          * the extents which belong to the different devices may have the
5867          * same start and size, so we need use the special extent cache
5868          * search/insert functions.
5869          */
5870         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5871         if (ret)
5872                 return ret;
5873
5874         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5875         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5876         return 0;
5877 }
5878
5879 static void free_device_extent_record(struct cache_extent *cache)
5880 {
5881         struct device_extent_record *rec;
5882
5883         rec = container_of(cache, struct device_extent_record, cache);
5884         if (!list_empty(&rec->chunk_list))
5885                 list_del_init(&rec->chunk_list);
5886         if (!list_empty(&rec->device_list))
5887                 list_del_init(&rec->device_list);
5888         free(rec);
5889 }
5890
5891 void free_device_extent_tree(struct device_extent_tree *tree)
5892 {
5893         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5894 }
5895
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the ref count of the
 * item.  In both cases key.objectid is the extent bytenr and key.offset is
 * passed as the parent.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
				key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
5916
5917 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5918                                             struct btrfs_key *key,
5919                                             int slot)
5920 {
5921         struct btrfs_chunk *ptr;
5922         struct chunk_record *rec;
5923         int num_stripes, i;
5924
5925         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5926         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5927
5928         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5929         if (!rec) {
5930                 fprintf(stderr, "memory allocation failed\n");
5931                 exit(-1);
5932         }
5933
5934         INIT_LIST_HEAD(&rec->list);
5935         INIT_LIST_HEAD(&rec->dextents);
5936         rec->bg_rec = NULL;
5937
5938         rec->cache.start = key->offset;
5939         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5940
5941         rec->generation = btrfs_header_generation(leaf);
5942
5943         rec->objectid = key->objectid;
5944         rec->type = key->type;
5945         rec->offset = key->offset;
5946
5947         rec->length = rec->cache.size;
5948         rec->owner = btrfs_chunk_owner(leaf, ptr);
5949         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5950         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5951         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5952         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5953         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5954         rec->num_stripes = num_stripes;
5955         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5956
5957         for (i = 0; i < rec->num_stripes; ++i) {
5958                 rec->stripes[i].devid =
5959                         btrfs_stripe_devid_nr(leaf, ptr, i);
5960                 rec->stripes[i].offset =
5961                         btrfs_stripe_offset_nr(leaf, ptr, i);
5962                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5963                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5964                                 BTRFS_UUID_SIZE);
5965         }
5966
5967         return rec;
5968 }
5969
5970 static int process_chunk_item(struct cache_tree *chunk_cache,
5971                               struct btrfs_key *key, struct extent_buffer *eb,
5972                               int slot)
5973 {
5974         struct chunk_record *rec;
5975         struct btrfs_chunk *chunk;
5976         int ret = 0;
5977
5978         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5979         /*
5980          * Do extra check for this chunk item,
5981          *
5982          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5983          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5984          * and owner<->key_type check.
5985          */
5986         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5987                                       key->offset);
5988         if (ret < 0) {
5989                 error("chunk(%llu, %llu) is not valid, ignore it",
5990                       key->offset, btrfs_chunk_length(eb, chunk));
5991                 return 0;
5992         }
5993         rec = btrfs_new_chunk_record(eb, key, slot);
5994         ret = insert_cache_extent(chunk_cache, &rec->cache);
5995         if (ret) {
5996                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5997                         rec->offset, rec->length);
5998                 free(rec);
5999         }
6000
6001         return ret;
6002 }
6003
6004 static int process_device_item(struct rb_root *dev_cache,
6005                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6006 {
6007         struct btrfs_dev_item *ptr;
6008         struct device_record *rec;
6009         int ret = 0;
6010
6011         ptr = btrfs_item_ptr(eb,
6012                 slot, struct btrfs_dev_item);
6013
6014         rec = malloc(sizeof(*rec));
6015         if (!rec) {
6016                 fprintf(stderr, "memory allocation failed\n");
6017                 return -ENOMEM;
6018         }
6019
6020         rec->devid = key->offset;
6021         rec->generation = btrfs_header_generation(eb);
6022
6023         rec->objectid = key->objectid;
6024         rec->type = key->type;
6025         rec->offset = key->offset;
6026
6027         rec->devid = btrfs_device_id(eb, ptr);
6028         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6029         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6030
6031         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6032         if (ret) {
6033                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6034                 free(rec);
6035         }
6036
6037         return ret;
6038 }
6039
6040 struct block_group_record *
6041 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6042                              int slot)
6043 {
6044         struct btrfs_block_group_item *ptr;
6045         struct block_group_record *rec;
6046
6047         rec = calloc(1, sizeof(*rec));
6048         if (!rec) {
6049                 fprintf(stderr, "memory allocation failed\n");
6050                 exit(-1);
6051         }
6052
6053         rec->cache.start = key->objectid;
6054         rec->cache.size = key->offset;
6055
6056         rec->generation = btrfs_header_generation(leaf);
6057
6058         rec->objectid = key->objectid;
6059         rec->type = key->type;
6060         rec->offset = key->offset;
6061
6062         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6063         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6064
6065         INIT_LIST_HEAD(&rec->list);
6066
6067         return rec;
6068 }
6069
6070 static int process_block_group_item(struct block_group_tree *block_group_cache,
6071                                     struct btrfs_key *key,
6072                                     struct extent_buffer *eb, int slot)
6073 {
6074         struct block_group_record *rec;
6075         int ret = 0;
6076
6077         rec = btrfs_new_block_group_record(eb, key, slot);
6078         ret = insert_block_group_record(block_group_cache, rec);
6079         if (ret) {
6080                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6081                         rec->objectid, rec->offset);
6082                 free(rec);
6083         }
6084
6085         return ret;
6086 }
6087
6088 struct device_extent_record *
6089 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6090                                struct btrfs_key *key, int slot)
6091 {
6092         struct device_extent_record *rec;
6093         struct btrfs_dev_extent *ptr;
6094
6095         rec = calloc(1, sizeof(*rec));
6096         if (!rec) {
6097                 fprintf(stderr, "memory allocation failed\n");
6098                 exit(-1);
6099         }
6100
6101         rec->cache.objectid = key->objectid;
6102         rec->cache.start = key->offset;
6103
6104         rec->generation = btrfs_header_generation(leaf);
6105
6106         rec->objectid = key->objectid;
6107         rec->type = key->type;
6108         rec->offset = key->offset;
6109
6110         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6111         rec->chunk_objecteid =
6112                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6113         rec->chunk_offset =
6114                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6115         rec->length = btrfs_dev_extent_length(leaf, ptr);
6116         rec->cache.size = rec->length;
6117
6118         INIT_LIST_HEAD(&rec->chunk_list);
6119         INIT_LIST_HEAD(&rec->device_list);
6120
6121         return rec;
6122 }
6123
6124 static int
6125 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6126                            struct btrfs_key *key, struct extent_buffer *eb,
6127                            int slot)
6128 {
6129         struct device_extent_record *rec;
6130         int ret;
6131
6132         rec = btrfs_new_device_extent_record(eb, key, slot);
6133         ret = insert_device_extent_record(dev_extent_cache, rec);
6134         if (ret) {
6135                 fprintf(stderr,
6136                         "Device extent[%llu, %llu, %llu] existed.\n",
6137                         rec->objectid, rec->offset, rec->length);
6138                 free(rec);
6139         }
6140
6141         return ret;
6142 }
6143
6144 static int process_extent_item(struct btrfs_root *root,
6145                                struct cache_tree *extent_cache,
6146                                struct extent_buffer *eb, int slot)
6147 {
6148         struct btrfs_extent_item *ei;
6149         struct btrfs_extent_inline_ref *iref;
6150         struct btrfs_extent_data_ref *dref;
6151         struct btrfs_shared_data_ref *sref;
6152         struct btrfs_key key;
6153         struct extent_record tmpl;
6154         unsigned long end;
6155         unsigned long ptr;
6156         int ret;
6157         int type;
6158         u32 item_size = btrfs_item_size_nr(eb, slot);
6159         u64 refs = 0;
6160         u64 offset;
6161         u64 num_bytes;
6162         int metadata = 0;
6163
6164         btrfs_item_key_to_cpu(eb, &key, slot);
6165
6166         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6167                 metadata = 1;
6168                 num_bytes = root->nodesize;
6169         } else {
6170                 num_bytes = key.offset;
6171         }
6172
6173         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6174                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6175                       key.objectid, root->sectorsize);
6176                 return -EIO;
6177         }
6178         if (item_size < sizeof(*ei)) {
6179 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6180                 struct btrfs_extent_item_v0 *ei0;
6181                 BUG_ON(item_size != sizeof(*ei0));
6182                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6183                 refs = btrfs_extent_refs_v0(eb, ei0);
6184 #else
6185                 BUG();
6186 #endif
6187                 memset(&tmpl, 0, sizeof(tmpl));
6188                 tmpl.start = key.objectid;
6189                 tmpl.nr = num_bytes;
6190                 tmpl.extent_item_refs = refs;
6191                 tmpl.metadata = metadata;
6192                 tmpl.found_rec = 1;
6193                 tmpl.max_size = num_bytes;
6194
6195                 return add_extent_rec(extent_cache, &tmpl);
6196         }
6197
6198         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6199         refs = btrfs_extent_refs(eb, ei);
6200         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6201                 metadata = 1;
6202         else
6203                 metadata = 0;
6204         if (metadata && num_bytes != root->nodesize) {
6205                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6206                       num_bytes, root->nodesize);
6207                 return -EIO;
6208         }
6209         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6210                 error("ignore invalid data extent, length %llu is not aligned to %u",
6211                       num_bytes, root->sectorsize);
6212                 return -EIO;
6213         }
6214
6215         memset(&tmpl, 0, sizeof(tmpl));
6216         tmpl.start = key.objectid;
6217         tmpl.nr = num_bytes;
6218         tmpl.extent_item_refs = refs;
6219         tmpl.metadata = metadata;
6220         tmpl.found_rec = 1;
6221         tmpl.max_size = num_bytes;
6222         add_extent_rec(extent_cache, &tmpl);
6223
6224         ptr = (unsigned long)(ei + 1);
6225         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6226             key.type == BTRFS_EXTENT_ITEM_KEY)
6227                 ptr += sizeof(struct btrfs_tree_block_info);
6228
6229         end = (unsigned long)ei + item_size;
6230         while (ptr < end) {
6231                 iref = (struct btrfs_extent_inline_ref *)ptr;
6232                 type = btrfs_extent_inline_ref_type(eb, iref);
6233                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6234                 switch (type) {
6235                 case BTRFS_TREE_BLOCK_REF_KEY:
6236                         ret = add_tree_backref(extent_cache, key.objectid,
6237                                         0, offset, 0);
6238                         if (ret < 0)
6239                                 error("add_tree_backref failed: %s",
6240                                       strerror(-ret));
6241                         break;
6242                 case BTRFS_SHARED_BLOCK_REF_KEY:
6243                         ret = add_tree_backref(extent_cache, key.objectid,
6244                                         offset, 0, 0);
6245                         if (ret < 0)
6246                                 error("add_tree_backref failed: %s",
6247                                       strerror(-ret));
6248                         break;
6249                 case BTRFS_EXTENT_DATA_REF_KEY:
6250                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6251                         add_data_backref(extent_cache, key.objectid, 0,
6252                                         btrfs_extent_data_ref_root(eb, dref),
6253                                         btrfs_extent_data_ref_objectid(eb,
6254                                                                        dref),
6255                                         btrfs_extent_data_ref_offset(eb, dref),
6256                                         btrfs_extent_data_ref_count(eb, dref),
6257                                         0, num_bytes);
6258                         break;
6259                 case BTRFS_SHARED_DATA_REF_KEY:
6260                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6261                         add_data_backref(extent_cache, key.objectid, offset,
6262                                         0, 0, 0,
6263                                         btrfs_shared_data_ref_count(eb, sref),
6264                                         0, num_bytes);
6265                         break;
6266                 default:
6267                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6268                                 key.objectid, key.type, num_bytes);
6269                         goto out;
6270                 }
6271                 ptr += btrfs_extent_inline_ref_size(type);
6272         }
6273         WARN_ON(ptr > end);
6274 out:
6275         return 0;
6276 }
6277
6278 static int check_cache_range(struct btrfs_root *root,
6279                              struct btrfs_block_group_cache *cache,
6280                              u64 offset, u64 bytes)
6281 {
6282         struct btrfs_free_space *entry;
6283         u64 *logical;
6284         u64 bytenr;
6285         int stripe_len;
6286         int i, nr, ret;
6287
6288         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6289                 bytenr = btrfs_sb_offset(i);
6290                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6291                                        cache->key.objectid, bytenr, 0,
6292                                        &logical, &nr, &stripe_len);
6293                 if (ret)
6294                         return ret;
6295
6296                 while (nr--) {
6297                         if (logical[nr] + stripe_len <= offset)
6298                                 continue;
6299                         if (offset + bytes <= logical[nr])
6300                                 continue;
6301                         if (logical[nr] == offset) {
6302                                 if (stripe_len >= bytes) {
6303                                         free(logical);
6304                                         return 0;
6305                                 }
6306                                 bytes -= stripe_len;
6307                                 offset += stripe_len;
6308                         } else if (logical[nr] < offset) {
6309                                 if (logical[nr] + stripe_len >=
6310                                     offset + bytes) {
6311                                         free(logical);
6312                                         return 0;
6313                                 }
6314                                 bytes = (offset + bytes) -
6315                                         (logical[nr] + stripe_len);
6316                                 offset = logical[nr] + stripe_len;
6317                         } else {
6318                                 /*
6319                                  * Could be tricky, the super may land in the
6320                                  * middle of the area we're checking.  First
6321                                  * check the easiest case, it's at the end.
6322                                  */
6323                                 if (logical[nr] + stripe_len >=
6324                                     bytes + offset) {
6325                                         bytes = logical[nr] - offset;
6326                                         continue;
6327                                 }
6328
6329                                 /* Check the left side */
6330                                 ret = check_cache_range(root, cache,
6331                                                         offset,
6332                                                         logical[nr] - offset);
6333                                 if (ret) {
6334                                         free(logical);
6335                                         return ret;
6336                                 }
6337
6338                                 /* Now we continue with the right side */
6339                                 bytes = (offset + bytes) -
6340                                         (logical[nr] + stripe_len);
6341                                 offset = logical[nr] + stripe_len;
6342                         }
6343                 }
6344
6345                 free(logical);
6346         }
6347
6348         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6349         if (!entry) {
6350                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6351                         offset, offset+bytes);
6352                 return -EINVAL;
6353         }
6354
6355         if (entry->offset != offset) {
6356                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6357                         entry->offset);
6358                 return -EINVAL;
6359         }
6360
6361         if (entry->bytes != bytes) {
6362                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6363                         bytes, entry->bytes, offset);
6364                 return -EINVAL;
6365         }
6366
6367         unlink_free_space(cache->free_space_ctl, entry);
6368         free(entry);
6369         return 0;
6370 }
6371
6372 static int verify_space_cache(struct btrfs_root *root,
6373                               struct btrfs_block_group_cache *cache)
6374 {
6375         struct btrfs_path path;
6376         struct extent_buffer *leaf;
6377         struct btrfs_key key;
6378         u64 last;
6379         int ret = 0;
6380
6381         root = root->fs_info->extent_root;
6382
6383         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6384
6385         btrfs_init_path(&path);
6386         key.objectid = last;
6387         key.offset = 0;
6388         key.type = BTRFS_EXTENT_ITEM_KEY;
6389         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6390         if (ret < 0)
6391                 goto out;
6392         ret = 0;
6393         while (1) {
6394                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6395                         ret = btrfs_next_leaf(root, &path);
6396                         if (ret < 0)
6397                                 goto out;
6398                         if (ret > 0) {
6399                                 ret = 0;
6400                                 break;
6401                         }
6402                 }
6403                 leaf = path.nodes[0];
6404                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6405                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6406                         break;
6407                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6408                     key.type != BTRFS_METADATA_ITEM_KEY) {
6409                         path.slots[0]++;
6410                         continue;
6411                 }
6412
6413                 if (last == key.objectid) {
6414                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6415                                 last = key.objectid + key.offset;
6416                         else
6417                                 last = key.objectid + root->nodesize;
6418                         path.slots[0]++;
6419                         continue;
6420                 }
6421
6422                 ret = check_cache_range(root, cache, last,
6423                                         key.objectid - last);
6424                 if (ret)
6425                         break;
6426                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6427                         last = key.objectid + key.offset;
6428                 else
6429                         last = key.objectid + root->nodesize;
6430                 path.slots[0]++;
6431         }
6432
6433         if (last < cache->key.objectid + cache->key.offset)
6434                 ret = check_cache_range(root, cache, last,
6435                                         cache->key.objectid +
6436                                         cache->key.offset - last);
6437
6438 out:
6439         btrfs_release_path(&path);
6440
6441         if (!ret &&
6442             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6443                 fprintf(stderr, "There are still entries left in the space "
6444                         "cache\n");
6445                 ret = -EINVAL;
6446         }
6447
6448         return ret;
6449 }
6450
6451 static int check_space_cache(struct btrfs_root *root)
6452 {
6453         struct btrfs_block_group_cache *cache;
6454         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6455         int ret;
6456         int error = 0;
6457
6458         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6459             btrfs_super_generation(root->fs_info->super_copy) !=
6460             btrfs_super_cache_generation(root->fs_info->super_copy)) {
6461                 printf("cache and super generation don't match, space cache "
6462                        "will be invalidated\n");
6463                 return 0;
6464         }
6465
6466         if (ctx.progress_enabled) {
6467                 ctx.tp = TASK_FREE_SPACE;
6468                 task_start(ctx.info);
6469         }
6470
6471         while (1) {
6472                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6473                 if (!cache)
6474                         break;
6475
6476                 start = cache->key.objectid + cache->key.offset;
6477                 if (!cache->free_space_ctl) {
6478                         if (btrfs_init_free_space_ctl(cache,
6479                                                       root->sectorsize)) {
6480                                 ret = -ENOMEM;
6481                                 break;
6482                         }
6483                 } else {
6484                         btrfs_remove_free_space_cache(cache);
6485                 }
6486
6487                 if (btrfs_fs_compat_ro(root->fs_info,
6488                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6489                         ret = exclude_super_stripes(root, cache);
6490                         if (ret) {
6491                                 fprintf(stderr, "could not exclude super stripes: %s\n",
6492                                         strerror(-ret));
6493                                 error++;
6494                                 continue;
6495                         }
6496                         ret = load_free_space_tree(root->fs_info, cache);
6497                         free_excluded_extents(root, cache);
6498                         if (ret < 0) {
6499                                 fprintf(stderr, "could not load free space tree: %s\n",
6500                                         strerror(-ret));
6501                                 error++;
6502                                 continue;
6503                         }
6504                         error += ret;
6505                 } else {
6506                         ret = load_free_space_cache(root->fs_info, cache);
6507                         if (!ret)
6508                                 continue;
6509                 }
6510
6511                 ret = verify_space_cache(root, cache);
6512                 if (ret) {
6513                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
6514                                 cache->key.objectid);
6515                         error++;
6516                 }
6517         }
6518
6519         task_stop(ctx.info);
6520
6521         return error ? -EINVAL : 0;
6522 }
6523
6524 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6525                         u64 num_bytes, unsigned long leaf_offset,
6526                         struct extent_buffer *eb) {
6527
6528         u64 offset = 0;
6529         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6530         char *data;
6531         unsigned long csum_offset;
6532         u32 csum;
6533         u32 csum_expected;
6534         u64 read_len;
6535         u64 data_checked = 0;
6536         u64 tmp;
6537         int ret = 0;
6538         int mirror;
6539         int num_copies;
6540
6541         if (num_bytes % root->sectorsize)
6542                 return -EINVAL;
6543
6544         data = malloc(num_bytes);
6545         if (!data)
6546                 return -ENOMEM;
6547
6548         while (offset < num_bytes) {
6549                 mirror = 0;
6550 again:
6551                 read_len = num_bytes - offset;
6552                 /* read as much space once a time */
6553                 ret = read_extent_data(root, data + offset,
6554                                 bytenr + offset, &read_len, mirror);
6555                 if (ret)
6556                         goto out;
6557                 data_checked = 0;
6558                 /* verify every 4k data's checksum */
6559                 while (data_checked < read_len) {
6560                         csum = ~(u32)0;
6561                         tmp = offset + data_checked;
6562
6563                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
6564                                                csum, root->sectorsize);
6565                         btrfs_csum_final(csum, (u8 *)&csum);
6566
6567                         csum_offset = leaf_offset +
6568                                  tmp / root->sectorsize * csum_size;
6569                         read_extent_buffer(eb, (char *)&csum_expected,
6570                                            csum_offset, csum_size);
6571                         /* try another mirror */
6572                         if (csum != csum_expected) {
6573                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6574                                                 mirror, bytenr + tmp,
6575                                                 csum, csum_expected);
6576                                 num_copies = btrfs_num_copies(
6577                                                 &root->fs_info->mapping_tree,
6578                                                 bytenr, num_bytes);
6579                                 if (mirror < num_copies - 1) {
6580                                         mirror += 1;
6581                                         goto again;
6582                                 }
6583                         }
6584                         data_checked += root->sectorsize;
6585                 }
6586                 offset += read_len;
6587         }
6588 out:
6589         free(data);
6590         return ret;
6591 }
6592
6593 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6594                                u64 num_bytes)
6595 {
6596         struct btrfs_path path;
6597         struct extent_buffer *leaf;
6598         struct btrfs_key key;
6599         int ret;
6600
6601         btrfs_init_path(&path);
6602         key.objectid = bytenr;
6603         key.type = BTRFS_EXTENT_ITEM_KEY;
6604         key.offset = (u64)-1;
6605
6606 again:
6607         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6608                                 0, 0);
6609         if (ret < 0) {
6610                 fprintf(stderr, "Error looking up extent record %d\n", ret);
6611                 btrfs_release_path(&path);
6612                 return ret;
6613         } else if (ret) {
6614                 if (path.slots[0] > 0) {
6615                         path.slots[0]--;
6616                 } else {
6617                         ret = btrfs_prev_leaf(root, &path);
6618                         if (ret < 0) {
6619                                 goto out;
6620                         } else if (ret > 0) {
6621                                 ret = 0;
6622                                 goto out;
6623                         }
6624                 }
6625         }
6626
6627         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6628
6629         /*
6630          * Block group items come before extent items if they have the same
6631          * bytenr, so walk back one more just in case.  Dear future traveller,
6632          * first congrats on mastering time travel.  Now if it's not too much
6633          * trouble could you go back to 2006 and tell Chris to make the
6634          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6635          * EXTENT_ITEM_KEY please?
6636          */
6637         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6638                 if (path.slots[0] > 0) {
6639                         path.slots[0]--;
6640                 } else {
6641                         ret = btrfs_prev_leaf(root, &path);
6642                         if (ret < 0) {
6643                                 goto out;
6644                         } else if (ret > 0) {
6645                                 ret = 0;
6646                                 goto out;
6647                         }
6648                 }
6649                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6650         }
6651
6652         while (num_bytes) {
6653                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6654                         ret = btrfs_next_leaf(root, &path);
6655                         if (ret < 0) {
6656                                 fprintf(stderr, "Error going to next leaf "
6657                                         "%d\n", ret);
6658                                 btrfs_release_path(&path);
6659                                 return ret;
6660                         } else if (ret) {
6661                                 break;
6662                         }
6663                 }
6664                 leaf = path.nodes[0];
6665                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6666                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6667                         path.slots[0]++;
6668                         continue;
6669                 }
6670                 if (key.objectid + key.offset < bytenr) {
6671                         path.slots[0]++;
6672                         continue;
6673                 }
6674                 if (key.objectid > bytenr + num_bytes)
6675                         break;
6676
6677                 if (key.objectid == bytenr) {
6678                         if (key.offset >= num_bytes) {
6679                                 num_bytes = 0;
6680                                 break;
6681                         }
6682                         num_bytes -= key.offset;
6683                         bytenr += key.offset;
6684                 } else if (key.objectid < bytenr) {
6685                         if (key.objectid + key.offset >= bytenr + num_bytes) {
6686                                 num_bytes = 0;
6687                                 break;
6688                         }
6689                         num_bytes = (bytenr + num_bytes) -
6690                                 (key.objectid + key.offset);
6691                         bytenr = key.objectid + key.offset;
6692                 } else {
6693                         if (key.objectid + key.offset < bytenr + num_bytes) {
6694                                 u64 new_start = key.objectid + key.offset;
6695                                 u64 new_bytes = bytenr + num_bytes - new_start;
6696
6697                                 /*
6698                                  * Weird case, the extent is in the middle of
6699                                  * our range, we'll have to search one side
6700                                  * and then the other.  Not sure if this happens
6701                                  * in real life, but no harm in coding it up
6702                                  * anyway just in case.
6703                                  */
6704                                 btrfs_release_path(&path);
6705                                 ret = check_extent_exists(root, new_start,
6706                                                           new_bytes);
6707                                 if (ret) {
6708                                         fprintf(stderr, "Right section didn't "
6709                                                 "have a record\n");
6710                                         break;
6711                                 }
6712                                 num_bytes = key.objectid - bytenr;
6713                                 goto again;
6714                         }
6715                         num_bytes = key.objectid - bytenr;
6716                 }
6717                 path.slots[0]++;
6718         }
6719         ret = 0;
6720
6721 out:
6722         if (num_bytes && !ret) {
6723                 fprintf(stderr, "There are no extents for csum range "
6724                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6725                 ret = 1;
6726         }
6727
6728         btrfs_release_path(&path);
6729         return ret;
6730 }
6731
6732 static int check_csums(struct btrfs_root *root)
6733 {
6734         struct btrfs_path path;
6735         struct extent_buffer *leaf;
6736         struct btrfs_key key;
6737         u64 offset = 0, num_bytes = 0;
6738         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6739         int errors = 0;
6740         int ret;
6741         u64 data_len;
6742         unsigned long leaf_offset;
6743
6744         root = root->fs_info->csum_root;
6745         if (!extent_buffer_uptodate(root->node)) {
6746                 fprintf(stderr, "No valid csum tree found\n");
6747                 return -ENOENT;
6748         }
6749
6750         btrfs_init_path(&path);
6751         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6752         key.type = BTRFS_EXTENT_CSUM_KEY;
6753         key.offset = 0;
6754         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6755         if (ret < 0) {
6756                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6757                 btrfs_release_path(&path);
6758                 return ret;
6759         }
6760
6761         if (ret > 0 && path.slots[0])
6762                 path.slots[0]--;
6763         ret = 0;
6764
6765         while (1) {
6766                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6767                         ret = btrfs_next_leaf(root, &path);
6768                         if (ret < 0) {
6769                                 fprintf(stderr, "Error going to next leaf "
6770                                         "%d\n", ret);
6771                                 break;
6772                         }
6773                         if (ret)
6774                                 break;
6775                 }
6776                 leaf = path.nodes[0];
6777
6778                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6779                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6780                         path.slots[0]++;
6781                         continue;
6782                 }
6783
6784                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6785                               csum_size) * root->sectorsize;
6786                 if (!check_data_csum)
6787                         goto skip_csum_check;
6788                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6789                 ret = check_extent_csums(root, key.offset, data_len,
6790                                          leaf_offset, leaf);
6791                 if (ret)
6792                         break;
6793 skip_csum_check:
6794                 if (!num_bytes) {
6795                         offset = key.offset;
6796                 } else if (key.offset != offset + num_bytes) {
6797                         ret = check_extent_exists(root, offset, num_bytes);
6798                         if (ret) {
6799                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6800                                         "there is no extent record\n",
6801                                         offset, offset+num_bytes);
6802                                 errors++;
6803                         }
6804                         offset = key.offset;
6805                         num_bytes = 0;
6806                 }
6807                 num_bytes += data_len;
6808                 path.slots[0]++;
6809         }
6810
6811         btrfs_release_path(&path);
6812         return errors;
6813 }
6814
6815 static int is_dropped_key(struct btrfs_key *key,
6816                           struct btrfs_key *drop_key) {
6817         if (key->objectid < drop_key->objectid)
6818                 return 1;
6819         else if (key->objectid == drop_key->objectid) {
6820                 if (key->type < drop_key->type)
6821                         return 1;
6822                 else if (key->type == drop_key->type) {
6823                         if (key->offset < drop_key->offset)
6824                                 return 1;
6825                 }
6826         }
6827         return 0;
6828 }
6829
6830 /*
6831  * Here are the rules for FULL_BACKREF.
6832  *
6833  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6834  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6835  *      FULL_BACKREF set.
6836  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6837  *    if it happened after the relocation occurred since we'll have dropped the
6838  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6839  *    have no real way to know for sure.
6840  *
6841  * We process the blocks one root at a time, and we start from the lowest root
6842  * objectid and go to the highest.  So we can just lookup the owner backref for
6843  * the record and if we don't find it then we know it doesn't exist and we have
6844  * a FULL BACKREF.
6845  *
6846  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6847  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6848  * be set or not and then we can check later once we've gathered all the refs.
6849  */
6850 static int calc_extent_flag(struct btrfs_root *root,
6851                            struct cache_tree *extent_cache,
6852                            struct extent_buffer *buf,
6853                            struct root_item_record *ri,
6854                            u64 *flags)
6855 {
6856         struct extent_record *rec;
6857         struct cache_extent *cache;
6858         struct tree_backref *tback;
6859         u64 owner = 0;
6860
6861         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6862         /* we have added this extent before */
6863         if (!cache)
6864                 return -ENOENT;
6865
6866         rec = container_of(cache, struct extent_record, cache);
6867
6868         /*
6869          * Except file/reloc tree, we can not have
6870          * FULL BACKREF MODE
6871          */
6872         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6873                 goto normal;
6874         /*
6875          * root node
6876          */
6877         if (buf->start == ri->bytenr)
6878                 goto normal;
6879
6880         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6881                 goto full_backref;
6882
6883         owner = btrfs_header_owner(buf);
6884         if (owner == ri->objectid)
6885                 goto normal;
6886
6887         tback = find_tree_backref(rec, 0, owner);
6888         if (!tback)
6889                 goto full_backref;
6890 normal:
6891         *flags = 0;
6892         if (rec->flag_block_full_backref != FLAG_UNSET &&
6893             rec->flag_block_full_backref != 0)
6894                 rec->bad_full_backref = 1;
6895         return 0;
6896 full_backref:
6897         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6898         if (rec->flag_block_full_backref != FLAG_UNSET &&
6899             rec->flag_block_full_backref != 1)
6900                 rec->bad_full_backref = 1;
6901         return 0;
6902 }
6903
6904 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6905 {
6906         fprintf(stderr, "Invalid key type(");
6907         print_key_type(stderr, 0, key_type);
6908         fprintf(stderr, ") found in root(");
6909         print_objectid(stderr, rootid, 0);
6910         fprintf(stderr, ")\n");
6911 }
6912
6913 /*
6914  * Check if the key is valid with its extent buffer.
6915  *
6916  * This is a early check in case invalid key exists in a extent buffer
6917  * This is not comprehensive yet, but should prevent wrong key/item passed
6918  * further
6919  */
6920 static int check_type_with_root(u64 rootid, u8 key_type)
6921 {
6922         switch (key_type) {
6923         /* Only valid in chunk tree */
6924         case BTRFS_DEV_ITEM_KEY:
6925         case BTRFS_CHUNK_ITEM_KEY:
6926                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6927                         goto err;
6928                 break;
6929         /* valid in csum and log tree */
6930         case BTRFS_CSUM_TREE_OBJECTID:
6931                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6932                       is_fstree(rootid)))
6933                         goto err;
6934                 break;
6935         case BTRFS_EXTENT_ITEM_KEY:
6936         case BTRFS_METADATA_ITEM_KEY:
6937         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6938                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6939                         goto err;
6940                 break;
6941         case BTRFS_ROOT_ITEM_KEY:
6942                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6943                         goto err;
6944                 break;
6945         case BTRFS_DEV_EXTENT_KEY:
6946                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6947                         goto err;
6948                 break;
6949         }
6950         return 0;
6951 err:
6952         report_mismatch_key_root(key_type, rootid);
6953         return -EINVAL;
6954 }
6955
6956 static int run_next_block(struct btrfs_root *root,
6957                           struct block_info *bits,
6958                           int bits_nr,
6959                           u64 *last,
6960                           struct cache_tree *pending,
6961                           struct cache_tree *seen,
6962                           struct cache_tree *reada,
6963                           struct cache_tree *nodes,
6964                           struct cache_tree *extent_cache,
6965                           struct cache_tree *chunk_cache,
6966                           struct rb_root *dev_cache,
6967                           struct block_group_tree *block_group_cache,
6968                           struct device_extent_tree *dev_extent_cache,
6969                           struct root_item_record *ri)
6970 {
6971         struct extent_buffer *buf;
6972         struct extent_record *rec = NULL;
6973         u64 bytenr;
6974         u32 size;
6975         u64 parent;
6976         u64 owner;
6977         u64 flags;
6978         u64 ptr;
6979         u64 gen = 0;
6980         int ret = 0;
6981         int i;
6982         int nritems;
6983         struct btrfs_key key;
6984         struct cache_extent *cache;
6985         int reada_bits;
6986
6987         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6988                                     bits_nr, &reada_bits);
6989         if (nritems == 0)
6990                 return 1;
6991
6992         if (!reada_bits) {
6993                 for(i = 0; i < nritems; i++) {
6994                         ret = add_cache_extent(reada, bits[i].start,
6995                                                bits[i].size);
6996                         if (ret == -EEXIST)
6997                                 continue;
6998
6999                         /* fixme, get the parent transid */
7000                         readahead_tree_block(root, bits[i].start,
7001                                              bits[i].size, 0);
7002                 }
7003         }
7004         *last = bits[0].start;
7005         bytenr = bits[0].start;
7006         size = bits[0].size;
7007
7008         cache = lookup_cache_extent(pending, bytenr, size);
7009         if (cache) {
7010                 remove_cache_extent(pending, cache);
7011                 free(cache);
7012         }
7013         cache = lookup_cache_extent(reada, bytenr, size);
7014         if (cache) {
7015                 remove_cache_extent(reada, cache);
7016                 free(cache);
7017         }
7018         cache = lookup_cache_extent(nodes, bytenr, size);
7019         if (cache) {
7020                 remove_cache_extent(nodes, cache);
7021                 free(cache);
7022         }
7023         cache = lookup_cache_extent(extent_cache, bytenr, size);
7024         if (cache) {
7025                 rec = container_of(cache, struct extent_record, cache);
7026                 gen = rec->parent_generation;
7027         }
7028
7029         /* fixme, get the real parent transid */
7030         buf = read_tree_block(root, bytenr, size, gen);
7031         if (!extent_buffer_uptodate(buf)) {
7032                 record_bad_block_io(root->fs_info,
7033                                     extent_cache, bytenr, size);
7034                 goto out;
7035         }
7036
7037         nritems = btrfs_header_nritems(buf);
7038
7039         flags = 0;
7040         if (!init_extent_tree) {
7041                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7042                                        btrfs_header_level(buf), 1, NULL,
7043                                        &flags);
7044                 if (ret < 0) {
7045                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7046                         if (ret < 0) {
7047                                 fprintf(stderr, "Couldn't calc extent flags\n");
7048                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7049                         }
7050                 }
7051         } else {
7052                 flags = 0;
7053                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7054                 if (ret < 0) {
7055                         fprintf(stderr, "Couldn't calc extent flags\n");
7056                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7057                 }
7058         }
7059
7060         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7061                 if (ri != NULL &&
7062                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7063                     ri->objectid == btrfs_header_owner(buf)) {
7064                         /*
7065                          * Ok we got to this block from it's original owner and
7066                          * we have FULL_BACKREF set.  Relocation can leave
7067                          * converted blocks over so this is altogether possible,
7068                          * however it's not possible if the generation > the
7069                          * last snapshot, so check for this case.
7070                          */
7071                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7072                             btrfs_header_generation(buf) > ri->last_snapshot) {
7073                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7074                                 rec->bad_full_backref = 1;
7075                         }
7076                 }
7077         } else {
7078                 if (ri != NULL &&
7079                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7080                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7081                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7082                         rec->bad_full_backref = 1;
7083                 }
7084         }
7085
7086         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7087                 rec->flag_block_full_backref = 1;
7088                 parent = bytenr;
7089                 owner = 0;
7090         } else {
7091                 rec->flag_block_full_backref = 0;
7092                 parent = 0;
7093                 owner = btrfs_header_owner(buf);
7094         }
7095
7096         ret = check_block(root, extent_cache, buf, flags);
7097         if (ret)
7098                 goto out;
7099
7100         if (btrfs_is_leaf(buf)) {
7101                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7102                 for (i = 0; i < nritems; i++) {
7103                         struct btrfs_file_extent_item *fi;
7104                         btrfs_item_key_to_cpu(buf, &key, i);
7105                         /*
7106                          * Check key type against the leaf owner.
7107                          * Could filter quite a lot of early error if
7108                          * owner is correct
7109                          */
7110                         if (check_type_with_root(btrfs_header_owner(buf),
7111                                                  key.type)) {
7112                                 fprintf(stderr, "ignoring invalid key\n");
7113                                 continue;
7114                         }
7115                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7116                                 process_extent_item(root, extent_cache, buf,
7117                                                     i);
7118                                 continue;
7119                         }
7120                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7121                                 process_extent_item(root, extent_cache, buf,
7122                                                     i);
7123                                 continue;
7124                         }
7125                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7126                                 total_csum_bytes +=
7127                                         btrfs_item_size_nr(buf, i);
7128                                 continue;
7129                         }
7130                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7131                                 process_chunk_item(chunk_cache, &key, buf, i);
7132                                 continue;
7133                         }
7134                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7135                                 process_device_item(dev_cache, &key, buf, i);
7136                                 continue;
7137                         }
7138                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7139                                 process_block_group_item(block_group_cache,
7140                                         &key, buf, i);
7141                                 continue;
7142                         }
7143                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7144                                 process_device_extent_item(dev_extent_cache,
7145                                         &key, buf, i);
7146                                 continue;
7147
7148                         }
7149                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7150 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7151                                 process_extent_ref_v0(extent_cache, buf, i);
7152 #else
7153                                 BUG();
7154 #endif
7155                                 continue;
7156                         }
7157
7158                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7159                                 ret = add_tree_backref(extent_cache,
7160                                                 key.objectid, 0, key.offset, 0);
7161                                 if (ret < 0)
7162                                         error("add_tree_backref failed: %s",
7163                                               strerror(-ret));
7164                                 continue;
7165                         }
7166                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7167                                 ret = add_tree_backref(extent_cache,
7168                                                 key.objectid, key.offset, 0, 0);
7169                                 if (ret < 0)
7170                                         error("add_tree_backref failed: %s",
7171                                               strerror(-ret));
7172                                 continue;
7173                         }
7174                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7175                                 struct btrfs_extent_data_ref *ref;
7176                                 ref = btrfs_item_ptr(buf, i,
7177                                                 struct btrfs_extent_data_ref);
7178                                 add_data_backref(extent_cache,
7179                                         key.objectid, 0,
7180                                         btrfs_extent_data_ref_root(buf, ref),
7181                                         btrfs_extent_data_ref_objectid(buf,
7182                                                                        ref),
7183                                         btrfs_extent_data_ref_offset(buf, ref),
7184                                         btrfs_extent_data_ref_count(buf, ref),
7185                                         0, root->sectorsize);
7186                                 continue;
7187                         }
7188                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7189                                 struct btrfs_shared_data_ref *ref;
7190                                 ref = btrfs_item_ptr(buf, i,
7191                                                 struct btrfs_shared_data_ref);
7192                                 add_data_backref(extent_cache,
7193                                         key.objectid, key.offset, 0, 0, 0,
7194                                         btrfs_shared_data_ref_count(buf, ref),
7195                                         0, root->sectorsize);
7196                                 continue;
7197                         }
7198                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7199                                 struct bad_item *bad;
7200
7201                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7202                                         continue;
7203                                 if (!owner)
7204                                         continue;
7205                                 bad = malloc(sizeof(struct bad_item));
7206                                 if (!bad)
7207                                         continue;
7208                                 INIT_LIST_HEAD(&bad->list);
7209                                 memcpy(&bad->key, &key,
7210                                        sizeof(struct btrfs_key));
7211                                 bad->root_id = owner;
7212                                 list_add_tail(&bad->list, &delete_items);
7213                                 continue;
7214                         }
7215                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7216                                 continue;
7217                         fi = btrfs_item_ptr(buf, i,
7218                                             struct btrfs_file_extent_item);
7219                         if (btrfs_file_extent_type(buf, fi) ==
7220                             BTRFS_FILE_EXTENT_INLINE)
7221                                 continue;
7222                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7223                                 continue;
7224
7225                         data_bytes_allocated +=
7226                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7227                         if (data_bytes_allocated < root->sectorsize) {
7228                                 abort();
7229                         }
7230                         data_bytes_referenced +=
7231                                 btrfs_file_extent_num_bytes(buf, fi);
7232                         add_data_backref(extent_cache,
7233                                 btrfs_file_extent_disk_bytenr(buf, fi),
7234                                 parent, owner, key.objectid, key.offset -
7235                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7236                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7237                 }
7238         } else {
7239                 int level;
7240                 struct btrfs_key first_key;
7241
7242                 first_key.objectid = 0;
7243
7244                 if (nritems > 0)
7245                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7246                 level = btrfs_header_level(buf);
7247                 for (i = 0; i < nritems; i++) {
7248                         struct extent_record tmpl;
7249
7250                         ptr = btrfs_node_blockptr(buf, i);
7251                         size = root->nodesize;
7252                         btrfs_node_key_to_cpu(buf, &key, i);
7253                         if (ri != NULL) {
7254                                 if ((level == ri->drop_level)
7255                                     && is_dropped_key(&key, &ri->drop_key)) {
7256                                         continue;
7257                                 }
7258                         }
7259
7260                         memset(&tmpl, 0, sizeof(tmpl));
7261                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7262                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7263                         tmpl.start = ptr;
7264                         tmpl.nr = size;
7265                         tmpl.refs = 1;
7266                         tmpl.metadata = 1;
7267                         tmpl.max_size = size;
7268                         ret = add_extent_rec(extent_cache, &tmpl);
7269                         if (ret < 0)
7270                                 goto out;
7271
7272                         ret = add_tree_backref(extent_cache, ptr, parent,
7273                                         owner, 1);
7274                         if (ret < 0) {
7275                                 error("add_tree_backref failed: %s",
7276                                       strerror(-ret));
7277                                 continue;
7278                         }
7279
7280                         if (level > 1) {
7281                                 add_pending(nodes, seen, ptr, size);
7282                         } else {
7283                                 add_pending(pending, seen, ptr, size);
7284                         }
7285                 }
7286                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7287                                       nritems) * sizeof(struct btrfs_key_ptr);
7288         }
7289         total_btree_bytes += buf->len;
7290         if (fs_root_objectid(btrfs_header_owner(buf)))
7291                 total_fs_tree_bytes += buf->len;
7292         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7293                 total_extent_tree_bytes += buf->len;
7294         if (!found_old_backref &&
7295             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7296             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7297             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7298                 found_old_backref = 1;
7299 out:
7300         free_extent_buffer(buf);
7301         return ret;
7302 }
7303
7304 static int add_root_to_pending(struct extent_buffer *buf,
7305                                struct cache_tree *extent_cache,
7306                                struct cache_tree *pending,
7307                                struct cache_tree *seen,
7308                                struct cache_tree *nodes,
7309                                u64 objectid)
7310 {
7311         struct extent_record tmpl;
7312         int ret;
7313
7314         if (btrfs_header_level(buf) > 0)
7315                 add_pending(nodes, seen, buf->start, buf->len);
7316         else
7317                 add_pending(pending, seen, buf->start, buf->len);
7318
7319         memset(&tmpl, 0, sizeof(tmpl));
7320         tmpl.start = buf->start;
7321         tmpl.nr = buf->len;
7322         tmpl.is_root = 1;
7323         tmpl.refs = 1;
7324         tmpl.metadata = 1;
7325         tmpl.max_size = buf->len;
7326         add_extent_rec(extent_cache, &tmpl);
7327
7328         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7329             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7330                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7331                                 0, 1);
7332         else
7333                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7334                                 1);
7335         return ret;
7336 }
7337
7338 /* as we fix the tree, we might be deleting blocks that
7339  * we're tracking for repair.  This hook makes sure we
7340  * remove any backrefs for blocks as we are fixing them.
7341  */
7342 static int free_extent_hook(struct btrfs_trans_handle *trans,
7343                             struct btrfs_root *root,
7344                             u64 bytenr, u64 num_bytes, u64 parent,
7345                             u64 root_objectid, u64 owner, u64 offset,
7346                             int refs_to_drop)
7347 {
7348         struct extent_record *rec;
7349         struct cache_extent *cache;
7350         int is_data;
7351         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7352
7353         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7354         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7355         if (!cache)
7356                 return 0;
7357
7358         rec = container_of(cache, struct extent_record, cache);
7359         if (is_data) {
7360                 struct data_backref *back;
7361                 back = find_data_backref(rec, parent, root_objectid, owner,
7362                                          offset, 1, bytenr, num_bytes);
7363                 if (!back)
7364                         goto out;
7365                 if (back->node.found_ref) {
7366                         back->found_ref -= refs_to_drop;
7367                         if (rec->refs)
7368                                 rec->refs -= refs_to_drop;
7369                 }
7370                 if (back->node.found_extent_tree) {
7371                         back->num_refs -= refs_to_drop;
7372                         if (rec->extent_item_refs)
7373                                 rec->extent_item_refs -= refs_to_drop;
7374                 }
7375                 if (back->found_ref == 0)
7376                         back->node.found_ref = 0;
7377                 if (back->num_refs == 0)
7378                         back->node.found_extent_tree = 0;
7379
7380                 if (!back->node.found_extent_tree && back->node.found_ref) {
7381                         list_del(&back->node.list);
7382                         free(back);
7383                 }
7384         } else {
7385                 struct tree_backref *back;
7386                 back = find_tree_backref(rec, parent, root_objectid);
7387                 if (!back)
7388                         goto out;
7389                 if (back->node.found_ref) {
7390                         if (rec->refs)
7391                                 rec->refs--;
7392                         back->node.found_ref = 0;
7393                 }
7394                 if (back->node.found_extent_tree) {
7395                         if (rec->extent_item_refs)
7396                                 rec->extent_item_refs--;
7397                         back->node.found_extent_tree = 0;
7398                 }
7399                 if (!back->node.found_extent_tree && back->node.found_ref) {
7400                         list_del(&back->node.list);
7401                         free(back);
7402                 }
7403         }
7404         maybe_free_extent_rec(extent_cache, rec);
7405 out:
7406         return 0;
7407 }
7408
7409 static int delete_extent_records(struct btrfs_trans_handle *trans,
7410                                  struct btrfs_root *root,
7411                                  struct btrfs_path *path,
7412                                  u64 bytenr, u64 new_len)
7413 {
7414         struct btrfs_key key;
7415         struct btrfs_key found_key;
7416         struct extent_buffer *leaf;
7417         int ret;
7418         int slot;
7419
7420
7421         key.objectid = bytenr;
7422         key.type = (u8)-1;
7423         key.offset = (u64)-1;
7424
7425         while(1) {
7426                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7427                                         &key, path, 0, 1);
7428                 if (ret < 0)
7429                         break;
7430
7431                 if (ret > 0) {
7432                         ret = 0;
7433                         if (path->slots[0] == 0)
7434                                 break;
7435                         path->slots[0]--;
7436                 }
7437                 ret = 0;
7438
7439                 leaf = path->nodes[0];
7440                 slot = path->slots[0];
7441
7442                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7443                 if (found_key.objectid != bytenr)
7444                         break;
7445
7446                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7447                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7448                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7449                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7450                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7451                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7452                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7453                         btrfs_release_path(path);
7454                         if (found_key.type == 0) {
7455                                 if (found_key.offset == 0)
7456                                         break;
7457                                 key.offset = found_key.offset - 1;
7458                                 key.type = found_key.type;
7459                         }
7460                         key.type = found_key.type - 1;
7461                         key.offset = (u64)-1;
7462                         continue;
7463                 }
7464
7465                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7466                         found_key.objectid, found_key.type, found_key.offset);
7467
7468                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7469                 if (ret)
7470                         break;
7471                 btrfs_release_path(path);
7472
7473                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7474                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
7475                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7476                                 found_key.offset : root->nodesize;
7477
7478                         ret = btrfs_update_block_group(trans, root, bytenr,
7479                                                        bytes, 0, 0);
7480                         if (ret)
7481                                 break;
7482                 }
7483         }
7484
7485         btrfs_release_path(path);
7486         return ret;
7487 }
7488
7489 /*
7490  * for a single backref, this will allocate a new extent
7491  * and add the backref to it.
7492  */
7493 static int record_extent(struct btrfs_trans_handle *trans,
7494                          struct btrfs_fs_info *info,
7495                          struct btrfs_path *path,
7496                          struct extent_record *rec,
7497                          struct extent_backref *back,
7498                          int allocated, u64 flags)
7499 {
7500         int ret;
7501         struct btrfs_root *extent_root = info->extent_root;
7502         struct extent_buffer *leaf;
7503         struct btrfs_key ins_key;
7504         struct btrfs_extent_item *ei;
7505         struct data_backref *dback;
7506         struct btrfs_tree_block_info *bi;
7507
7508         if (!back->is_data)
7509                 rec->max_size = max_t(u64, rec->max_size,
7510                                     info->extent_root->nodesize);
7511
7512         if (!allocated) {
7513                 u32 item_size = sizeof(*ei);
7514
7515                 if (!back->is_data)
7516                         item_size += sizeof(*bi);
7517
7518                 ins_key.objectid = rec->start;
7519                 ins_key.offset = rec->max_size;
7520                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7521
7522                 ret = btrfs_insert_empty_item(trans, extent_root, path,
7523                                         &ins_key, item_size);
7524                 if (ret)
7525                         goto fail;
7526
7527                 leaf = path->nodes[0];
7528                 ei = btrfs_item_ptr(leaf, path->slots[0],
7529                                     struct btrfs_extent_item);
7530
7531                 btrfs_set_extent_refs(leaf, ei, 0);
7532                 btrfs_set_extent_generation(leaf, ei, rec->generation);
7533
7534                 if (back->is_data) {
7535                         btrfs_set_extent_flags(leaf, ei,
7536                                                BTRFS_EXTENT_FLAG_DATA);
7537                 } else {
7538                         struct btrfs_disk_key copy_key;;
7539
7540                         bi = (struct btrfs_tree_block_info *)(ei + 1);
7541                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
7542                                              sizeof(*bi));
7543
7544                         btrfs_set_disk_key_objectid(&copy_key,
7545                                                     rec->info_objectid);
7546                         btrfs_set_disk_key_type(&copy_key, 0);
7547                         btrfs_set_disk_key_offset(&copy_key, 0);
7548
7549                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7550                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
7551
7552                         btrfs_set_extent_flags(leaf, ei,
7553                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7554                 }
7555
7556                 btrfs_mark_buffer_dirty(leaf);
7557                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7558                                                rec->max_size, 1, 0);
7559                 if (ret)
7560                         goto fail;
7561                 btrfs_release_path(path);
7562         }
7563
7564         if (back->is_data) {
7565                 u64 parent;
7566                 int i;
7567
7568                 dback = to_data_backref(back);
7569                 if (back->full_backref)
7570                         parent = dback->parent;
7571                 else
7572                         parent = 0;
7573
7574                 for (i = 0; i < dback->found_ref; i++) {
7575                         /* if parent != 0, we're doing a full backref
7576                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7577                          * just makes the backref allocator create a data
7578                          * backref
7579                          */
7580                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
7581                                                    rec->start, rec->max_size,
7582                                                    parent,
7583                                                    dback->root,
7584                                                    parent ?
7585                                                    BTRFS_FIRST_FREE_OBJECTID :
7586                                                    dback->owner,
7587                                                    dback->offset);
7588                         if (ret)
7589                                 break;
7590                 }
7591                 fprintf(stderr, "adding new data backref"
7592                                 " on %llu %s %llu owner %llu"
7593                                 " offset %llu found %d\n",
7594                                 (unsigned long long)rec->start,
7595                                 back->full_backref ?
7596                                 "parent" : "root",
7597                                 back->full_backref ?
7598                                 (unsigned long long)parent :
7599                                 (unsigned long long)dback->root,
7600                                 (unsigned long long)dback->owner,
7601                                 (unsigned long long)dback->offset,
7602                                 dback->found_ref);
7603         } else {
7604                 u64 parent;
7605                 struct tree_backref *tback;
7606
7607                 tback = to_tree_backref(back);
7608                 if (back->full_backref)
7609                         parent = tback->parent;
7610                 else
7611                         parent = 0;
7612
7613                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7614                                            rec->start, rec->max_size,
7615                                            parent, tback->root, 0, 0);
7616                 fprintf(stderr, "adding new tree backref on "
7617                         "start %llu len %llu parent %llu root %llu\n",
7618                         rec->start, rec->max_size, parent, tback->root);
7619         }
7620 fail:
7621         btrfs_release_path(path);
7622         return ret;
7623 }
7624
7625 static struct extent_entry *find_entry(struct list_head *entries,
7626                                        u64 bytenr, u64 bytes)
7627 {
7628         struct extent_entry *entry = NULL;
7629
7630         list_for_each_entry(entry, entries, list) {
7631                 if (entry->bytenr == bytenr && entry->bytes == bytes)
7632                         return entry;
7633         }
7634
7635         return NULL;
7636 }
7637
7638 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7639 {
7640         struct extent_entry *entry, *best = NULL, *prev = NULL;
7641
7642         list_for_each_entry(entry, entries, list) {
7643                 /*
7644                  * If there are as many broken entries as entries then we know
7645                  * not to trust this particular entry.
7646                  */
7647                 if (entry->broken == entry->count)
7648                         continue;
7649
7650                 /*
7651                  * Special case, when there are only two entries and 'best' is
7652                  * the first one
7653                  */
7654                 if (!prev) {
7655                         best = entry;
7656                         prev = entry;
7657                         continue;
7658                 }
7659
7660                 /*
7661                  * If our current entry == best then we can't be sure our best
7662                  * is really the best, so we need to keep searching.
7663                  */
7664                 if (best && best->count == entry->count) {
7665                         prev = entry;
7666                         best = NULL;
7667                         continue;
7668                 }
7669
7670                 /* Prev == entry, not good enough, have to keep searching */
7671                 if (!prev->broken && prev->count == entry->count)
7672                         continue;
7673
7674                 if (!best)
7675                         best = (prev->count > entry->count) ? prev : entry;
7676                 else if (best->count < entry->count)
7677                         best = entry;
7678                 prev = entry;
7679         }
7680
7681         return best;
7682 }
7683
7684 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7685                       struct data_backref *dback, struct extent_entry *entry)
7686 {
7687         struct btrfs_trans_handle *trans;
7688         struct btrfs_root *root;
7689         struct btrfs_file_extent_item *fi;
7690         struct extent_buffer *leaf;
7691         struct btrfs_key key;
7692         u64 bytenr, bytes;
7693         int ret, err;
7694
7695         key.objectid = dback->root;
7696         key.type = BTRFS_ROOT_ITEM_KEY;
7697         key.offset = (u64)-1;
7698         root = btrfs_read_fs_root(info, &key);
7699         if (IS_ERR(root)) {
7700                 fprintf(stderr, "Couldn't find root for our ref\n");
7701                 return -EINVAL;
7702         }
7703
7704         /*
7705          * The backref points to the original offset of the extent if it was
7706          * split, so we need to search down to the offset we have and then walk
7707          * forward until we find the backref we're looking for.
7708          */
7709         key.objectid = dback->owner;
7710         key.type = BTRFS_EXTENT_DATA_KEY;
7711         key.offset = dback->offset;
7712         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7713         if (ret < 0) {
7714                 fprintf(stderr, "Error looking up ref %d\n", ret);
7715                 return ret;
7716         }
7717
7718         while (1) {
7719                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7720                         ret = btrfs_next_leaf(root, path);
7721                         if (ret) {
7722                                 fprintf(stderr, "Couldn't find our ref, next\n");
7723                                 return -EINVAL;
7724                         }
7725                 }
7726                 leaf = path->nodes[0];
7727                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7728                 if (key.objectid != dback->owner ||
7729                     key.type != BTRFS_EXTENT_DATA_KEY) {
7730                         fprintf(stderr, "Couldn't find our ref, search\n");
7731                         return -EINVAL;
7732                 }
7733                 fi = btrfs_item_ptr(leaf, path->slots[0],
7734                                     struct btrfs_file_extent_item);
7735                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7736                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7737
7738                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7739                         break;
7740                 path->slots[0]++;
7741         }
7742
7743         btrfs_release_path(path);
7744
7745         trans = btrfs_start_transaction(root, 1);
7746         if (IS_ERR(trans))
7747                 return PTR_ERR(trans);
7748
7749         /*
7750          * Ok we have the key of the file extent we want to fix, now we can cow
7751          * down to the thing and fix it.
7752          */
7753         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7754         if (ret < 0) {
7755                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7756                         key.objectid, key.type, key.offset, ret);
7757                 goto out;
7758         }
7759         if (ret > 0) {
7760                 fprintf(stderr, "Well that's odd, we just found this key "
7761                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7762                         key.offset);
7763                 ret = -EINVAL;
7764                 goto out;
7765         }
7766         leaf = path->nodes[0];
7767         fi = btrfs_item_ptr(leaf, path->slots[0],
7768                             struct btrfs_file_extent_item);
7769
7770         if (btrfs_file_extent_compression(leaf, fi) &&
7771             dback->disk_bytenr != entry->bytenr) {
7772                 fprintf(stderr, "Ref doesn't match the record start and is "
7773                         "compressed, please take a btrfs-image of this file "
7774                         "system and send it to a btrfs developer so they can "
7775                         "complete this functionality for bytenr %Lu\n",
7776                         dback->disk_bytenr);
7777                 ret = -EINVAL;
7778                 goto out;
7779         }
7780
7781         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7782                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7783         } else if (dback->disk_bytenr > entry->bytenr) {
7784                 u64 off_diff, offset;
7785
7786                 off_diff = dback->disk_bytenr - entry->bytenr;
7787                 offset = btrfs_file_extent_offset(leaf, fi);
7788                 if (dback->disk_bytenr + offset +
7789                     btrfs_file_extent_num_bytes(leaf, fi) >
7790                     entry->bytenr + entry->bytes) {
7791                         fprintf(stderr, "Ref is past the entry end, please "
7792                                 "take a btrfs-image of this file system and "
7793                                 "send it to a btrfs developer, ref %Lu\n",
7794                                 dback->disk_bytenr);
7795                         ret = -EINVAL;
7796                         goto out;
7797                 }
7798                 offset += off_diff;
7799                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7800                 btrfs_set_file_extent_offset(leaf, fi, offset);
7801         } else if (dback->disk_bytenr < entry->bytenr) {
7802                 u64 offset;
7803
7804                 offset = btrfs_file_extent_offset(leaf, fi);
7805                 if (dback->disk_bytenr + offset < entry->bytenr) {
7806                         fprintf(stderr, "Ref is before the entry start, please"
7807                                 " take a btrfs-image of this file system and "
7808                                 "send it to a btrfs developer, ref %Lu\n",
7809                                 dback->disk_bytenr);
7810                         ret = -EINVAL;
7811                         goto out;
7812                 }
7813
7814                 offset += dback->disk_bytenr;
7815                 offset -= entry->bytenr;
7816                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7817                 btrfs_set_file_extent_offset(leaf, fi, offset);
7818         }
7819
7820         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7821
7822         /*
7823          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7824          * only do this if we aren't using compression, otherwise it's a
7825          * trickier case.
7826          */
7827         if (!btrfs_file_extent_compression(leaf, fi))
7828                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7829         else
7830                 printf("ram bytes may be wrong?\n");
7831         btrfs_mark_buffer_dirty(leaf);
7832 out:
7833         err = btrfs_commit_transaction(trans, root);
7834         btrfs_release_path(path);
7835         return ret ? ret : err;
7836 }
7837
7838 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7839                            struct extent_record *rec)
7840 {
7841         struct extent_backref *back;
7842         struct data_backref *dback;
7843         struct extent_entry *entry, *best = NULL;
7844         LIST_HEAD(entries);
7845         int nr_entries = 0;
7846         int broken_entries = 0;
7847         int ret = 0;
7848         short mismatch = 0;
7849
7850         /*
7851          * Metadata is easy and the backrefs should always agree on bytenr and
7852          * size, if not we've got bigger issues.
7853          */
7854         if (rec->metadata)
7855                 return 0;
7856
7857         list_for_each_entry(back, &rec->backrefs, list) {
7858                 if (back->full_backref || !back->is_data)
7859                         continue;
7860
7861                 dback = to_data_backref(back);
7862
7863                 /*
7864                  * We only pay attention to backrefs that we found a real
7865                  * backref for.
7866                  */
7867                 if (dback->found_ref == 0)
7868                         continue;
7869
7870                 /*
7871                  * For now we only catch when the bytes don't match, not the
7872                  * bytenr.  We can easily do this at the same time, but I want
7873                  * to have a fs image to test on before we just add repair
7874                  * functionality willy-nilly so we know we won't screw up the
7875                  * repair.
7876                  */
7877
7878                 entry = find_entry(&entries, dback->disk_bytenr,
7879                                    dback->bytes);
7880                 if (!entry) {
7881                         entry = malloc(sizeof(struct extent_entry));
7882                         if (!entry) {
7883                                 ret = -ENOMEM;
7884                                 goto out;
7885                         }
7886                         memset(entry, 0, sizeof(*entry));
7887                         entry->bytenr = dback->disk_bytenr;
7888                         entry->bytes = dback->bytes;
7889                         list_add_tail(&entry->list, &entries);
7890                         nr_entries++;
7891                 }
7892
7893                 /*
7894                  * If we only have on entry we may think the entries agree when
7895                  * in reality they don't so we have to do some extra checking.
7896                  */
7897                 if (dback->disk_bytenr != rec->start ||
7898                     dback->bytes != rec->nr || back->broken)
7899                         mismatch = 1;
7900
7901                 if (back->broken) {
7902                         entry->broken++;
7903                         broken_entries++;
7904                 }
7905
7906                 entry->count++;
7907         }
7908
7909         /* Yay all the backrefs agree, carry on good sir */
7910         if (nr_entries <= 1 && !mismatch)
7911                 goto out;
7912
7913         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7914                 "%Lu\n", rec->start);
7915
7916         /*
7917          * First we want to see if the backrefs can agree amongst themselves who
7918          * is right, so figure out which one of the entries has the highest
7919          * count.
7920          */
7921         best = find_most_right_entry(&entries);
7922
7923         /*
7924          * Ok so we may have an even split between what the backrefs think, so
7925          * this is where we use the extent ref to see what it thinks.
7926          */
7927         if (!best) {
7928                 entry = find_entry(&entries, rec->start, rec->nr);
7929                 if (!entry && (!broken_entries || !rec->found_rec)) {
7930                         fprintf(stderr, "Backrefs don't agree with each other "
7931                                 "and extent record doesn't agree with anybody,"
7932                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7933                                 rec->start, rec->nr);
7934                         ret = -EINVAL;
7935                         goto out;
7936                 } else if (!entry) {
7937                         /*
7938                          * Ok our backrefs were broken, we'll assume this is the
7939                          * correct value and add an entry for this range.
7940                          */
7941                         entry = malloc(sizeof(struct extent_entry));
7942                         if (!entry) {
7943                                 ret = -ENOMEM;
7944                                 goto out;
7945                         }
7946                         memset(entry, 0, sizeof(*entry));
7947                         entry->bytenr = rec->start;
7948                         entry->bytes = rec->nr;
7949                         list_add_tail(&entry->list, &entries);
7950                         nr_entries++;
7951                 }
7952                 entry->count++;
7953                 best = find_most_right_entry(&entries);
7954                 if (!best) {
7955                         fprintf(stderr, "Backrefs and extent record evenly "
7956                                 "split on who is right, this is going to "
7957                                 "require user input to fix bytenr %Lu bytes "
7958                                 "%Lu\n", rec->start, rec->nr);
7959                         ret = -EINVAL;
7960                         goto out;
7961                 }
7962         }
7963
7964         /*
7965          * I don't think this can happen currently as we'll abort() if we catch
7966          * this case higher up, but in case somebody removes that we still can't
7967          * deal with it properly here yet, so just bail out of that's the case.
7968          */
7969         if (best->bytenr != rec->start) {
7970                 fprintf(stderr, "Extent start and backref starts don't match, "
7971                         "please use btrfs-image on this file system and send "
7972                         "it to a btrfs developer so they can make fsck fix "
7973                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7974                         rec->start, rec->nr);
7975                 ret = -EINVAL;
7976                 goto out;
7977         }
7978
7979         /*
7980          * Ok great we all agreed on an extent record, let's go find the real
7981          * references and fix up the ones that don't match.
7982          */
7983         list_for_each_entry(back, &rec->backrefs, list) {
7984                 if (back->full_backref || !back->is_data)
7985                         continue;
7986
7987                 dback = to_data_backref(back);
7988
7989                 /*
7990                  * Still ignoring backrefs that don't have a real ref attached
7991                  * to them.
7992                  */
7993                 if (dback->found_ref == 0)
7994                         continue;
7995
7996                 if (dback->bytes == best->bytes &&
7997                     dback->disk_bytenr == best->bytenr)
7998                         continue;
7999
8000                 ret = repair_ref(info, path, dback, best);
8001                 if (ret)
8002                         goto out;
8003         }
8004
8005         /*
8006          * Ok we messed with the actual refs, which means we need to drop our
8007          * entire cache and go back and rescan.  I know this is a huge pain and
8008          * adds a lot of extra work, but it's the only way to be safe.  Once all
8009          * the backrefs agree we may not need to do anything to the extent
8010          * record itself.
8011          */
8012         ret = -EAGAIN;
8013 out:
8014         while (!list_empty(&entries)) {
8015                 entry = list_entry(entries.next, struct extent_entry, list);
8016                 list_del_init(&entry->list);
8017                 free(entry);
8018         }
8019         return ret;
8020 }
8021
8022 static int process_duplicates(struct btrfs_root *root,
8023                               struct cache_tree *extent_cache,
8024                               struct extent_record *rec)
8025 {
8026         struct extent_record *good, *tmp;
8027         struct cache_extent *cache;
8028         int ret;
8029
8030         /*
8031          * If we found a extent record for this extent then return, or if we
8032          * have more than one duplicate we are likely going to need to delete
8033          * something.
8034          */
8035         if (rec->found_rec || rec->num_duplicates > 1)
8036                 return 0;
8037
8038         /* Shouldn't happen but just in case */
8039         BUG_ON(!rec->num_duplicates);
8040
8041         /*
8042          * So this happens if we end up with a backref that doesn't match the
8043          * actual extent entry.  So either the backref is bad or the extent
8044          * entry is bad.  Either way we want to have the extent_record actually
8045          * reflect what we found in the extent_tree, so we need to take the
8046          * duplicate out and use that as the extent_record since the only way we
8047          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8048          */
8049         remove_cache_extent(extent_cache, &rec->cache);
8050
8051         good = to_extent_record(rec->dups.next);
8052         list_del_init(&good->list);
8053         INIT_LIST_HEAD(&good->backrefs);
8054         INIT_LIST_HEAD(&good->dups);
8055         good->cache.start = good->start;
8056         good->cache.size = good->nr;
8057         good->content_checked = 0;
8058         good->owner_ref_checked = 0;
8059         good->num_duplicates = 0;
8060         good->refs = rec->refs;
8061         list_splice_init(&rec->backrefs, &good->backrefs);
8062         while (1) {
8063                 cache = lookup_cache_extent(extent_cache, good->start,
8064                                             good->nr);
8065                 if (!cache)
8066                         break;
8067                 tmp = container_of(cache, struct extent_record, cache);
8068
8069                 /*
8070                  * If we find another overlapping extent and it's found_rec is
8071                  * set then it's a duplicate and we need to try and delete
8072                  * something.
8073                  */
8074                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8075                         if (list_empty(&good->list))
8076                                 list_add_tail(&good->list,
8077                                               &duplicate_extents);
8078                         good->num_duplicates += tmp->num_duplicates + 1;
8079                         list_splice_init(&tmp->dups, &good->dups);
8080                         list_del_init(&tmp->list);
8081                         list_add_tail(&tmp->list, &good->dups);
8082                         remove_cache_extent(extent_cache, &tmp->cache);
8083                         continue;
8084                 }
8085
8086                 /*
8087                  * Ok we have another non extent item backed extent rec, so lets
8088                  * just add it to this extent and carry on like we did above.
8089                  */
8090                 good->refs += tmp->refs;
8091                 list_splice_init(&tmp->backrefs, &good->backrefs);
8092                 remove_cache_extent(extent_cache, &tmp->cache);
8093                 free(tmp);
8094         }
8095         ret = insert_cache_extent(extent_cache, &good->cache);
8096         BUG_ON(ret);
8097         free(rec);
8098         return good->num_duplicates ? 0 : 1;
8099 }
8100
8101 static int delete_duplicate_records(struct btrfs_root *root,
8102                                     struct extent_record *rec)
8103 {
8104         struct btrfs_trans_handle *trans;
8105         LIST_HEAD(delete_list);
8106         struct btrfs_path path;
8107         struct extent_record *tmp, *good, *n;
8108         int nr_del = 0;
8109         int ret = 0, err;
8110         struct btrfs_key key;
8111
8112         btrfs_init_path(&path);
8113
8114         good = rec;
8115         /* Find the record that covers all of the duplicates. */
8116         list_for_each_entry(tmp, &rec->dups, list) {
8117                 if (good->start < tmp->start)
8118                         continue;
8119                 if (good->nr > tmp->nr)
8120                         continue;
8121
8122                 if (tmp->start + tmp->nr < good->start + good->nr) {
8123                         fprintf(stderr, "Ok we have overlapping extents that "
8124                                 "aren't completely covered by each other, this "
8125                                 "is going to require more careful thought.  "
8126                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8127                                 tmp->start, tmp->nr, good->start, good->nr);
8128                         abort();
8129                 }
8130                 good = tmp;
8131         }
8132
8133         if (good != rec)
8134                 list_add_tail(&rec->list, &delete_list);
8135
8136         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8137                 if (tmp == good)
8138                         continue;
8139                 list_move_tail(&tmp->list, &delete_list);
8140         }
8141
8142         root = root->fs_info->extent_root;
8143         trans = btrfs_start_transaction(root, 1);
8144         if (IS_ERR(trans)) {
8145                 ret = PTR_ERR(trans);
8146                 goto out;
8147         }
8148
8149         list_for_each_entry(tmp, &delete_list, list) {
8150                 if (tmp->found_rec == 0)
8151                         continue;
8152                 key.objectid = tmp->start;
8153                 key.type = BTRFS_EXTENT_ITEM_KEY;
8154                 key.offset = tmp->nr;
8155
8156                 /* Shouldn't happen but just in case */
8157                 if (tmp->metadata) {
8158                         fprintf(stderr, "Well this shouldn't happen, extent "
8159                                 "record overlaps but is metadata? "
8160                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8161                         abort();
8162                 }
8163
8164                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8165                 if (ret) {
8166                         if (ret > 0)
8167                                 ret = -EINVAL;
8168                         break;
8169                 }
8170                 ret = btrfs_del_item(trans, root, &path);
8171                 if (ret)
8172                         break;
8173                 btrfs_release_path(&path);
8174                 nr_del++;
8175         }
8176         err = btrfs_commit_transaction(trans, root);
8177         if (err && !ret)
8178                 ret = err;
8179 out:
8180         while (!list_empty(&delete_list)) {
8181                 tmp = to_extent_record(delete_list.next);
8182                 list_del_init(&tmp->list);
8183                 if (tmp == rec)
8184                         continue;
8185                 free(tmp);
8186         }
8187
8188         while (!list_empty(&rec->dups)) {
8189                 tmp = to_extent_record(rec->dups.next);
8190                 list_del_init(&tmp->list);
8191                 free(tmp);
8192         }
8193
8194         btrfs_release_path(&path);
8195
8196         if (!ret && !nr_del)
8197                 rec->num_duplicates = 0;
8198
8199         return ret ? ret : nr_del;
8200 }
8201
8202 static int find_possible_backrefs(struct btrfs_fs_info *info,
8203                                   struct btrfs_path *path,
8204                                   struct cache_tree *extent_cache,
8205                                   struct extent_record *rec)
8206 {
8207         struct btrfs_root *root;
8208         struct extent_backref *back;
8209         struct data_backref *dback;
8210         struct cache_extent *cache;
8211         struct btrfs_file_extent_item *fi;
8212         struct btrfs_key key;
8213         u64 bytenr, bytes;
8214         int ret;
8215
8216         list_for_each_entry(back, &rec->backrefs, list) {
8217                 /* Don't care about full backrefs (poor unloved backrefs) */
8218                 if (back->full_backref || !back->is_data)
8219                         continue;
8220
8221                 dback = to_data_backref(back);
8222
8223                 /* We found this one, we don't need to do a lookup */
8224                 if (dback->found_ref)
8225                         continue;
8226
8227                 key.objectid = dback->root;
8228                 key.type = BTRFS_ROOT_ITEM_KEY;
8229                 key.offset = (u64)-1;
8230
8231                 root = btrfs_read_fs_root(info, &key);
8232
8233                 /* No root, definitely a bad ref, skip */
8234                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8235                         continue;
8236                 /* Other err, exit */
8237                 if (IS_ERR(root))
8238                         return PTR_ERR(root);
8239
8240                 key.objectid = dback->owner;
8241                 key.type = BTRFS_EXTENT_DATA_KEY;
8242                 key.offset = dback->offset;
8243                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8244                 if (ret) {
8245                         btrfs_release_path(path);
8246                         if (ret < 0)
8247                                 return ret;
8248                         /* Didn't find it, we can carry on */
8249                         ret = 0;
8250                         continue;
8251                 }
8252
8253                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8254                                     struct btrfs_file_extent_item);
8255                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8256                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8257                 btrfs_release_path(path);
8258                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8259                 if (cache) {
8260                         struct extent_record *tmp;
8261                         tmp = container_of(cache, struct extent_record, cache);
8262
8263                         /*
8264                          * If we found an extent record for the bytenr for this
8265                          * particular backref then we can't add it to our
8266                          * current extent record.  We only want to add backrefs
8267                          * that don't have a corresponding extent item in the
8268                          * extent tree since they likely belong to this record
8269                          * and we need to fix it if it doesn't match bytenrs.
8270                          */
8271                         if  (tmp->found_rec)
8272                                 continue;
8273                 }
8274
8275                 dback->found_ref += 1;
8276                 dback->disk_bytenr = bytenr;
8277                 dback->bytes = bytes;
8278
8279                 /*
8280                  * Set this so the verify backref code knows not to trust the
8281                  * values in this backref.
8282                  */
8283                 back->broken = 1;
8284         }
8285
8286         return 0;
8287 }
8288
8289 /*
8290  * Record orphan data ref into corresponding root.
8291  *
8292  * Return 0 if the extent item contains data ref and recorded.
8293  * Return 1 if the extent item contains no useful data ref
8294  *   On that case, it may contains only shared_dataref or metadata backref
8295  *   or the file extent exists(this should be handled by the extent bytenr
8296  *   recovery routine)
8297  * Return <0 if something goes wrong.
8298  */
8299 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8300                                       struct extent_record *rec)
8301 {
8302         struct btrfs_key key;
8303         struct btrfs_root *dest_root;
8304         struct extent_backref *back;
8305         struct data_backref *dback;
8306         struct orphan_data_extent *orphan;
8307         struct btrfs_path path;
8308         int recorded_data_ref = 0;
8309         int ret = 0;
8310
8311         if (rec->metadata)
8312                 return 1;
8313         btrfs_init_path(&path);
8314         list_for_each_entry(back, &rec->backrefs, list) {
8315                 if (back->full_backref || !back->is_data ||
8316                     !back->found_extent_tree)
8317                         continue;
8318                 dback = to_data_backref(back);
8319                 if (dback->found_ref)
8320                         continue;
8321                 key.objectid = dback->root;
8322                 key.type = BTRFS_ROOT_ITEM_KEY;
8323                 key.offset = (u64)-1;
8324
8325                 dest_root = btrfs_read_fs_root(fs_info, &key);
8326
8327                 /* For non-exist root we just skip it */
8328                 if (IS_ERR(dest_root) || !dest_root)
8329                         continue;
8330
8331                 key.objectid = dback->owner;
8332                 key.type = BTRFS_EXTENT_DATA_KEY;
8333                 key.offset = dback->offset;
8334
8335                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8336                 btrfs_release_path(&path);
8337                 /*
8338                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8339                  * we need to record it for inode/file extent rebuild.
8340                  * For ret > 0, we record it only for file extent rebuild.
8341                  * For ret == 0, the file extent exists but only bytenr
8342                  * mismatch, let the original bytenr fix routine to handle,
8343                  * don't record it.
8344                  */
8345                 if (ret == 0)
8346                         continue;
8347                 ret = 0;
8348                 orphan = malloc(sizeof(*orphan));
8349                 if (!orphan) {
8350                         ret = -ENOMEM;
8351                         goto out;
8352                 }
8353                 INIT_LIST_HEAD(&orphan->list);
8354                 orphan->root = dback->root;
8355                 orphan->objectid = dback->owner;
8356                 orphan->offset = dback->offset;
8357                 orphan->disk_bytenr = rec->cache.start;
8358                 orphan->disk_len = rec->cache.size;
8359                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8360                 recorded_data_ref = 1;
8361         }
8362 out:
8363         btrfs_release_path(&path);
8364         if (!ret)
8365                 return !recorded_data_ref;
8366         else
8367                 return ret;
8368 }
8369
8370 /*
8371  * when an incorrect extent item is found, this will delete
8372  * all of the existing entries for it and recreate them
8373  * based on what the tree scan found.
8374  */
8375 static int fixup_extent_refs(struct btrfs_fs_info *info,
8376                              struct cache_tree *extent_cache,
8377                              struct extent_record *rec)
8378 {
8379         struct btrfs_trans_handle *trans = NULL;
8380         int ret;
8381         struct btrfs_path path;
8382         struct list_head *cur = rec->backrefs.next;
8383         struct cache_extent *cache;
8384         struct extent_backref *back;
8385         int allocated = 0;
8386         u64 flags = 0;
8387
8388         if (rec->flag_block_full_backref)
8389                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8390
8391         btrfs_init_path(&path);
8392         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8393                 /*
8394                  * Sometimes the backrefs themselves are so broken they don't
8395                  * get attached to any meaningful rec, so first go back and
8396                  * check any of our backrefs that we couldn't find and throw
8397                  * them into the list if we find the backref so that
8398                  * verify_backrefs can figure out what to do.
8399                  */
8400                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8401                 if (ret < 0)
8402                         goto out;
8403         }
8404
8405         /* step one, make sure all of the backrefs agree */
8406         ret = verify_backrefs(info, &path, rec);
8407         if (ret < 0)
8408                 goto out;
8409
8410         trans = btrfs_start_transaction(info->extent_root, 1);
8411         if (IS_ERR(trans)) {
8412                 ret = PTR_ERR(trans);
8413                 goto out;
8414         }
8415
8416         /* step two, delete all the existing records */
8417         ret = delete_extent_records(trans, info->extent_root, &path,
8418                                     rec->start, rec->max_size);
8419
8420         if (ret < 0)
8421                 goto out;
8422
8423         /* was this block corrupt?  If so, don't add references to it */
8424         cache = lookup_cache_extent(info->corrupt_blocks,
8425                                     rec->start, rec->max_size);
8426         if (cache) {
8427                 ret = 0;
8428                 goto out;
8429         }
8430
8431         /* step three, recreate all the refs we did find */
8432         while(cur != &rec->backrefs) {
8433                 back = to_extent_backref(cur);
8434                 cur = cur->next;
8435
8436                 /*
8437                  * if we didn't find any references, don't create a
8438                  * new extent record
8439                  */
8440                 if (!back->found_ref)
8441                         continue;
8442
8443                 rec->bad_full_backref = 0;
8444                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8445                 allocated = 1;
8446
8447                 if (ret)
8448                         goto out;
8449         }
8450 out:
8451         if (trans) {
8452                 int err = btrfs_commit_transaction(trans, info->extent_root);
8453                 if (!ret)
8454                         ret = err;
8455         }
8456
8457         btrfs_release_path(&path);
8458         return ret;
8459 }
8460
8461 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8462                               struct extent_record *rec)
8463 {
8464         struct btrfs_trans_handle *trans;
8465         struct btrfs_root *root = fs_info->extent_root;
8466         struct btrfs_path path;
8467         struct btrfs_extent_item *ei;
8468         struct btrfs_key key;
8469         u64 flags;
8470         int ret = 0;
8471
8472         key.objectid = rec->start;
8473         if (rec->metadata) {
8474                 key.type = BTRFS_METADATA_ITEM_KEY;
8475                 key.offset = rec->info_level;
8476         } else {
8477                 key.type = BTRFS_EXTENT_ITEM_KEY;
8478                 key.offset = rec->max_size;
8479         }
8480
8481         trans = btrfs_start_transaction(root, 0);
8482         if (IS_ERR(trans))
8483                 return PTR_ERR(trans);
8484
8485         btrfs_init_path(&path);
8486         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8487         if (ret < 0) {
8488                 btrfs_release_path(&path);
8489                 btrfs_commit_transaction(trans, root);
8490                 return ret;
8491         } else if (ret) {
8492                 fprintf(stderr, "Didn't find extent for %llu\n",
8493                         (unsigned long long)rec->start);
8494                 btrfs_release_path(&path);
8495                 btrfs_commit_transaction(trans, root);
8496                 return -ENOENT;
8497         }
8498
8499         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8500                             struct btrfs_extent_item);
8501         flags = btrfs_extent_flags(path.nodes[0], ei);
8502         if (rec->flag_block_full_backref) {
8503                 fprintf(stderr, "setting full backref on %llu\n",
8504                         (unsigned long long)key.objectid);
8505                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8506         } else {
8507                 fprintf(stderr, "clearing full backref on %llu\n",
8508                         (unsigned long long)key.objectid);
8509                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8510         }
8511         btrfs_set_extent_flags(path.nodes[0], ei, flags);
8512         btrfs_mark_buffer_dirty(path.nodes[0]);
8513         btrfs_release_path(&path);
8514         return btrfs_commit_transaction(trans, root);
8515 }
8516
8517 /* right now we only prune from the extent allocation tree */
8518 static int prune_one_block(struct btrfs_trans_handle *trans,
8519                            struct btrfs_fs_info *info,
8520                            struct btrfs_corrupt_block *corrupt)
8521 {
8522         int ret;
8523         struct btrfs_path path;
8524         struct extent_buffer *eb;
8525         u64 found;
8526         int slot;
8527         int nritems;
8528         int level = corrupt->level + 1;
8529
8530         btrfs_init_path(&path);
8531 again:
8532         /* we want to stop at the parent to our busted block */
8533         path.lowest_level = level;
8534
8535         ret = btrfs_search_slot(trans, info->extent_root,
8536                                 &corrupt->key, &path, -1, 1);
8537
8538         if (ret < 0)
8539                 goto out;
8540
8541         eb = path.nodes[level];
8542         if (!eb) {
8543                 ret = -ENOENT;
8544                 goto out;
8545         }
8546
8547         /*
8548          * hopefully the search gave us the block we want to prune,
8549          * lets try that first
8550          */
8551         slot = path.slots[level];
8552         found =  btrfs_node_blockptr(eb, slot);
8553         if (found == corrupt->cache.start)
8554                 goto del_ptr;
8555
8556         nritems = btrfs_header_nritems(eb);
8557
8558         /* the search failed, lets scan this node and hope we find it */
8559         for (slot = 0; slot < nritems; slot++) {
8560                 found =  btrfs_node_blockptr(eb, slot);
8561                 if (found == corrupt->cache.start)
8562                         goto del_ptr;
8563         }
8564         /*
8565          * we couldn't find the bad block.  TODO, search all the nodes for pointers
8566          * to this block
8567          */
8568         if (eb == info->extent_root->node) {
8569                 ret = -ENOENT;
8570                 goto out;
8571         } else {
8572                 level++;
8573                 btrfs_release_path(&path);
8574                 goto again;
8575         }
8576
8577 del_ptr:
8578         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8579         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8580
8581 out:
8582         btrfs_release_path(&path);
8583         return ret;
8584 }
8585
8586 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8587 {
8588         struct btrfs_trans_handle *trans = NULL;
8589         struct cache_extent *cache;
8590         struct btrfs_corrupt_block *corrupt;
8591
8592         while (1) {
8593                 cache = search_cache_extent(info->corrupt_blocks, 0);
8594                 if (!cache)
8595                         break;
8596                 if (!trans) {
8597                         trans = btrfs_start_transaction(info->extent_root, 1);
8598                         if (IS_ERR(trans))
8599                                 return PTR_ERR(trans);
8600                 }
8601                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8602                 prune_one_block(trans, info, corrupt);
8603                 remove_cache_extent(info->corrupt_blocks, cache);
8604         }
8605         if (trans)
8606                 return btrfs_commit_transaction(trans, info->extent_root);
8607         return 0;
8608 }
8609
8610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8611 {
8612         struct btrfs_block_group_cache *cache;
8613         u64 start, end;
8614         int ret;
8615
8616         while (1) {
8617                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8618                                             &start, &end, EXTENT_DIRTY);
8619                 if (ret)
8620                         break;
8621                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8622                                    GFP_NOFS);
8623         }
8624
8625         start = 0;
8626         while (1) {
8627                 cache = btrfs_lookup_first_block_group(fs_info, start);
8628                 if (!cache)
8629                         break;
8630                 if (cache->cached)
8631                         cache->cached = 0;
8632                 start = cache->key.objectid + cache->key.offset;
8633         }
8634 }
8635
8636 static int check_extent_refs(struct btrfs_root *root,
8637                              struct cache_tree *extent_cache)
8638 {
8639         struct extent_record *rec;
8640         struct cache_extent *cache;
8641         int err = 0;
8642         int ret = 0;
8643         int fixed = 0;
8644         int had_dups = 0;
8645         int recorded = 0;
8646
8647         if (repair) {
8648                 /*
8649                  * if we're doing a repair, we have to make sure
8650                  * we don't allocate from the problem extents.
8651                  * In the worst case, this will be all the
8652                  * extents in the FS
8653                  */
8654                 cache = search_cache_extent(extent_cache, 0);
8655                 while(cache) {
8656                         rec = container_of(cache, struct extent_record, cache);
8657                         set_extent_dirty(root->fs_info->excluded_extents,
8658                                          rec->start,
8659                                          rec->start + rec->max_size - 1,
8660                                          GFP_NOFS);
8661                         cache = next_cache_extent(cache);
8662                 }
8663
8664                 /* pin down all the corrupted blocks too */
8665                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8666                 while(cache) {
8667                         set_extent_dirty(root->fs_info->excluded_extents,
8668                                          cache->start,
8669                                          cache->start + cache->size - 1,
8670                                          GFP_NOFS);
8671                         cache = next_cache_extent(cache);
8672                 }
8673                 prune_corrupt_blocks(root->fs_info);
8674                 reset_cached_block_groups(root->fs_info);
8675         }
8676
8677         reset_cached_block_groups(root->fs_info);
8678
8679         /*
8680          * We need to delete any duplicate entries we find first otherwise we
8681          * could mess up the extent tree when we have backrefs that actually
8682          * belong to a different extent item and not the weird duplicate one.
8683          */
8684         while (repair && !list_empty(&duplicate_extents)) {
8685                 rec = to_extent_record(duplicate_extents.next);
8686                 list_del_init(&rec->list);
8687
8688                 /* Sometimes we can find a backref before we find an actual
8689                  * extent, so we need to process it a little bit to see if there
8690                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8691                  * if this is a backref screwup.  If we need to delete stuff
8692                  * process_duplicates() will return 0, otherwise it will return
8693                  * 1 and we
8694                  */
8695                 if (process_duplicates(root, extent_cache, rec))
8696                         continue;
8697                 ret = delete_duplicate_records(root, rec);
8698                 if (ret < 0)
8699                         return ret;
8700                 /*
8701                  * delete_duplicate_records will return the number of entries
8702                  * deleted, so if it's greater than 0 then we know we actually
8703                  * did something and we need to remove.
8704                  */
8705                 if (ret)
8706                         had_dups = 1;
8707         }
8708
8709         if (had_dups)
8710                 return -EAGAIN;
8711
8712         while(1) {
8713                 int cur_err = 0;
8714
8715                 fixed = 0;
8716                 recorded = 0;
8717                 cache = search_cache_extent(extent_cache, 0);
8718                 if (!cache)
8719                         break;
8720                 rec = container_of(cache, struct extent_record, cache);
8721                 if (rec->num_duplicates) {
8722                         fprintf(stderr, "extent item %llu has multiple extent "
8723                                 "items\n", (unsigned long long)rec->start);
8724                         err = 1;
8725                         cur_err = 1;
8726                 }
8727
8728                 if (rec->refs != rec->extent_item_refs) {
8729                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8730                                 (unsigned long long)rec->start,
8731                                 (unsigned long long)rec->nr);
8732                         fprintf(stderr, "extent item %llu, found %llu\n",
8733                                 (unsigned long long)rec->extent_item_refs,
8734                                 (unsigned long long)rec->refs);
8735                         ret = record_orphan_data_extents(root->fs_info, rec);
8736                         if (ret < 0)
8737                                 goto repair_abort;
8738                         if (ret == 0) {
8739                                 recorded = 1;
8740                         } else {
8741                                 /*
8742                                  * we can't use the extent to repair file
8743                                  * extent, let the fallback method handle it.
8744                                  */
8745                                 if (!fixed && repair) {
8746                                         ret = fixup_extent_refs(
8747                                                         root->fs_info,
8748                                                         extent_cache, rec);
8749                                         if (ret)
8750                                                 goto repair_abort;
8751                                         fixed = 1;
8752                                 }
8753                         }
8754                         err = 1;
8755                         cur_err = 1;
8756                 }
8757                 if (all_backpointers_checked(rec, 1)) {
8758                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8759                                 (unsigned long long)rec->start,
8760                                 (unsigned long long)rec->nr);
8761
8762                         if (!fixed && !recorded && repair) {
8763                                 ret = fixup_extent_refs(root->fs_info,
8764                                                         extent_cache, rec);
8765                                 if (ret)
8766                                         goto repair_abort;
8767                                 fixed = 1;
8768                         }
8769                         cur_err = 1;
8770                         err = 1;
8771                 }
8772                 if (!rec->owner_ref_checked) {
8773                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8774                                 (unsigned long long)rec->start,
8775                                 (unsigned long long)rec->nr);
8776                         if (!fixed && !recorded && repair) {
8777                                 ret = fixup_extent_refs(root->fs_info,
8778                                                         extent_cache, rec);
8779                                 if (ret)
8780                                         goto repair_abort;
8781                                 fixed = 1;
8782                         }
8783                         err = 1;
8784                         cur_err = 1;
8785                 }
8786                 if (rec->bad_full_backref) {
8787                         fprintf(stderr, "bad full backref, on [%llu]\n",
8788                                 (unsigned long long)rec->start);
8789                         if (repair) {
8790                                 ret = fixup_extent_flags(root->fs_info, rec);
8791                                 if (ret)
8792                                         goto repair_abort;
8793                                 fixed = 1;
8794                         }
8795                         err = 1;
8796                         cur_err = 1;
8797                 }
8798                 /*
8799                  * Although it's not a extent ref's problem, we reuse this
8800                  * routine for error reporting.
8801                  * No repair function yet.
8802                  */
8803                 if (rec->crossing_stripes) {
8804                         fprintf(stderr,
8805                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8806                                 rec->start, rec->start + rec->max_size);
8807                         err = 1;
8808                         cur_err = 1;
8809                 }
8810
8811                 if (rec->wrong_chunk_type) {
8812                         fprintf(stderr,
8813                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8814                                 rec->start, rec->start + rec->max_size);
8815                         err = 1;
8816                         cur_err = 1;
8817                 }
8818
8819                 remove_cache_extent(extent_cache, cache);
8820                 free_all_extent_backrefs(rec);
8821                 if (!init_extent_tree && repair && (!cur_err || fixed))
8822                         clear_extent_dirty(root->fs_info->excluded_extents,
8823                                            rec->start,
8824                                            rec->start + rec->max_size - 1,
8825                                            GFP_NOFS);
8826                 free(rec);
8827         }
8828 repair_abort:
8829         if (repair) {
8830                 if (ret && ret != -EAGAIN) {
8831                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8832                         exit(1);
8833                 } else if (!ret) {
8834                         struct btrfs_trans_handle *trans;
8835
8836                         root = root->fs_info->extent_root;
8837                         trans = btrfs_start_transaction(root, 1);
8838                         if (IS_ERR(trans)) {
8839                                 ret = PTR_ERR(trans);
8840                                 goto repair_abort;
8841                         }
8842
8843                         btrfs_fix_block_accounting(trans, root);
8844                         ret = btrfs_commit_transaction(trans, root);
8845                         if (ret)
8846                                 goto repair_abort;
8847                 }
8848                 if (err)
8849                         fprintf(stderr, "repaired damaged extent references\n");
8850                 return ret;
8851         }
8852         return err;
8853 }
8854
8855 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8856 {
8857         u64 stripe_size;
8858
8859         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8860                 stripe_size = length;
8861                 stripe_size /= num_stripes;
8862         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8863                 stripe_size = length * 2;
8864                 stripe_size /= num_stripes;
8865         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8866                 stripe_size = length;
8867                 stripe_size /= (num_stripes - 1);
8868         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8869                 stripe_size = length;
8870                 stripe_size /= (num_stripes - 2);
8871         } else {
8872                 stripe_size = length;
8873         }
8874         return stripe_size;
8875 }
8876
8877 /*
8878  * Check the chunk with its block group/dev list ref:
8879  * Return 0 if all refs seems valid.
8880  * Return 1 if part of refs seems valid, need later check for rebuild ref
8881  * like missing block group and needs to search extent tree to rebuild them.
8882  * Return -1 if essential refs are missing and unable to rebuild.
8883  */
8884 static int check_chunk_refs(struct chunk_record *chunk_rec,
8885                             struct block_group_tree *block_group_cache,
8886                             struct device_extent_tree *dev_extent_cache,
8887                             int silent)
8888 {
8889         struct cache_extent *block_group_item;
8890         struct block_group_record *block_group_rec;
8891         struct cache_extent *dev_extent_item;
8892         struct device_extent_record *dev_extent_rec;
8893         u64 devid;
8894         u64 offset;
8895         u64 length;
8896         int metadump_v2 = 0;
8897         int i;
8898         int ret = 0;
8899
8900         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8901                                                chunk_rec->offset,
8902                                                chunk_rec->length);
8903         if (block_group_item) {
8904                 block_group_rec = container_of(block_group_item,
8905                                                struct block_group_record,
8906                                                cache);
8907                 if (chunk_rec->length != block_group_rec->offset ||
8908                     chunk_rec->offset != block_group_rec->objectid ||
8909                     (!metadump_v2 &&
8910                      chunk_rec->type_flags != block_group_rec->flags)) {
8911                         if (!silent)
8912                                 fprintf(stderr,
8913                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8914                                         chunk_rec->objectid,
8915                                         chunk_rec->type,
8916                                         chunk_rec->offset,
8917                                         chunk_rec->length,
8918                                         chunk_rec->offset,
8919                                         chunk_rec->type_flags,
8920                                         block_group_rec->objectid,
8921                                         block_group_rec->type,
8922                                         block_group_rec->offset,
8923                                         block_group_rec->offset,
8924                                         block_group_rec->objectid,
8925                                         block_group_rec->flags);
8926                         ret = -1;
8927                 } else {
8928                         list_del_init(&block_group_rec->list);
8929                         chunk_rec->bg_rec = block_group_rec;
8930                 }
8931         } else {
8932                 if (!silent)
8933                         fprintf(stderr,
8934                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8935                                 chunk_rec->objectid,
8936                                 chunk_rec->type,
8937                                 chunk_rec->offset,
8938                                 chunk_rec->length,
8939                                 chunk_rec->offset,
8940                                 chunk_rec->type_flags);
8941                 ret = 1;
8942         }
8943
8944         if (metadump_v2)
8945                 return ret;
8946
8947         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8948                                     chunk_rec->num_stripes);
8949         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8950                 devid = chunk_rec->stripes[i].devid;
8951                 offset = chunk_rec->stripes[i].offset;
8952                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8953                                                        devid, offset, length);
8954                 if (dev_extent_item) {
8955                         dev_extent_rec = container_of(dev_extent_item,
8956                                                 struct device_extent_record,
8957                                                 cache);
8958                         if (dev_extent_rec->objectid != devid ||
8959                             dev_extent_rec->offset != offset ||
8960                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8961                             dev_extent_rec->length != length) {
8962                                 if (!silent)
8963                                         fprintf(stderr,
8964                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8965                                                 chunk_rec->objectid,
8966                                                 chunk_rec->type,
8967                                                 chunk_rec->offset,
8968                                                 chunk_rec->stripes[i].devid,
8969                                                 chunk_rec->stripes[i].offset,
8970                                                 dev_extent_rec->objectid,
8971                                                 dev_extent_rec->offset,
8972                                                 dev_extent_rec->length);
8973                                 ret = -1;
8974                         } else {
8975                                 list_move(&dev_extent_rec->chunk_list,
8976                                           &chunk_rec->dextents);
8977                         }
8978                 } else {
8979                         if (!silent)
8980                                 fprintf(stderr,
8981                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8982                                         chunk_rec->objectid,
8983                                         chunk_rec->type,
8984                                         chunk_rec->offset,
8985                                         chunk_rec->stripes[i].devid,
8986                                         chunk_rec->stripes[i].offset);
8987                         ret = -1;
8988                 }
8989         }
8990         return ret;
8991 }
8992
8993 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8994 int check_chunks(struct cache_tree *chunk_cache,
8995                  struct block_group_tree *block_group_cache,
8996                  struct device_extent_tree *dev_extent_cache,
8997                  struct list_head *good, struct list_head *bad,
8998                  struct list_head *rebuild, int silent)
8999 {
9000         struct cache_extent *chunk_item;
9001         struct chunk_record *chunk_rec;
9002         struct block_group_record *bg_rec;
9003         struct device_extent_record *dext_rec;
9004         int err;
9005         int ret = 0;
9006
9007         chunk_item = first_cache_extent(chunk_cache);
9008         while (chunk_item) {
9009                 chunk_rec = container_of(chunk_item, struct chunk_record,
9010                                          cache);
9011                 err = check_chunk_refs(chunk_rec, block_group_cache,
9012                                        dev_extent_cache, silent);
9013                 if (err < 0)
9014                         ret = err;
9015                 if (err == 0 && good)
9016                         list_add_tail(&chunk_rec->list, good);
9017                 if (err > 0 && rebuild)
9018                         list_add_tail(&chunk_rec->list, rebuild);
9019                 if (err < 0 && bad)
9020                         list_add_tail(&chunk_rec->list, bad);
9021                 chunk_item = next_cache_extent(chunk_item);
9022         }
9023
9024         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9025                 if (!silent)
9026                         fprintf(stderr,
9027                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9028                                 bg_rec->objectid,
9029                                 bg_rec->offset,
9030                                 bg_rec->flags);
9031                 if (!ret)
9032                         ret = 1;
9033         }
9034
9035         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9036                             chunk_list) {
9037                 if (!silent)
9038                         fprintf(stderr,
9039                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9040                                 dext_rec->objectid,
9041                                 dext_rec->offset,
9042                                 dext_rec->length);
9043                 if (!ret)
9044                         ret = 1;
9045         }
9046         return ret;
9047 }
9048
9049
9050 static int check_device_used(struct device_record *dev_rec,
9051                              struct device_extent_tree *dext_cache)
9052 {
9053         struct cache_extent *cache;
9054         struct device_extent_record *dev_extent_rec;
9055         u64 total_byte = 0;
9056
9057         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9058         while (cache) {
9059                 dev_extent_rec = container_of(cache,
9060                                               struct device_extent_record,
9061                                               cache);
9062                 if (dev_extent_rec->objectid != dev_rec->devid)
9063                         break;
9064
9065                 list_del_init(&dev_extent_rec->device_list);
9066                 total_byte += dev_extent_rec->length;
9067                 cache = next_cache_extent(cache);
9068         }
9069
9070         if (total_byte != dev_rec->byte_used) {
9071                 fprintf(stderr,
9072                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9073                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9074                         dev_rec->type, dev_rec->offset);
9075                 return -1;
9076         } else {
9077                 return 0;
9078         }
9079 }
9080
9081 /* check btrfs_dev_item -> btrfs_dev_extent */
9082 static int check_devices(struct rb_root *dev_cache,
9083                          struct device_extent_tree *dev_extent_cache)
9084 {
9085         struct rb_node *dev_node;
9086         struct device_record *dev_rec;
9087         struct device_extent_record *dext_rec;
9088         int err;
9089         int ret = 0;
9090
9091         dev_node = rb_first(dev_cache);
9092         while (dev_node) {
9093                 dev_rec = container_of(dev_node, struct device_record, node);
9094                 err = check_device_used(dev_rec, dev_extent_cache);
9095                 if (err)
9096                         ret = err;
9097
9098                 dev_node = rb_next(dev_node);
9099         }
9100         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9101                             device_list) {
9102                 fprintf(stderr,
9103                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9104                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9105                 if (!ret)
9106                         ret = 1;
9107         }
9108         return ret;
9109 }
9110
9111 static int add_root_item_to_list(struct list_head *head,
9112                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9113                                   u8 level, u8 drop_level,
9114                                   int level_size, struct btrfs_key *drop_key)
9115 {
9116
9117         struct root_item_record *ri_rec;
9118         ri_rec = malloc(sizeof(*ri_rec));
9119         if (!ri_rec)
9120                 return -ENOMEM;
9121         ri_rec->bytenr = bytenr;
9122         ri_rec->objectid = objectid;
9123         ri_rec->level = level;
9124         ri_rec->level_size = level_size;
9125         ri_rec->drop_level = drop_level;
9126         ri_rec->last_snapshot = last_snapshot;
9127         if (drop_key)
9128                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9129         list_add_tail(&ri_rec->list, head);
9130
9131         return 0;
9132 }
9133
9134 static void free_root_item_list(struct list_head *list)
9135 {
9136         struct root_item_record *ri_rec;
9137
9138         while (!list_empty(list)) {
9139                 ri_rec = list_first_entry(list, struct root_item_record,
9140                                           list);
9141                 list_del_init(&ri_rec->list);
9142                 free(ri_rec);
9143         }
9144 }
9145
9146 static int deal_root_from_list(struct list_head *list,
9147                                struct btrfs_root *root,
9148                                struct block_info *bits,
9149                                int bits_nr,
9150                                struct cache_tree *pending,
9151                                struct cache_tree *seen,
9152                                struct cache_tree *reada,
9153                                struct cache_tree *nodes,
9154                                struct cache_tree *extent_cache,
9155                                struct cache_tree *chunk_cache,
9156                                struct rb_root *dev_cache,
9157                                struct block_group_tree *block_group_cache,
9158                                struct device_extent_tree *dev_extent_cache)
9159 {
9160         int ret = 0;
9161         u64 last;
9162
9163         while (!list_empty(list)) {
9164                 struct root_item_record *rec;
9165                 struct extent_buffer *buf;
9166                 rec = list_entry(list->next,
9167                                  struct root_item_record, list);
9168                 last = 0;
9169                 buf = read_tree_block(root->fs_info->tree_root,
9170                                       rec->bytenr, rec->level_size, 0);
9171                 if (!extent_buffer_uptodate(buf)) {
9172                         free_extent_buffer(buf);
9173                         ret = -EIO;
9174                         break;
9175                 }
9176                 ret = add_root_to_pending(buf, extent_cache, pending,
9177                                     seen, nodes, rec->objectid);
9178                 if (ret < 0)
9179                         break;
9180                 /*
9181                  * To rebuild extent tree, we need deal with snapshot
9182                  * one by one, otherwise we deal with node firstly which
9183                  * can maximize readahead.
9184                  */
9185                 while (1) {
9186                         ret = run_next_block(root, bits, bits_nr, &last,
9187                                              pending, seen, reada, nodes,
9188                                              extent_cache, chunk_cache,
9189                                              dev_cache, block_group_cache,
9190                                              dev_extent_cache, rec);
9191                         if (ret != 0)
9192                                 break;
9193                 }
9194                 free_extent_buffer(buf);
9195                 list_del(&rec->list);
9196                 free(rec);
9197                 if (ret < 0)
9198                         break;
9199         }
9200         while (ret >= 0) {
9201                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9202                                      reada, nodes, extent_cache, chunk_cache,
9203                                      dev_cache, block_group_cache,
9204                                      dev_extent_cache, NULL);
9205                 if (ret != 0) {
9206                         if (ret > 0)
9207                                 ret = 0;
9208                         break;
9209                 }
9210         }
9211         return ret;
9212 }
9213
9214 static int check_chunks_and_extents(struct btrfs_root *root)
9215 {
9216         struct rb_root dev_cache;
9217         struct cache_tree chunk_cache;
9218         struct block_group_tree block_group_cache;
9219         struct device_extent_tree dev_extent_cache;
9220         struct cache_tree extent_cache;
9221         struct cache_tree seen;
9222         struct cache_tree pending;
9223         struct cache_tree reada;
9224         struct cache_tree nodes;
9225         struct extent_io_tree excluded_extents;
9226         struct cache_tree corrupt_blocks;
9227         struct btrfs_path path;
9228         struct btrfs_key key;
9229         struct btrfs_key found_key;
9230         int ret, err = 0;
9231         struct block_info *bits;
9232         int bits_nr;
9233         struct extent_buffer *leaf;
9234         int slot;
9235         struct btrfs_root_item ri;
9236         struct list_head dropping_trees;
9237         struct list_head normal_trees;
9238         struct btrfs_root *root1;
9239         u64 objectid;
9240         u32 level_size;
9241         u8 level;
9242
9243         dev_cache = RB_ROOT;
9244         cache_tree_init(&chunk_cache);
9245         block_group_tree_init(&block_group_cache);
9246         device_extent_tree_init(&dev_extent_cache);
9247
9248         cache_tree_init(&extent_cache);
9249         cache_tree_init(&seen);
9250         cache_tree_init(&pending);
9251         cache_tree_init(&nodes);
9252         cache_tree_init(&reada);
9253         cache_tree_init(&corrupt_blocks);
9254         extent_io_tree_init(&excluded_extents);
9255         INIT_LIST_HEAD(&dropping_trees);
9256         INIT_LIST_HEAD(&normal_trees);
9257
9258         if (repair) {
9259                 root->fs_info->excluded_extents = &excluded_extents;
9260                 root->fs_info->fsck_extent_cache = &extent_cache;
9261                 root->fs_info->free_extent_hook = free_extent_hook;
9262                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9263         }
9264
9265         bits_nr = 1024;
9266         bits = malloc(bits_nr * sizeof(struct block_info));
9267         if (!bits) {
9268                 perror("malloc");
9269                 exit(1);
9270         }
9271
9272         if (ctx.progress_enabled) {
9273                 ctx.tp = TASK_EXTENTS;
9274                 task_start(ctx.info);
9275         }
9276
9277 again:
9278         root1 = root->fs_info->tree_root;
9279         level = btrfs_header_level(root1->node);
9280         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9281                                     root1->node->start, 0, level, 0,
9282                                     root1->nodesize, NULL);
9283         if (ret < 0)
9284                 goto out;
9285         root1 = root->fs_info->chunk_root;
9286         level = btrfs_header_level(root1->node);
9287         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9288                                     root1->node->start, 0, level, 0,
9289                                     root1->nodesize, NULL);
9290         if (ret < 0)
9291                 goto out;
9292         btrfs_init_path(&path);
9293         key.offset = 0;
9294         key.objectid = 0;
9295         key.type = BTRFS_ROOT_ITEM_KEY;
9296         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9297                                         &key, &path, 0, 0);
9298         if (ret < 0)
9299                 goto out;
9300         while(1) {
9301                 leaf = path.nodes[0];
9302                 slot = path.slots[0];
9303                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9304                         ret = btrfs_next_leaf(root, &path);
9305                         if (ret != 0)
9306                                 break;
9307                         leaf = path.nodes[0];
9308                         slot = path.slots[0];
9309                 }
9310                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9311                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9312                         unsigned long offset;
9313                         u64 last_snapshot;
9314
9315                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9316                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9317                         last_snapshot = btrfs_root_last_snapshot(&ri);
9318                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9319                                 level = btrfs_root_level(&ri);
9320                                 level_size = root->nodesize;
9321                                 ret = add_root_item_to_list(&normal_trees,
9322                                                 found_key.objectid,
9323                                                 btrfs_root_bytenr(&ri),
9324                                                 last_snapshot, level,
9325                                                 0, level_size, NULL);
9326                                 if (ret < 0)
9327                                         goto out;
9328                         } else {
9329                                 level = btrfs_root_level(&ri);
9330                                 level_size = root->nodesize;
9331                                 objectid = found_key.objectid;
9332                                 btrfs_disk_key_to_cpu(&found_key,
9333                                                       &ri.drop_progress);
9334                                 ret = add_root_item_to_list(&dropping_trees,
9335                                                 objectid,
9336                                                 btrfs_root_bytenr(&ri),
9337                                                 last_snapshot, level,
9338                                                 ri.drop_level,
9339                                                 level_size, &found_key);
9340                                 if (ret < 0)
9341                                         goto out;
9342                         }
9343                 }
9344                 path.slots[0]++;
9345         }
9346         btrfs_release_path(&path);
9347
9348         /*
9349          * check_block can return -EAGAIN if it fixes something, please keep
9350          * this in mind when dealing with return values from these functions, if
9351          * we get -EAGAIN we want to fall through and restart the loop.
9352          */
9353         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9354                                   &seen, &reada, &nodes, &extent_cache,
9355                                   &chunk_cache, &dev_cache, &block_group_cache,
9356                                   &dev_extent_cache);
9357         if (ret < 0) {
9358                 if (ret == -EAGAIN)
9359                         goto loop;
9360                 goto out;
9361         }
9362         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9363                                   &pending, &seen, &reada, &nodes,
9364                                   &extent_cache, &chunk_cache, &dev_cache,
9365                                   &block_group_cache, &dev_extent_cache);
9366         if (ret < 0) {
9367                 if (ret == -EAGAIN)
9368                         goto loop;
9369                 goto out;
9370         }
9371
9372         ret = check_chunks(&chunk_cache, &block_group_cache,
9373                            &dev_extent_cache, NULL, NULL, NULL, 0);
9374         if (ret) {
9375                 if (ret == -EAGAIN)
9376                         goto loop;
9377                 err = ret;
9378         }
9379
9380         ret = check_extent_refs(root, &extent_cache);
9381         if (ret < 0) {
9382                 if (ret == -EAGAIN)
9383                         goto loop;
9384                 goto out;
9385         }
9386
9387         ret = check_devices(&dev_cache, &dev_extent_cache);
9388         if (ret && err)
9389                 ret = err;
9390
9391 out:
9392         task_stop(ctx.info);
9393         if (repair) {
9394                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9395                 extent_io_tree_cleanup(&excluded_extents);
9396                 root->fs_info->fsck_extent_cache = NULL;
9397                 root->fs_info->free_extent_hook = NULL;
9398                 root->fs_info->corrupt_blocks = NULL;
9399                 root->fs_info->excluded_extents = NULL;
9400         }
9401         free(bits);
9402         free_chunk_cache_tree(&chunk_cache);
9403         free_device_cache_tree(&dev_cache);
9404         free_block_group_tree(&block_group_cache);
9405         free_device_extent_tree(&dev_extent_cache);
9406         free_extent_cache_tree(&seen);
9407         free_extent_cache_tree(&pending);
9408         free_extent_cache_tree(&reada);
9409         free_extent_cache_tree(&nodes);
9410         return ret;
9411 loop:
9412         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9413         free_extent_cache_tree(&seen);
9414         free_extent_cache_tree(&pending);
9415         free_extent_cache_tree(&reada);
9416         free_extent_cache_tree(&nodes);
9417         free_chunk_cache_tree(&chunk_cache);
9418         free_block_group_tree(&block_group_cache);
9419         free_device_cache_tree(&dev_cache);
9420         free_device_extent_tree(&dev_extent_cache);
9421         free_extent_record_cache(root->fs_info, &extent_cache);
9422         free_root_item_list(&normal_trees);
9423         free_root_item_list(&dropping_trees);
9424         extent_io_tree_cleanup(&excluded_extents);
9425         goto again;
9426 }
9427
9428 /*
9429  * Check backrefs of a tree block given by @bytenr or @eb.
9430  *
9431  * @root:       the root containing the @bytenr or @eb
9432  * @eb:         tree block extent buffer, can be NULL
9433  * @bytenr:     bytenr of the tree block to search
9434  * @level:      tree level of the tree block
9435  * @owner:      owner of the tree block
9436  *
9437  * Return >0 for any error found and output error message
9438  * Return 0 for no error found
9439  */
9440 static int check_tree_block_ref(struct btrfs_root *root,
9441                                 struct extent_buffer *eb, u64 bytenr,
9442                                 int level, u64 owner)
9443 {
9444         struct btrfs_key key;
9445         struct btrfs_root *extent_root = root->fs_info->extent_root;
9446         struct btrfs_path path;
9447         struct btrfs_extent_item *ei;
9448         struct btrfs_extent_inline_ref *iref;
9449         struct extent_buffer *leaf;
9450         unsigned long end;
9451         unsigned long ptr;
9452         int slot;
9453         int skinny_level;
9454         int type;
9455         u32 nodesize = root->nodesize;
9456         u32 item_size;
9457         u64 offset;
9458         int found_ref = 0;
9459         int err = 0;
9460         int ret;
9461
9462         btrfs_init_path(&path);
9463         key.objectid = bytenr;
9464         if (btrfs_fs_incompat(root->fs_info,
9465                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9466                 key.type = BTRFS_METADATA_ITEM_KEY;
9467         else
9468                 key.type = BTRFS_EXTENT_ITEM_KEY;
9469         key.offset = (u64)-1;
9470
9471         /* Search for the backref in extent tree */
9472         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9473         if (ret < 0) {
9474                 err |= BACKREF_MISSING;
9475                 goto out;
9476         }
9477         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9478         if (ret) {
9479                 err |= BACKREF_MISSING;
9480                 goto out;
9481         }
9482
9483         leaf = path.nodes[0];
9484         slot = path.slots[0];
9485         btrfs_item_key_to_cpu(leaf, &key, slot);
9486
9487         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9488
9489         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9490                 skinny_level = (int)key.offset;
9491                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9492         } else {
9493                 struct btrfs_tree_block_info *info;
9494
9495                 info = (struct btrfs_tree_block_info *)(ei + 1);
9496                 skinny_level = btrfs_tree_block_level(leaf, info);
9497                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9498         }
9499
9500         if (eb) {
9501                 u64 header_gen;
9502                 u64 extent_gen;
9503
9504                 if (!(btrfs_extent_flags(leaf, ei) &
9505                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9506                         error(
9507                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9508                                 key.objectid, nodesize,
9509                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9510                         err = BACKREF_MISMATCH;
9511                 }
9512                 header_gen = btrfs_header_generation(eb);
9513                 extent_gen = btrfs_extent_generation(leaf, ei);
9514                 if (header_gen != extent_gen) {
9515                         error(
9516         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9517                                 key.objectid, nodesize, header_gen,
9518                                 extent_gen);
9519                         err = BACKREF_MISMATCH;
9520                 }
9521                 if (level != skinny_level) {
9522                         error(
9523                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9524                                 key.objectid, nodesize, level, skinny_level);
9525                         err = BACKREF_MISMATCH;
9526                 }
9527                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9528                         error(
9529                         "extent[%llu %u] is referred by other roots than %llu",
9530                                 key.objectid, nodesize, root->objectid);
9531                         err = BACKREF_MISMATCH;
9532                 }
9533         }
9534
9535         /*
9536          * Iterate the extent/metadata item to find the exact backref
9537          */
9538         item_size = btrfs_item_size_nr(leaf, slot);
9539         ptr = (unsigned long)iref;
9540         end = (unsigned long)ei + item_size;
9541         while (ptr < end) {
9542                 iref = (struct btrfs_extent_inline_ref *)ptr;
9543                 type = btrfs_extent_inline_ref_type(leaf, iref);
9544                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9545
9546                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9547                         (offset == root->objectid || offset == owner)) {
9548                         found_ref = 1;
9549                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9550                         /* Check if the backref points to valid referencer */
9551                         found_ref = !check_tree_block_ref(root, NULL, offset,
9552                                                           level + 1, owner);
9553                 }
9554
9555                 if (found_ref)
9556                         break;
9557                 ptr += btrfs_extent_inline_ref_size(type);
9558         }
9559
9560         /*
9561          * Inlined extent item doesn't have what we need, check
9562          * TREE_BLOCK_REF_KEY
9563          */
9564         if (!found_ref) {
9565                 btrfs_release_path(&path);
9566                 key.objectid = bytenr;
9567                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9568                 key.offset = root->objectid;
9569
9570                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9571                 if (!ret)
9572                         found_ref = 1;
9573         }
9574         if (!found_ref)
9575                 err |= BACKREF_MISSING;
9576 out:
9577         btrfs_release_path(&path);
9578         if (eb && (err & BACKREF_MISSING))
9579                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9580                         bytenr, nodesize, owner, level);
9581         return err;
9582 }
9583
9584 /*
9585  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9586  *
9587  * Return >0 any error found and output error message
9588  * Return 0 for no error found
9589  */
9590 static int check_extent_data_item(struct btrfs_root *root,
9591                                   struct extent_buffer *eb, int slot)
9592 {
9593         struct btrfs_file_extent_item *fi;
9594         struct btrfs_path path;
9595         struct btrfs_root *extent_root = root->fs_info->extent_root;
9596         struct btrfs_key fi_key;
9597         struct btrfs_key dbref_key;
9598         struct extent_buffer *leaf;
9599         struct btrfs_extent_item *ei;
9600         struct btrfs_extent_inline_ref *iref;
9601         struct btrfs_extent_data_ref *dref;
9602         u64 owner;
9603         u64 file_extent_gen;
9604         u64 disk_bytenr;
9605         u64 disk_num_bytes;
9606         u64 extent_num_bytes;
9607         u64 extent_flags;
9608         u64 extent_gen;
9609         u32 item_size;
9610         unsigned long end;
9611         unsigned long ptr;
9612         int type;
9613         u64 ref_root;
9614         int found_dbackref = 0;
9615         int err = 0;
9616         int ret;
9617
9618         btrfs_item_key_to_cpu(eb, &fi_key, slot);
9619         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9620         file_extent_gen = btrfs_file_extent_generation(eb, fi);
9621
9622         /* Nothing to check for hole and inline data extents */
9623         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9624             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9625                 return 0;
9626
9627         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9628         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9629         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9630
9631         /* Check unaligned disk_num_bytes and num_bytes */
9632         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9633                 error(
9634 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9635                         fi_key.objectid, fi_key.offset, disk_num_bytes,
9636                         root->sectorsize);
9637                 err |= BYTES_UNALIGNED;
9638         } else {
9639                 data_bytes_allocated += disk_num_bytes;
9640         }
9641         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9642                 error(
9643 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9644                         fi_key.objectid, fi_key.offset, extent_num_bytes,
9645                         root->sectorsize);
9646                 err |= BYTES_UNALIGNED;
9647         } else {
9648                 data_bytes_referenced += extent_num_bytes;
9649         }
9650         owner = btrfs_header_owner(eb);
9651
9652         /* Check the extent item of the file extent in extent tree */
9653         btrfs_init_path(&path);
9654         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9655         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9656         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9657
9658         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9659         if (ret) {
9660                 err |= BACKREF_MISSING;
9661                 goto error;
9662         }
9663
9664         leaf = path.nodes[0];
9665         slot = path.slots[0];
9666         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9667
9668         extent_flags = btrfs_extent_flags(leaf, ei);
9669         extent_gen = btrfs_extent_generation(leaf, ei);
9670
9671         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9672                 error(
9673                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9674                     disk_bytenr, disk_num_bytes,
9675                     BTRFS_EXTENT_FLAG_DATA);
9676                 err |= BACKREF_MISMATCH;
9677         }
9678
9679         if (file_extent_gen < extent_gen) {
9680                 error(
9681 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9682                         disk_bytenr, disk_num_bytes, file_extent_gen,
9683                         extent_gen);
9684                 err |= BACKREF_MISMATCH;
9685         }
9686
9687         /* Check data backref inside that extent item */
9688         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9689         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9690         ptr = (unsigned long)iref;
9691         end = (unsigned long)ei + item_size;
9692         while (ptr < end) {
9693                 iref = (struct btrfs_extent_inline_ref *)ptr;
9694                 type = btrfs_extent_inline_ref_type(leaf, iref);
9695                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9696
9697                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9698                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
9699                         if (ref_root == owner || ref_root == root->objectid)
9700                                 found_dbackref = 1;
9701                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9702                         found_dbackref = !check_tree_block_ref(root, NULL,
9703                                 btrfs_extent_inline_ref_offset(leaf, iref),
9704                                 0, owner);
9705                 }
9706
9707                 if (found_dbackref)
9708                         break;
9709                 ptr += btrfs_extent_inline_ref_size(type);
9710         }
9711
9712         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9713         if (!found_dbackref) {
9714                 btrfs_release_path(&path);
9715
9716                 btrfs_init_path(&path);
9717                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9718                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9719                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9720                                 fi_key.objectid, fi_key.offset);
9721
9722                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9723                                         &dbref_key, &path, 0, 0);
9724                 if (!ret)
9725                         found_dbackref = 1;
9726         }
9727
9728         if (!found_dbackref)
9729                 err |= BACKREF_MISSING;
9730 error:
9731         btrfs_release_path(&path);
9732         if (err & BACKREF_MISSING) {
9733                 error("data extent[%llu %llu] backref lost",
9734                       disk_bytenr, disk_num_bytes);
9735         }
9736         return err;
9737 }
9738
9739 /*
9740  * Get real tree block level for the case like shared block
9741  * Return >= 0 as tree level
9742  * Return <0 for error
9743  */
9744 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9745 {
9746         struct extent_buffer *eb;
9747         struct btrfs_path path;
9748         struct btrfs_key key;
9749         struct btrfs_extent_item *ei;
9750         u64 flags;
9751         u64 transid;
9752         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9753         u8 backref_level;
9754         u8 header_level;
9755         int ret;
9756
9757         /* Search extent tree for extent generation and level */
9758         key.objectid = bytenr;
9759         key.type = BTRFS_METADATA_ITEM_KEY;
9760         key.offset = (u64)-1;
9761
9762         btrfs_init_path(&path);
9763         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9764         if (ret < 0)
9765                 goto release_out;
9766         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9767         if (ret < 0)
9768                 goto release_out;
9769         if (ret > 0) {
9770                 ret = -ENOENT;
9771                 goto release_out;
9772         }
9773
9774         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9775         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9776                             struct btrfs_extent_item);
9777         flags = btrfs_extent_flags(path.nodes[0], ei);
9778         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9779                 ret = -ENOENT;
9780                 goto release_out;
9781         }
9782
9783         /* Get transid for later read_tree_block() check */
9784         transid = btrfs_extent_generation(path.nodes[0], ei);
9785
9786         /* Get backref level as one source */
9787         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9788                 backref_level = key.offset;
9789         } else {
9790                 struct btrfs_tree_block_info *info;
9791
9792                 info = (struct btrfs_tree_block_info *)(ei + 1);
9793                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9794         }
9795         btrfs_release_path(&path);
9796
9797         /* Get level from tree block as an alternative source */
9798         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9799         if (!extent_buffer_uptodate(eb)) {
9800                 free_extent_buffer(eb);
9801                 return -EIO;
9802         }
9803         header_level = btrfs_header_level(eb);
9804         free_extent_buffer(eb);
9805
9806         if (header_level != backref_level)
9807                 return -EIO;
9808         return header_level;
9809
9810 release_out:
9811         btrfs_release_path(&path);
9812         return ret;
9813 }
9814
9815 /*
9816  * Check if a tree block backref is valid (points to a valid tree block)
9817  * if level == -1, level will be resolved
9818  * Return >0 for any error found and print error message
9819  */
9820 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9821                                     u64 bytenr, int level)
9822 {
9823         struct btrfs_root *root;
9824         struct btrfs_key key;
9825         struct btrfs_path path;
9826         struct extent_buffer *eb;
9827         struct extent_buffer *node;
9828         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9829         int err = 0;
9830         int ret;
9831
9832         /* Query level for level == -1 special case */
9833         if (level == -1)
9834                 level = query_tree_block_level(fs_info, bytenr);
9835         if (level < 0) {
9836                 err |= REFERENCER_MISSING;
9837                 goto out;
9838         }
9839
9840         key.objectid = root_id;
9841         key.type = BTRFS_ROOT_ITEM_KEY;
9842         key.offset = (u64)-1;
9843
9844         root = btrfs_read_fs_root(fs_info, &key);
9845         if (IS_ERR(root)) {
9846                 err |= REFERENCER_MISSING;
9847                 goto out;
9848         }
9849
9850         /* Read out the tree block to get item/node key */
9851         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9852         if (!extent_buffer_uptodate(eb)) {
9853                 err |= REFERENCER_MISSING;
9854                 free_extent_buffer(eb);
9855                 goto out;
9856         }
9857
9858         /* Empty tree, no need to check key */
9859         if (!btrfs_header_nritems(eb) && !level) {
9860                 free_extent_buffer(eb);
9861                 goto out;
9862         }
9863
9864         if (level)
9865                 btrfs_node_key_to_cpu(eb, &key, 0);
9866         else
9867                 btrfs_item_key_to_cpu(eb, &key, 0);
9868
9869         free_extent_buffer(eb);
9870
9871         btrfs_init_path(&path);
9872         path.lowest_level = level;
9873         /* Search with the first key, to ensure we can reach it */
9874         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9875         if (ret < 0) {
9876                 err |= REFERENCER_MISSING;
9877                 goto release_out;
9878         }
9879
9880         node = path.nodes[level];
9881         if (btrfs_header_bytenr(node) != bytenr) {
9882                 error(
9883         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9884                         bytenr, nodesize, bytenr,
9885                         btrfs_header_bytenr(node));
9886                 err |= REFERENCER_MISMATCH;
9887         }
9888         if (btrfs_header_level(node) != level) {
9889                 error(
9890         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9891                         bytenr, nodesize, level,
9892                         btrfs_header_level(node));
9893                 err |= REFERENCER_MISMATCH;
9894         }
9895
9896 release_out:
9897         btrfs_release_path(&path);
9898 out:
9899         if (err & REFERENCER_MISSING) {
9900                 if (level < 0)
9901                         error("extent [%llu %d] lost referencer (owner: %llu)",
9902                                 bytenr, nodesize, root_id);
9903                 else
9904                         error(
9905                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9906                                 bytenr, nodesize, root_id, level);
9907         }
9908
9909         return err;
9910 }
9911
9912 /*
9913  * Check referencer for shared block backref
9914  * If level == -1, this function will resolve the level.
9915  */
9916 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9917                                      u64 parent, u64 bytenr, int level)
9918 {
9919         struct extent_buffer *eb;
9920         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9921         u32 nr;
9922         int found_parent = 0;
9923         int i;
9924
9925         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9926         if (!extent_buffer_uptodate(eb))
9927                 goto out;
9928
9929         if (level == -1)
9930                 level = query_tree_block_level(fs_info, bytenr);
9931         if (level < 0)
9932                 goto out;
9933
9934         if (level + 1 != btrfs_header_level(eb))
9935                 goto out;
9936
9937         nr = btrfs_header_nritems(eb);
9938         for (i = 0; i < nr; i++) {
9939                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9940                         found_parent = 1;
9941                         break;
9942                 }
9943         }
9944 out:
9945         free_extent_buffer(eb);
9946         if (!found_parent) {
9947                 error(
9948         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9949                         bytenr, nodesize, parent, level);
9950                 return REFERENCER_MISSING;
9951         }
9952         return 0;
9953 }
9954
9955 /*
9956  * Check referencer for normal (inlined) data ref
9957  * If len == 0, it will be resolved by searching in extent tree
9958  */
9959 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9960                                      u64 root_id, u64 objectid, u64 offset,
9961                                      u64 bytenr, u64 len, u32 count)
9962 {
9963         struct btrfs_root *root;
9964         struct btrfs_root *extent_root = fs_info->extent_root;
9965         struct btrfs_key key;
9966         struct btrfs_path path;
9967         struct extent_buffer *leaf;
9968         struct btrfs_file_extent_item *fi;
9969         u32 found_count = 0;
9970         int slot;
9971         int ret = 0;
9972
9973         if (!len) {
9974                 key.objectid = bytenr;
9975                 key.type = BTRFS_EXTENT_ITEM_KEY;
9976                 key.offset = (u64)-1;
9977
9978                 btrfs_init_path(&path);
9979                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9980                 if (ret < 0)
9981                         goto out;
9982                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9983                 if (ret)
9984                         goto out;
9985                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9986                 if (key.objectid != bytenr ||
9987                     key.type != BTRFS_EXTENT_ITEM_KEY)
9988                         goto out;
9989                 len = key.offset;
9990                 btrfs_release_path(&path);
9991         }
9992         key.objectid = root_id;
9993         key.type = BTRFS_ROOT_ITEM_KEY;
9994         key.offset = (u64)-1;
9995         btrfs_init_path(&path);
9996
9997         root = btrfs_read_fs_root(fs_info, &key);
9998         if (IS_ERR(root))
9999                 goto out;
10000
10001         key.objectid = objectid;
10002         key.type = BTRFS_EXTENT_DATA_KEY;
10003         /*
10004          * It can be nasty as data backref offset is
10005          * file offset - file extent offset, which is smaller or
10006          * equal to original backref offset.  The only special case is
10007          * overflow.  So we need to special check and do further search.
10008          */
10009         key.offset = offset & (1ULL << 63) ? 0 : offset;
10010
10011         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10012         if (ret < 0)
10013                 goto out;
10014
10015         /*
10016          * Search afterwards to get correct one
10017          * NOTE: As we must do a comprehensive check on the data backref to
10018          * make sure the dref count also matches, we must iterate all file
10019          * extents for that inode.
10020          */
10021         while (1) {
10022                 leaf = path.nodes[0];
10023                 slot = path.slots[0];
10024
10025                 btrfs_item_key_to_cpu(leaf, &key, slot);
10026                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10027                         break;
10028                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10029                 /*
10030                  * Except normal disk bytenr and disk num bytes, we still
10031                  * need to do extra check on dbackref offset as
10032                  * dbackref offset = file_offset - file_extent_offset
10033                  */
10034                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10035                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10036                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10037                     offset)
10038                         found_count++;
10039
10040                 ret = btrfs_next_item(root, &path);
10041                 if (ret)
10042                         break;
10043         }
10044 out:
10045         btrfs_release_path(&path);
10046         if (found_count != count) {
10047                 error(
10048 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10049                         bytenr, len, root_id, objectid, offset, count, found_count);
10050                 return REFERENCER_MISSING;
10051         }
10052         return 0;
10053 }
10054
10055 /*
10056  * Check if the referencer of a shared data backref exists
10057  */
10058 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10059                                      u64 parent, u64 bytenr)
10060 {
10061         struct extent_buffer *eb;
10062         struct btrfs_key key;
10063         struct btrfs_file_extent_item *fi;
10064         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10065         u32 nr;
10066         int found_parent = 0;
10067         int i;
10068
10069         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10070         if (!extent_buffer_uptodate(eb))
10071                 goto out;
10072
10073         nr = btrfs_header_nritems(eb);
10074         for (i = 0; i < nr; i++) {
10075                 btrfs_item_key_to_cpu(eb, &key, i);
10076                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10077                         continue;
10078
10079                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10080                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10081                         continue;
10082
10083                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10084                         found_parent = 1;
10085                         break;
10086                 }
10087         }
10088
10089 out:
10090         free_extent_buffer(eb);
10091         if (!found_parent) {
10092                 error("shared extent %llu referencer lost (parent: %llu)",
10093                         bytenr, parent);
10094                 return REFERENCER_MISSING;
10095         }
10096         return 0;
10097 }
10098
10099 /*
10100  * This function will check a given extent item, including its backref and
10101  * itself (like crossing stripe boundary and type)
10102  *
10103  * Since we don't use extent_record anymore, introduce new error bit
10104  */
10105 static int check_extent_item(struct btrfs_fs_info *fs_info,
10106                              struct extent_buffer *eb, int slot)
10107 {
10108         struct btrfs_extent_item *ei;
10109         struct btrfs_extent_inline_ref *iref;
10110         struct btrfs_extent_data_ref *dref;
10111         unsigned long end;
10112         unsigned long ptr;
10113         int type;
10114         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10115         u32 item_size = btrfs_item_size_nr(eb, slot);
10116         u64 flags;
10117         u64 offset;
10118         int metadata = 0;
10119         int level;
10120         struct btrfs_key key;
10121         int ret;
10122         int err = 0;
10123
10124         btrfs_item_key_to_cpu(eb, &key, slot);
10125         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10126                 bytes_used += key.offset;
10127         else
10128                 bytes_used += nodesize;
10129
10130         if (item_size < sizeof(*ei)) {
10131                 /*
10132                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10133                  * old thing when on disk format is still un-determined.
10134                  * No need to care about it anymore
10135                  */
10136                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10137                 return -ENOTTY;
10138         }
10139
10140         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10141         flags = btrfs_extent_flags(eb, ei);
10142
10143         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10144                 metadata = 1;
10145         if (metadata && check_crossing_stripes(global_info, key.objectid,
10146                                                eb->len)) {
10147                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10148                       key.objectid, key.objectid + nodesize);
10149                 err |= CROSSING_STRIPE_BOUNDARY;
10150         }
10151
10152         ptr = (unsigned long)(ei + 1);
10153
10154         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10155                 /* Old EXTENT_ITEM metadata */
10156                 struct btrfs_tree_block_info *info;
10157
10158                 info = (struct btrfs_tree_block_info *)ptr;
10159                 level = btrfs_tree_block_level(eb, info);
10160                 ptr += sizeof(struct btrfs_tree_block_info);
10161         } else {
10162                 /* New METADATA_ITEM */
10163                 level = key.offset;
10164         }
10165         end = (unsigned long)ei + item_size;
10166
10167         if (ptr >= end) {
10168                 err |= ITEM_SIZE_MISMATCH;
10169                 goto out;
10170         }
10171
10172         /* Now check every backref in this extent item */
10173 next:
10174         iref = (struct btrfs_extent_inline_ref *)ptr;
10175         type = btrfs_extent_inline_ref_type(eb, iref);
10176         offset = btrfs_extent_inline_ref_offset(eb, iref);
10177         switch (type) {
10178         case BTRFS_TREE_BLOCK_REF_KEY:
10179                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10180                                                level);
10181                 err |= ret;
10182                 break;
10183         case BTRFS_SHARED_BLOCK_REF_KEY:
10184                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10185                                                  level);
10186                 err |= ret;
10187                 break;
10188         case BTRFS_EXTENT_DATA_REF_KEY:
10189                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10190                 ret = check_extent_data_backref(fs_info,
10191                                 btrfs_extent_data_ref_root(eb, dref),
10192                                 btrfs_extent_data_ref_objectid(eb, dref),
10193                                 btrfs_extent_data_ref_offset(eb, dref),
10194                                 key.objectid, key.offset,
10195                                 btrfs_extent_data_ref_count(eb, dref));
10196                 err |= ret;
10197                 break;
10198         case BTRFS_SHARED_DATA_REF_KEY:
10199                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10200                 err |= ret;
10201                 break;
10202         default:
10203                 error("extent[%llu %d %llu] has unknown ref type: %d",
10204                         key.objectid, key.type, key.offset, type);
10205                 err |= UNKNOWN_TYPE;
10206                 goto out;
10207         }
10208
10209         ptr += btrfs_extent_inline_ref_size(type);
10210         if (ptr < end)
10211                 goto next;
10212
10213 out:
10214         return err;
10215 }
10216
10217 /*
10218  * Check if a dev extent item is referred correctly by its chunk
10219  */
10220 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10221                                  struct extent_buffer *eb, int slot)
10222 {
10223         struct btrfs_root *chunk_root = fs_info->chunk_root;
10224         struct btrfs_dev_extent *ptr;
10225         struct btrfs_path path;
10226         struct btrfs_key chunk_key;
10227         struct btrfs_key devext_key;
10228         struct btrfs_chunk *chunk;
10229         struct extent_buffer *l;
10230         int num_stripes;
10231         u64 length;
10232         int i;
10233         int found_chunk = 0;
10234         int ret;
10235
10236         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10237         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10238         length = btrfs_dev_extent_length(eb, ptr);
10239
10240         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10241         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10242         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10243
10244         btrfs_init_path(&path);
10245         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10246         if (ret)
10247                 goto out;
10248
10249         l = path.nodes[0];
10250         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10251         if (btrfs_chunk_length(l, chunk) != length)
10252                 goto out;
10253
10254         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10255         for (i = 0; i < num_stripes; i++) {
10256                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10257                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10258
10259                 if (devid == devext_key.objectid &&
10260                     offset == devext_key.offset) {
10261                         found_chunk = 1;
10262                         break;
10263                 }
10264         }
10265 out:
10266         btrfs_release_path(&path);
10267         if (!found_chunk) {
10268                 error(
10269                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10270                         devext_key.objectid, devext_key.offset, length);
10271                 return REFERENCER_MISSING;
10272         }
10273         return 0;
10274 }
10275
10276 /*
10277  * Check if the used space is correct with the dev item
10278  */
10279 static int check_dev_item(struct btrfs_fs_info *fs_info,
10280                           struct extent_buffer *eb, int slot)
10281 {
10282         struct btrfs_root *dev_root = fs_info->dev_root;
10283         struct btrfs_dev_item *dev_item;
10284         struct btrfs_path path;
10285         struct btrfs_key key;
10286         struct btrfs_dev_extent *ptr;
10287         u64 dev_id;
10288         u64 used;
10289         u64 total = 0;
10290         int ret;
10291
10292         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10293         dev_id = btrfs_device_id(eb, dev_item);
10294         used = btrfs_device_bytes_used(eb, dev_item);
10295
10296         key.objectid = dev_id;
10297         key.type = BTRFS_DEV_EXTENT_KEY;
10298         key.offset = 0;
10299
10300         btrfs_init_path(&path);
10301         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10302         if (ret < 0) {
10303                 btrfs_item_key_to_cpu(eb, &key, slot);
10304                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10305                         key.objectid, key.type, key.offset);
10306                 btrfs_release_path(&path);
10307                 return REFERENCER_MISSING;
10308         }
10309
10310         /* Iterate dev_extents to calculate the used space of a device */
10311         while (1) {
10312                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10313
10314                 if (key.objectid > dev_id)
10315                         break;
10316                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10317                         goto next;
10318
10319                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10320                                      struct btrfs_dev_extent);
10321                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10322 next:
10323                 ret = btrfs_next_item(dev_root, &path);
10324                 if (ret)
10325                         break;
10326         }
10327         btrfs_release_path(&path);
10328
10329         if (used != total) {
10330                 btrfs_item_key_to_cpu(eb, &key, slot);
10331                 error(
10332 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10333                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10334                         BTRFS_DEV_EXTENT_KEY, dev_id);
10335                 return ACCOUNTING_MISMATCH;
10336         }
10337         return 0;
10338 }
10339
10340 /*
10341  * Check a block group item with its referener (chunk) and its used space
10342  * with extent/metadata item
10343  */
10344 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10345                                   struct extent_buffer *eb, int slot)
10346 {
10347         struct btrfs_root *extent_root = fs_info->extent_root;
10348         struct btrfs_root *chunk_root = fs_info->chunk_root;
10349         struct btrfs_block_group_item *bi;
10350         struct btrfs_block_group_item bg_item;
10351         struct btrfs_path path;
10352         struct btrfs_key bg_key;
10353         struct btrfs_key chunk_key;
10354         struct btrfs_key extent_key;
10355         struct btrfs_chunk *chunk;
10356         struct extent_buffer *leaf;
10357         struct btrfs_extent_item *ei;
10358         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10359         u64 flags;
10360         u64 bg_flags;
10361         u64 used;
10362         u64 total = 0;
10363         int ret;
10364         int err = 0;
10365
10366         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10367         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10368         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10369         used = btrfs_block_group_used(&bg_item);
10370         bg_flags = btrfs_block_group_flags(&bg_item);
10371
10372         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10373         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10374         chunk_key.offset = bg_key.objectid;
10375
10376         btrfs_init_path(&path);
10377         /* Search for the referencer chunk */
10378         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10379         if (ret) {
10380                 error(
10381                 "block group[%llu %llu] did not find the related chunk item",
10382                         bg_key.objectid, bg_key.offset);
10383                 err |= REFERENCER_MISSING;
10384         } else {
10385                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10386                                         struct btrfs_chunk);
10387                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10388                                                 bg_key.offset) {
10389                         error(
10390         "block group[%llu %llu] related chunk item length does not match",
10391                                 bg_key.objectid, bg_key.offset);
10392                         err |= REFERENCER_MISMATCH;
10393                 }
10394         }
10395         btrfs_release_path(&path);
10396
10397         /* Search from the block group bytenr */
10398         extent_key.objectid = bg_key.objectid;
10399         extent_key.type = 0;
10400         extent_key.offset = 0;
10401
10402         btrfs_init_path(&path);
10403         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10404         if (ret < 0)
10405                 goto out;
10406
10407         /* Iterate extent tree to account used space */
10408         while (1) {
10409                 leaf = path.nodes[0];
10410                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10411                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10412                         break;
10413
10414                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10415                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10416                         goto next;
10417                 if (extent_key.objectid < bg_key.objectid)
10418                         goto next;
10419
10420                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10421                         total += nodesize;
10422                 else
10423                         total += extent_key.offset;
10424
10425                 ei = btrfs_item_ptr(leaf, path.slots[0],
10426                                     struct btrfs_extent_item);
10427                 flags = btrfs_extent_flags(leaf, ei);
10428                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10429                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10430                                 error(
10431                         "bad extent[%llu, %llu) type mismatch with chunk",
10432                                         extent_key.objectid,
10433                                         extent_key.objectid + extent_key.offset);
10434                                 err |= CHUNK_TYPE_MISMATCH;
10435                         }
10436                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10437                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10438                                     BTRFS_BLOCK_GROUP_METADATA))) {
10439                                 error(
10440                         "bad extent[%llu, %llu) type mismatch with chunk",
10441                                         extent_key.objectid,
10442                                         extent_key.objectid + nodesize);
10443                                 err |= CHUNK_TYPE_MISMATCH;
10444                         }
10445                 }
10446 next:
10447                 ret = btrfs_next_item(extent_root, &path);
10448                 if (ret)
10449                         break;
10450         }
10451
10452 out:
10453         btrfs_release_path(&path);
10454
10455         if (total != used) {
10456                 error(
10457                 "block group[%llu %llu] used %llu but extent items used %llu",
10458                         bg_key.objectid, bg_key.offset, used, total);
10459                 err |= ACCOUNTING_MISMATCH;
10460         }
10461         return err;
10462 }
10463
10464 /*
10465  * Check a chunk item.
10466  * Including checking all referred dev_extents and block group
10467  */
10468 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10469                             struct extent_buffer *eb, int slot)
10470 {
10471         struct btrfs_root *extent_root = fs_info->extent_root;
10472         struct btrfs_root *dev_root = fs_info->dev_root;
10473         struct btrfs_path path;
10474         struct btrfs_key chunk_key;
10475         struct btrfs_key bg_key;
10476         struct btrfs_key devext_key;
10477         struct btrfs_chunk *chunk;
10478         struct extent_buffer *leaf;
10479         struct btrfs_block_group_item *bi;
10480         struct btrfs_block_group_item bg_item;
10481         struct btrfs_dev_extent *ptr;
10482         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10483         u64 length;
10484         u64 chunk_end;
10485         u64 type;
10486         u64 profile;
10487         int num_stripes;
10488         u64 offset;
10489         u64 objectid;
10490         int i;
10491         int ret;
10492         int err = 0;
10493
10494         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10495         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10496         length = btrfs_chunk_length(eb, chunk);
10497         chunk_end = chunk_key.offset + length;
10498         if (!IS_ALIGNED(length, sectorsize)) {
10499                 error("chunk[%llu %llu) not aligned to %u",
10500                         chunk_key.offset, chunk_end, sectorsize);
10501                 err |= BYTES_UNALIGNED;
10502                 goto out;
10503         }
10504
10505         type = btrfs_chunk_type(eb, chunk);
10506         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10507         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10508                 error("chunk[%llu %llu) has no chunk type",
10509                         chunk_key.offset, chunk_end);
10510                 err |= UNKNOWN_TYPE;
10511         }
10512         if (profile && (profile & (profile - 1))) {
10513                 error("chunk[%llu %llu) multiple profiles detected: %llx",
10514                         chunk_key.offset, chunk_end, profile);
10515                 err |= UNKNOWN_TYPE;
10516         }
10517
10518         bg_key.objectid = chunk_key.offset;
10519         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10520         bg_key.offset = length;
10521
10522         btrfs_init_path(&path);
10523         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10524         if (ret) {
10525                 error(
10526                 "chunk[%llu %llu) did not find the related block group item",
10527                         chunk_key.offset, chunk_end);
10528                 err |= REFERENCER_MISSING;
10529         } else{
10530                 leaf = path.nodes[0];
10531                 bi = btrfs_item_ptr(leaf, path.slots[0],
10532                                     struct btrfs_block_group_item);
10533                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10534                                    sizeof(bg_item));
10535                 if (btrfs_block_group_flags(&bg_item) != type) {
10536                         error(
10537 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10538                                 chunk_key.offset, chunk_end, type,
10539                                 btrfs_block_group_flags(&bg_item));
10540                         err |= REFERENCER_MISSING;
10541                 }
10542         }
10543
10544         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10545         for (i = 0; i < num_stripes; i++) {
10546                 btrfs_release_path(&path);
10547                 btrfs_init_path(&path);
10548                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10549                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10550                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10551
10552                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10553                                         0, 0);
10554                 if (ret)
10555                         goto not_match_dev;
10556
10557                 leaf = path.nodes[0];
10558                 ptr = btrfs_item_ptr(leaf, path.slots[0],
10559                                      struct btrfs_dev_extent);
10560                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10561                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10562                 if (objectid != chunk_key.objectid ||
10563                     offset != chunk_key.offset ||
10564                     btrfs_dev_extent_length(leaf, ptr) != length)
10565                         goto not_match_dev;
10566                 continue;
10567 not_match_dev:
10568                 err |= BACKREF_MISSING;
10569                 error(
10570                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10571                         chunk_key.objectid, chunk_end, i);
10572                 continue;
10573         }
10574         btrfs_release_path(&path);
10575 out:
10576         return err;
10577 }
10578
10579 /*
10580  * Main entry function to check known items and update related accounting info
10581  */
10582 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10583 {
10584         struct btrfs_fs_info *fs_info = root->fs_info;
10585         struct btrfs_key key;
10586         int slot = 0;
10587         int type;
10588         struct btrfs_extent_data_ref *dref;
10589         int ret;
10590         int err = 0;
10591
10592 next:
10593         btrfs_item_key_to_cpu(eb, &key, slot);
10594         type = key.type;
10595
10596         switch (type) {
10597         case BTRFS_EXTENT_DATA_KEY:
10598                 ret = check_extent_data_item(root, eb, slot);
10599                 err |= ret;
10600                 break;
10601         case BTRFS_BLOCK_GROUP_ITEM_KEY:
10602                 ret = check_block_group_item(fs_info, eb, slot);
10603                 err |= ret;
10604                 break;
10605         case BTRFS_DEV_ITEM_KEY:
10606                 ret = check_dev_item(fs_info, eb, slot);
10607                 err |= ret;
10608                 break;
10609         case BTRFS_CHUNK_ITEM_KEY:
10610                 ret = check_chunk_item(fs_info, eb, slot);
10611                 err |= ret;
10612                 break;
10613         case BTRFS_DEV_EXTENT_KEY:
10614                 ret = check_dev_extent_item(fs_info, eb, slot);
10615                 err |= ret;
10616                 break;
10617         case BTRFS_EXTENT_ITEM_KEY:
10618         case BTRFS_METADATA_ITEM_KEY:
10619                 ret = check_extent_item(fs_info, eb, slot);
10620                 err |= ret;
10621                 break;
10622         case BTRFS_EXTENT_CSUM_KEY:
10623                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10624                 break;
10625         case BTRFS_TREE_BLOCK_REF_KEY:
10626                 ret = check_tree_block_backref(fs_info, key.offset,
10627                                                key.objectid, -1);
10628                 err |= ret;
10629                 break;
10630         case BTRFS_EXTENT_DATA_REF_KEY:
10631                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10632                 ret = check_extent_data_backref(fs_info,
10633                                 btrfs_extent_data_ref_root(eb, dref),
10634                                 btrfs_extent_data_ref_objectid(eb, dref),
10635                                 btrfs_extent_data_ref_offset(eb, dref),
10636                                 key.objectid, 0,
10637                                 btrfs_extent_data_ref_count(eb, dref));
10638                 err |= ret;
10639                 break;
10640         case BTRFS_SHARED_BLOCK_REF_KEY:
10641                 ret = check_shared_block_backref(fs_info, key.offset,
10642                                                  key.objectid, -1);
10643                 err |= ret;
10644                 break;
10645         case BTRFS_SHARED_DATA_REF_KEY:
10646                 ret = check_shared_data_backref(fs_info, key.offset,
10647                                                 key.objectid);
10648                 err |= ret;
10649                 break;
10650         default:
10651                 break;
10652         }
10653
10654         if (++slot < btrfs_header_nritems(eb))
10655                 goto next;
10656
10657         return err;
10658 }
10659
10660 /*
10661  * Helper function for later fs/subvol tree check.  To determine if a tree
10662  * block should be checked.
10663  * This function will ensure only the direct referencer with lowest rootid to
10664  * check a fs/subvolume tree block.
10665  *
10666  * Backref check at extent tree would detect errors like missing subvolume
10667  * tree, so we can do aggressive check to reduce duplicated checks.
10668  */
10669 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10670 {
10671         struct btrfs_root *extent_root = root->fs_info->extent_root;
10672         struct btrfs_key key;
10673         struct btrfs_path path;
10674         struct extent_buffer *leaf;
10675         int slot;
10676         struct btrfs_extent_item *ei;
10677         unsigned long ptr;
10678         unsigned long end;
10679         int type;
10680         u32 item_size;
10681         u64 offset;
10682         struct btrfs_extent_inline_ref *iref;
10683         int ret;
10684
10685         btrfs_init_path(&path);
10686         key.objectid = btrfs_header_bytenr(eb);
10687         key.type = BTRFS_METADATA_ITEM_KEY;
10688         key.offset = (u64)-1;
10689
10690         /*
10691          * Any failure in backref resolving means we can't determine
10692          * whom the tree block belongs to.
10693          * So in that case, we need to check that tree block
10694          */
10695         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10696         if (ret < 0)
10697                 goto need_check;
10698
10699         ret = btrfs_previous_extent_item(extent_root, &path,
10700                                          btrfs_header_bytenr(eb));
10701         if (ret)
10702                 goto need_check;
10703
10704         leaf = path.nodes[0];
10705         slot = path.slots[0];
10706         btrfs_item_key_to_cpu(leaf, &key, slot);
10707         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10708
10709         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10710                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10711         } else {
10712                 struct btrfs_tree_block_info *info;
10713
10714                 info = (struct btrfs_tree_block_info *)(ei + 1);
10715                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10716         }
10717
10718         item_size = btrfs_item_size_nr(leaf, slot);
10719         ptr = (unsigned long)iref;
10720         end = (unsigned long)ei + item_size;
10721         while (ptr < end) {
10722                 iref = (struct btrfs_extent_inline_ref *)ptr;
10723                 type = btrfs_extent_inline_ref_type(leaf, iref);
10724                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10725
10726                 /*
10727                  * We only check the tree block if current root is
10728                  * the lowest referencer of it.
10729                  */
10730                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10731                     offset < root->objectid) {
10732                         btrfs_release_path(&path);
10733                         return 0;
10734                 }
10735
10736                 ptr += btrfs_extent_inline_ref_size(type);
10737         }
10738         /*
10739          * Normally we should also check keyed tree block ref, but that may be
10740          * very time consuming.  Inlined ref should already make us skip a lot
10741          * of refs now.  So skip search keyed tree block ref.
10742          */
10743
10744 need_check:
10745         btrfs_release_path(&path);
10746         return 1;
10747 }
10748
10749 /*
10750  * Traversal function for tree block. We will do:
10751  * 1) Skip shared fs/subvolume tree blocks
10752  * 2) Update related bytes accounting
10753  * 3) Pre-order traversal
10754  */
10755 static int traverse_tree_block(struct btrfs_root *root,
10756                                 struct extent_buffer *node)
10757 {
10758         struct extent_buffer *eb;
10759         struct btrfs_key key;
10760         struct btrfs_key drop_key;
10761         int level;
10762         u64 nr;
10763         int i;
10764         int err = 0;
10765         int ret;
10766
10767         /*
10768          * Skip shared fs/subvolume tree block, in that case they will
10769          * be checked by referencer with lowest rootid
10770          */
10771         if (is_fstree(root->objectid) && !should_check(root, node))
10772                 return 0;
10773
10774         /* Update bytes accounting */
10775         total_btree_bytes += node->len;
10776         if (fs_root_objectid(btrfs_header_owner(node)))
10777                 total_fs_tree_bytes += node->len;
10778         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10779                 total_extent_tree_bytes += node->len;
10780         if (!found_old_backref &&
10781             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10782             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10783             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10784                 found_old_backref = 1;
10785
10786         /* pre-order tranversal, check itself first */
10787         level = btrfs_header_level(node);
10788         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10789                                    btrfs_header_level(node),
10790                                    btrfs_header_owner(node));
10791         err |= ret;
10792         if (err)
10793                 error(
10794         "check %s failed root %llu bytenr %llu level %d, force continue check",
10795                         level ? "node":"leaf", root->objectid,
10796                         btrfs_header_bytenr(node), btrfs_header_level(node));
10797
10798         if (!level) {
10799                 btree_space_waste += btrfs_leaf_free_space(root, node);
10800                 ret = check_leaf_items(root, node);
10801                 err |= ret;
10802                 return err;
10803         }
10804
10805         nr = btrfs_header_nritems(node);
10806         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10807         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10808                 sizeof(struct btrfs_key_ptr);
10809
10810         /* Then check all its children */
10811         for (i = 0; i < nr; i++) {
10812                 u64 blocknr = btrfs_node_blockptr(node, i);
10813
10814                 btrfs_node_key_to_cpu(node, &key, i);
10815                 if (level == root->root_item.drop_level &&
10816                     is_dropped_key(&key, &drop_key))
10817                         continue;
10818
10819                 /*
10820                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10821                  * to call the function itself.
10822                  */
10823                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10824                 if (extent_buffer_uptodate(eb)) {
10825                         ret = traverse_tree_block(root, eb);
10826                         err |= ret;
10827                 }
10828                 free_extent_buffer(eb);
10829         }
10830
10831         return err;
10832 }
10833
10834 /*
10835  * Low memory usage version check_chunks_and_extents.
10836  */
10837 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10838 {
10839         struct btrfs_path path;
10840         struct btrfs_key key;
10841         struct btrfs_root *root1;
10842         struct btrfs_root *cur_root;
10843         int err = 0;
10844         int ret;
10845
10846         root1 = root->fs_info->chunk_root;
10847         ret = traverse_tree_block(root1, root1->node);
10848         err |= ret;
10849
10850         root1 = root->fs_info->tree_root;
10851         ret = traverse_tree_block(root1, root1->node);
10852         err |= ret;
10853
10854         btrfs_init_path(&path);
10855         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10856         key.offset = 0;
10857         key.type = BTRFS_ROOT_ITEM_KEY;
10858
10859         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10860         if (ret) {
10861                 error("cannot find extent treet in tree_root");
10862                 goto out;
10863         }
10864
10865         while (1) {
10866                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10867                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10868                         goto next;
10869                 key.offset = (u64)-1;
10870
10871                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10872                 if (IS_ERR(cur_root) || !cur_root) {
10873                         error("failed to read tree: %lld", key.objectid);
10874                         goto next;
10875                 }
10876
10877                 ret = traverse_tree_block(cur_root, cur_root->node);
10878                 err |= ret;
10879
10880 next:
10881                 ret = btrfs_next_item(root1, &path);
10882                 if (ret)
10883                         goto out;
10884         }
10885
10886 out:
10887         btrfs_release_path(&path);
10888         return err;
10889 }
10890
10891 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10892                            struct btrfs_root *root, int overwrite)
10893 {
10894         struct extent_buffer *c;
10895         struct extent_buffer *old = root->node;
10896         int level;
10897         int ret;
10898         struct btrfs_disk_key disk_key = {0,0,0};
10899
10900         level = 0;
10901
10902         if (overwrite) {
10903                 c = old;
10904                 extent_buffer_get(c);
10905                 goto init;
10906         }
10907         c = btrfs_alloc_free_block(trans, root,
10908                                    root->nodesize,
10909                                    root->root_key.objectid,
10910                                    &disk_key, level, 0, 0);
10911         if (IS_ERR(c)) {
10912                 c = old;
10913                 extent_buffer_get(c);
10914                 overwrite = 1;
10915         }
10916 init:
10917         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10918         btrfs_set_header_level(c, level);
10919         btrfs_set_header_bytenr(c, c->start);
10920         btrfs_set_header_generation(c, trans->transid);
10921         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10922         btrfs_set_header_owner(c, root->root_key.objectid);
10923
10924         write_extent_buffer(c, root->fs_info->fsid,
10925                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10926
10927         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10928                             btrfs_header_chunk_tree_uuid(c),
10929                             BTRFS_UUID_SIZE);
10930
10931         btrfs_mark_buffer_dirty(c);
10932         /*
10933          * this case can happen in the following case:
10934          *
10935          * 1.overwrite previous root.
10936          *
10937          * 2.reinit reloc data root, this is because we skip pin
10938          * down reloc data tree before which means we can allocate
10939          * same block bytenr here.
10940          */
10941         if (old->start == c->start) {
10942                 btrfs_set_root_generation(&root->root_item,
10943                                           trans->transid);
10944                 root->root_item.level = btrfs_header_level(root->node);
10945                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10946                                         &root->root_key, &root->root_item);
10947                 if (ret) {
10948                         free_extent_buffer(c);
10949                         return ret;
10950                 }
10951         }
10952         free_extent_buffer(old);
10953         root->node = c;
10954         add_root_to_dirty_list(root);
10955         return 0;
10956 }
10957
10958 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10959                                 struct extent_buffer *eb, int tree_root)
10960 {
10961         struct extent_buffer *tmp;
10962         struct btrfs_root_item *ri;
10963         struct btrfs_key key;
10964         u64 bytenr;
10965         u32 nodesize;
10966         int level = btrfs_header_level(eb);
10967         int nritems;
10968         int ret;
10969         int i;
10970
10971         /*
10972          * If we have pinned this block before, don't pin it again.
10973          * This can not only avoid forever loop with broken filesystem
10974          * but also give us some speedups.
10975          */
10976         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10977                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10978                 return 0;
10979
10980         btrfs_pin_extent(fs_info, eb->start, eb->len);
10981
10982         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10983         nritems = btrfs_header_nritems(eb);
10984         for (i = 0; i < nritems; i++) {
10985                 if (level == 0) {
10986                         btrfs_item_key_to_cpu(eb, &key, i);
10987                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10988                                 continue;
10989                         /* Skip the extent root and reloc roots */
10990                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10991                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10992                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10993                                 continue;
10994                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10995                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10996
10997                         /*
10998                          * If at any point we start needing the real root we
10999                          * will have to build a stump root for the root we are
11000                          * in, but for now this doesn't actually use the root so
11001                          * just pass in extent_root.
11002                          */
11003                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11004                                               nodesize, 0);
11005                         if (!extent_buffer_uptodate(tmp)) {
11006                                 fprintf(stderr, "Error reading root block\n");
11007                                 return -EIO;
11008                         }
11009                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11010                         free_extent_buffer(tmp);
11011                         if (ret)
11012                                 return ret;
11013                 } else {
11014                         bytenr = btrfs_node_blockptr(eb, i);
11015
11016                         /* If we aren't the tree root don't read the block */
11017                         if (level == 1 && !tree_root) {
11018                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11019                                 continue;
11020                         }
11021
11022                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11023                                               nodesize, 0);
11024                         if (!extent_buffer_uptodate(tmp)) {
11025                                 fprintf(stderr, "Error reading tree block\n");
11026                                 return -EIO;
11027                         }
11028                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11029                         free_extent_buffer(tmp);
11030                         if (ret)
11031                                 return ret;
11032                 }
11033         }
11034
11035         return 0;
11036 }
11037
11038 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11039 {
11040         int ret;
11041
11042         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11043         if (ret)
11044                 return ret;
11045
11046         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11047 }
11048
11049 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11050 {
11051         struct btrfs_block_group_cache *cache;
11052         struct btrfs_path path;
11053         struct extent_buffer *leaf;
11054         struct btrfs_chunk *chunk;
11055         struct btrfs_key key;
11056         int ret;
11057         u64 start;
11058
11059         btrfs_init_path(&path);
11060         key.objectid = 0;
11061         key.type = BTRFS_CHUNK_ITEM_KEY;
11062         key.offset = 0;
11063         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11064         if (ret < 0) {
11065                 btrfs_release_path(&path);
11066                 return ret;
11067         }
11068
11069         /*
11070          * We do this in case the block groups were screwed up and had alloc
11071          * bits that aren't actually set on the chunks.  This happens with
11072          * restored images every time and could happen in real life I guess.
11073          */
11074         fs_info->avail_data_alloc_bits = 0;
11075         fs_info->avail_metadata_alloc_bits = 0;
11076         fs_info->avail_system_alloc_bits = 0;
11077
11078         /* First we need to create the in-memory block groups */
11079         while (1) {
11080                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11081                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11082                         if (ret < 0) {
11083                                 btrfs_release_path(&path);
11084                                 return ret;
11085                         }
11086                         if (ret) {
11087                                 ret = 0;
11088                                 break;
11089                         }
11090                 }
11091                 leaf = path.nodes[0];
11092                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11093                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11094                         path.slots[0]++;
11095                         continue;
11096                 }
11097
11098                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11099                 btrfs_add_block_group(fs_info, 0,
11100                                       btrfs_chunk_type(leaf, chunk),
11101                                       key.objectid, key.offset,
11102                                       btrfs_chunk_length(leaf, chunk));
11103                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11104                                  key.offset + btrfs_chunk_length(leaf, chunk),
11105                                  GFP_NOFS);
11106                 path.slots[0]++;
11107         }
11108         start = 0;
11109         while (1) {
11110                 cache = btrfs_lookup_first_block_group(fs_info, start);
11111                 if (!cache)
11112                         break;
11113                 cache->cached = 1;
11114                 start = cache->key.objectid + cache->key.offset;
11115         }
11116
11117         btrfs_release_path(&path);
11118         return 0;
11119 }
11120
11121 static int reset_balance(struct btrfs_trans_handle *trans,
11122                          struct btrfs_fs_info *fs_info)
11123 {
11124         struct btrfs_root *root = fs_info->tree_root;
11125         struct btrfs_path path;
11126         struct extent_buffer *leaf;
11127         struct btrfs_key key;
11128         int del_slot, del_nr = 0;
11129         int ret;
11130         int found = 0;
11131
11132         btrfs_init_path(&path);
11133         key.objectid = BTRFS_BALANCE_OBJECTID;
11134         key.type = BTRFS_BALANCE_ITEM_KEY;
11135         key.offset = 0;
11136         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11137         if (ret) {
11138                 if (ret > 0)
11139                         ret = 0;
11140                 if (!ret)
11141                         goto reinit_data_reloc;
11142                 else
11143                         goto out;
11144         }
11145
11146         ret = btrfs_del_item(trans, root, &path);
11147         if (ret)
11148                 goto out;
11149         btrfs_release_path(&path);
11150
11151         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11152         key.type = BTRFS_ROOT_ITEM_KEY;
11153         key.offset = 0;
11154         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11155         if (ret < 0)
11156                 goto out;
11157         while (1) {
11158                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11159                         if (!found)
11160                                 break;
11161
11162                         if (del_nr) {
11163                                 ret = btrfs_del_items(trans, root, &path,
11164                                                       del_slot, del_nr);
11165                                 del_nr = 0;
11166                                 if (ret)
11167                                         goto out;
11168                         }
11169                         key.offset++;
11170                         btrfs_release_path(&path);
11171
11172                         found = 0;
11173                         ret = btrfs_search_slot(trans, root, &key, &path,
11174                                                 -1, 1);
11175                         if (ret < 0)
11176                                 goto out;
11177                         continue;
11178                 }
11179                 found = 1;
11180                 leaf = path.nodes[0];
11181                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11182                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11183                         break;
11184                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11185                         path.slots[0]++;
11186                         continue;
11187                 }
11188                 if (!del_nr) {
11189                         del_slot = path.slots[0];
11190                         del_nr = 1;
11191                 } else {
11192                         del_nr++;
11193                 }
11194                 path.slots[0]++;
11195         }
11196
11197         if (del_nr) {
11198                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11199                 if (ret)
11200                         goto out;
11201         }
11202         btrfs_release_path(&path);
11203
11204 reinit_data_reloc:
11205         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11206         key.type = BTRFS_ROOT_ITEM_KEY;
11207         key.offset = (u64)-1;
11208         root = btrfs_read_fs_root(fs_info, &key);
11209         if (IS_ERR(root)) {
11210                 fprintf(stderr, "Error reading data reloc tree\n");
11211                 ret = PTR_ERR(root);
11212                 goto out;
11213         }
11214         record_root_in_trans(trans, root);
11215         ret = btrfs_fsck_reinit_root(trans, root, 0);
11216         if (ret)
11217                 goto out;
11218         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11219 out:
11220         btrfs_release_path(&path);
11221         return ret;
11222 }
11223
11224 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11225                               struct btrfs_fs_info *fs_info)
11226 {
11227         u64 start = 0;
11228         int ret;
11229
11230         /*
11231          * The only reason we don't do this is because right now we're just
11232          * walking the trees we find and pinning down their bytes, we don't look
11233          * at any of the leaves.  In order to do mixed groups we'd have to check
11234          * the leaves of any fs roots and pin down the bytes for any file
11235          * extents we find.  Not hard but why do it if we don't have to?
11236          */
11237         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11238                 fprintf(stderr, "We don't support re-initing the extent tree "
11239                         "for mixed block groups yet, please notify a btrfs "
11240                         "developer you want to do this so they can add this "
11241                         "functionality.\n");
11242                 return -EINVAL;
11243         }
11244
11245         /*
11246          * first we need to walk all of the trees except the extent tree and pin
11247          * down the bytes that are in use so we don't overwrite any existing
11248          * metadata.
11249          */
11250         ret = pin_metadata_blocks(fs_info);
11251         if (ret) {
11252                 fprintf(stderr, "error pinning down used bytes\n");
11253                 return ret;
11254         }
11255
11256         /*
11257          * Need to drop all the block groups since we're going to recreate all
11258          * of them again.
11259          */
11260         btrfs_free_block_groups(fs_info);
11261         ret = reset_block_groups(fs_info);
11262         if (ret) {
11263                 fprintf(stderr, "error resetting the block groups\n");
11264                 return ret;
11265         }
11266
11267         /* Ok we can allocate now, reinit the extent root */
11268         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11269         if (ret) {
11270                 fprintf(stderr, "extent root initialization failed\n");
11271                 /*
11272                  * When the transaction code is updated we should end the
11273                  * transaction, but for now progs only knows about commit so
11274                  * just return an error.
11275                  */
11276                 return ret;
11277         }
11278
11279         /*
11280          * Now we have all the in-memory block groups setup so we can make
11281          * allocations properly, and the metadata we care about is safe since we
11282          * pinned all of it above.
11283          */
11284         while (1) {
11285                 struct btrfs_block_group_cache *cache;
11286
11287                 cache = btrfs_lookup_first_block_group(fs_info, start);
11288                 if (!cache)
11289                         break;
11290                 start = cache->key.objectid + cache->key.offset;
11291                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11292                                         &cache->key, &cache->item,
11293                                         sizeof(cache->item));
11294                 if (ret) {
11295                         fprintf(stderr, "Error adding block group\n");
11296                         return ret;
11297                 }
11298                 btrfs_extent_post_op(trans, fs_info->extent_root);
11299         }
11300
11301         ret = reset_balance(trans, fs_info);
11302         if (ret)
11303                 fprintf(stderr, "error resetting the pending balance\n");
11304
11305         return ret;
11306 }
11307
11308 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11309 {
11310         struct btrfs_path path;
11311         struct btrfs_trans_handle *trans;
11312         struct btrfs_key key;
11313         int ret;
11314
11315         printf("Recowing metadata block %llu\n", eb->start);
11316         key.objectid = btrfs_header_owner(eb);
11317         key.type = BTRFS_ROOT_ITEM_KEY;
11318         key.offset = (u64)-1;
11319
11320         root = btrfs_read_fs_root(root->fs_info, &key);
11321         if (IS_ERR(root)) {
11322                 fprintf(stderr, "Couldn't find owner root %llu\n",
11323                         key.objectid);
11324                 return PTR_ERR(root);
11325         }
11326
11327         trans = btrfs_start_transaction(root, 1);
11328         if (IS_ERR(trans))
11329                 return PTR_ERR(trans);
11330
11331         btrfs_init_path(&path);
11332         path.lowest_level = btrfs_header_level(eb);
11333         if (path.lowest_level)
11334                 btrfs_node_key_to_cpu(eb, &key, 0);
11335         else
11336                 btrfs_item_key_to_cpu(eb, &key, 0);
11337
11338         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11339         btrfs_commit_transaction(trans, root);
11340         btrfs_release_path(&path);
11341         return ret;
11342 }
11343
11344 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11345 {
11346         struct btrfs_path path;
11347         struct btrfs_trans_handle *trans;
11348         struct btrfs_key key;
11349         int ret;
11350
11351         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11352                bad->key.type, bad->key.offset);
11353         key.objectid = bad->root_id;
11354         key.type = BTRFS_ROOT_ITEM_KEY;
11355         key.offset = (u64)-1;
11356
11357         root = btrfs_read_fs_root(root->fs_info, &key);
11358         if (IS_ERR(root)) {
11359                 fprintf(stderr, "Couldn't find owner root %llu\n",
11360                         key.objectid);
11361                 return PTR_ERR(root);
11362         }
11363
11364         trans = btrfs_start_transaction(root, 1);
11365         if (IS_ERR(trans))
11366                 return PTR_ERR(trans);
11367
11368         btrfs_init_path(&path);
11369         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11370         if (ret) {
11371                 if (ret > 0)
11372                         ret = 0;
11373                 goto out;
11374         }
11375         ret = btrfs_del_item(trans, root, &path);
11376 out:
11377         btrfs_commit_transaction(trans, root);
11378         btrfs_release_path(&path);
11379         return ret;
11380 }
11381
11382 static int zero_log_tree(struct btrfs_root *root)
11383 {
11384         struct btrfs_trans_handle *trans;
11385         int ret;
11386
11387         trans = btrfs_start_transaction(root, 1);
11388         if (IS_ERR(trans)) {
11389                 ret = PTR_ERR(trans);
11390                 return ret;
11391         }
11392         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11393         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11394         ret = btrfs_commit_transaction(trans, root);
11395         return ret;
11396 }
11397
11398 static int populate_csum(struct btrfs_trans_handle *trans,
11399                          struct btrfs_root *csum_root, char *buf, u64 start,
11400                          u64 len)
11401 {
11402         u64 offset = 0;
11403         u64 sectorsize;
11404         int ret = 0;
11405
11406         while (offset < len) {
11407                 sectorsize = csum_root->sectorsize;
11408                 ret = read_extent_data(csum_root, buf, start + offset,
11409                                        &sectorsize, 0);
11410                 if (ret)
11411                         break;
11412                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11413                                             start + offset, buf, sectorsize);
11414                 if (ret)
11415                         break;
11416                 offset += sectorsize;
11417         }
11418         return ret;
11419 }
11420
11421 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11422                                       struct btrfs_root *csum_root,
11423                                       struct btrfs_root *cur_root)
11424 {
11425         struct btrfs_path path;
11426         struct btrfs_key key;
11427         struct extent_buffer *node;
11428         struct btrfs_file_extent_item *fi;
11429         char *buf = NULL;
11430         u64 start = 0;
11431         u64 len = 0;
11432         int slot = 0;
11433         int ret = 0;
11434
11435         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11436         if (!buf)
11437                 return -ENOMEM;
11438
11439         btrfs_init_path(&path);
11440         key.objectid = 0;
11441         key.offset = 0;
11442         key.type = 0;
11443         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11444         if (ret < 0)
11445                 goto out;
11446         /* Iterate all regular file extents and fill its csum */
11447         while (1) {
11448                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11449
11450                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11451                         goto next;
11452                 node = path.nodes[0];
11453                 slot = path.slots[0];
11454                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11455                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
11456                         goto next;
11457                 start = btrfs_file_extent_disk_bytenr(node, fi);
11458                 len = btrfs_file_extent_disk_num_bytes(node, fi);
11459
11460                 ret = populate_csum(trans, csum_root, buf, start, len);
11461                 if (ret == -EEXIST)
11462                         ret = 0;
11463                 if (ret < 0)
11464                         goto out;
11465 next:
11466                 /*
11467                  * TODO: if next leaf is corrupted, jump to nearest next valid
11468                  * leaf.
11469                  */
11470                 ret = btrfs_next_item(cur_root, &path);
11471                 if (ret < 0)
11472                         goto out;
11473                 if (ret > 0) {
11474                         ret = 0;
11475                         goto out;
11476                 }
11477         }
11478
11479 out:
11480         btrfs_release_path(&path);
11481         free(buf);
11482         return ret;
11483 }
11484
11485 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11486                                   struct btrfs_root *csum_root)
11487 {
11488         struct btrfs_fs_info *fs_info = csum_root->fs_info;
11489         struct btrfs_path path;
11490         struct btrfs_root *tree_root = fs_info->tree_root;
11491         struct btrfs_root *cur_root;
11492         struct extent_buffer *node;
11493         struct btrfs_key key;
11494         int slot = 0;
11495         int ret = 0;
11496
11497         btrfs_init_path(&path);
11498         key.objectid = BTRFS_FS_TREE_OBJECTID;
11499         key.offset = 0;
11500         key.type = BTRFS_ROOT_ITEM_KEY;
11501         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11502         if (ret < 0)
11503                 goto out;
11504         if (ret > 0) {
11505                 ret = -ENOENT;
11506                 goto out;
11507         }
11508
11509         while (1) {
11510                 node = path.nodes[0];
11511                 slot = path.slots[0];
11512                 btrfs_item_key_to_cpu(node, &key, slot);
11513                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11514                         goto out;
11515                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11516                         goto next;
11517                 if (!is_fstree(key.objectid))
11518                         goto next;
11519                 key.offset = (u64)-1;
11520
11521                 cur_root = btrfs_read_fs_root(fs_info, &key);
11522                 if (IS_ERR(cur_root) || !cur_root) {
11523                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11524                                 key.objectid);
11525                         goto out;
11526                 }
11527                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11528                                 cur_root);
11529                 if (ret < 0)
11530                         goto out;
11531 next:
11532                 ret = btrfs_next_item(tree_root, &path);
11533                 if (ret > 0) {
11534                         ret = 0;
11535                         goto out;
11536                 }
11537                 if (ret < 0)
11538                         goto out;
11539         }
11540
11541 out:
11542         btrfs_release_path(&path);
11543         return ret;
11544 }
11545
11546 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11547                                       struct btrfs_root *csum_root)
11548 {
11549         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11550         struct btrfs_path path;
11551         struct btrfs_extent_item *ei;
11552         struct extent_buffer *leaf;
11553         char *buf;
11554         struct btrfs_key key;
11555         int ret;
11556
11557         btrfs_init_path(&path);
11558         key.objectid = 0;
11559         key.type = BTRFS_EXTENT_ITEM_KEY;
11560         key.offset = 0;
11561         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11562         if (ret < 0) {
11563                 btrfs_release_path(&path);
11564                 return ret;
11565         }
11566
11567         buf = malloc(csum_root->sectorsize);
11568         if (!buf) {
11569                 btrfs_release_path(&path);
11570                 return -ENOMEM;
11571         }
11572
11573         while (1) {
11574                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11575                         ret = btrfs_next_leaf(extent_root, &path);
11576                         if (ret < 0)
11577                                 break;
11578                         if (ret) {
11579                                 ret = 0;
11580                                 break;
11581                         }
11582                 }
11583                 leaf = path.nodes[0];
11584
11585                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11586                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11587                         path.slots[0]++;
11588                         continue;
11589                 }
11590
11591                 ei = btrfs_item_ptr(leaf, path.slots[0],
11592                                     struct btrfs_extent_item);
11593                 if (!(btrfs_extent_flags(leaf, ei) &
11594                       BTRFS_EXTENT_FLAG_DATA)) {
11595                         path.slots[0]++;
11596                         continue;
11597                 }
11598
11599                 ret = populate_csum(trans, csum_root, buf, key.objectid,
11600                                     key.offset);
11601                 if (ret)
11602                         break;
11603                 path.slots[0]++;
11604         }
11605
11606         btrfs_release_path(&path);
11607         free(buf);
11608         return ret;
11609 }
11610
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree cannot be used as the source.  When search_fs_tree is set the
 * fs/subvolume trees are walked instead; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
11627
11628 static void free_roots_info_cache(void)
11629 {
11630         if (!roots_info_cache)
11631                 return;
11632
11633         while (!cache_tree_empty(roots_info_cache)) {
11634                 struct cache_extent *entry;
11635                 struct root_item_info *rii;
11636
11637                 entry = first_cache_extent(roots_info_cache);
11638                 if (!entry)
11639                         break;
11640                 remove_cache_extent(roots_info_cache, entry);
11641                 rii = container_of(entry, struct root_item_info, cache_extent);
11642                 free(rii);
11643         }
11644
11645         free(roots_info_cache);
11646         roots_info_cache = NULL;
11647 }
11648
11649 static int build_roots_info_cache(struct btrfs_fs_info *info)
11650 {
11651         int ret = 0;
11652         struct btrfs_key key;
11653         struct extent_buffer *leaf;
11654         struct btrfs_path path;
11655
11656         if (!roots_info_cache) {
11657                 roots_info_cache = malloc(sizeof(*roots_info_cache));
11658                 if (!roots_info_cache)
11659                         return -ENOMEM;
11660                 cache_tree_init(roots_info_cache);
11661         }
11662
11663         btrfs_init_path(&path);
11664         key.objectid = 0;
11665         key.type = BTRFS_EXTENT_ITEM_KEY;
11666         key.offset = 0;
11667         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11668         if (ret < 0)
11669                 goto out;
11670         leaf = path.nodes[0];
11671
11672         while (1) {
11673                 struct btrfs_key found_key;
11674                 struct btrfs_extent_item *ei;
11675                 struct btrfs_extent_inline_ref *iref;
11676                 int slot = path.slots[0];
11677                 int type;
11678                 u64 flags;
11679                 u64 root_id;
11680                 u8 level;
11681                 struct cache_extent *entry;
11682                 struct root_item_info *rii;
11683
11684                 if (slot >= btrfs_header_nritems(leaf)) {
11685                         ret = btrfs_next_leaf(info->extent_root, &path);
11686                         if (ret < 0) {
11687                                 break;
11688                         } else if (ret) {
11689                                 ret = 0;
11690                                 break;
11691                         }
11692                         leaf = path.nodes[0];
11693                         slot = path.slots[0];
11694                 }
11695
11696                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11697
11698                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11699                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11700                         goto next;
11701
11702                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11703                 flags = btrfs_extent_flags(leaf, ei);
11704
11705                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11706                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11707                         goto next;
11708
11709                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11710                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11711                         level = found_key.offset;
11712                 } else {
11713                         struct btrfs_tree_block_info *binfo;
11714
11715                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11716                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11717                         level = btrfs_tree_block_level(leaf, binfo);
11718                 }
11719
11720                 /*
11721                  * For a root extent, it must be of the following type and the
11722                  * first (and only one) iref in the item.
11723                  */
11724                 type = btrfs_extent_inline_ref_type(leaf, iref);
11725                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11726                         goto next;
11727
11728                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11729                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11730                 if (!entry) {
11731                         rii = malloc(sizeof(struct root_item_info));
11732                         if (!rii) {
11733                                 ret = -ENOMEM;
11734                                 goto out;
11735                         }
11736                         rii->cache_extent.start = root_id;
11737                         rii->cache_extent.size = 1;
11738                         rii->level = (u8)-1;
11739                         entry = &rii->cache_extent;
11740                         ret = insert_cache_extent(roots_info_cache, entry);
11741                         ASSERT(ret == 0);
11742                 } else {
11743                         rii = container_of(entry, struct root_item_info,
11744                                            cache_extent);
11745                 }
11746
11747                 ASSERT(rii->cache_extent.start == root_id);
11748                 ASSERT(rii->cache_extent.size == 1);
11749
11750                 if (level > rii->level || rii->level == (u8)-1) {
11751                         rii->level = level;
11752                         rii->bytenr = found_key.objectid;
11753                         rii->gen = btrfs_extent_generation(leaf, ei);
11754                         rii->node_count = 1;
11755                 } else if (level == rii->level) {
11756                         rii->node_count++;
11757                 }
11758 next:
11759                 path.slots[0]++;
11760         }
11761
11762 out:
11763         btrfs_release_path(&path);
11764
11765         return ret;
11766 }
11767
11768 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11769                                   struct btrfs_path *path,
11770                                   const struct btrfs_key *root_key,
11771                                   const int read_only_mode)
11772 {
11773         const u64 root_id = root_key->objectid;
11774         struct cache_extent *entry;
11775         struct root_item_info *rii;
11776         struct btrfs_root_item ri;
11777         unsigned long offset;
11778
11779         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11780         if (!entry) {
11781                 fprintf(stderr,
11782                         "Error: could not find extent items for root %llu\n",
11783                         root_key->objectid);
11784                 return -ENOENT;
11785         }
11786
11787         rii = container_of(entry, struct root_item_info, cache_extent);
11788         ASSERT(rii->cache_extent.start == root_id);
11789         ASSERT(rii->cache_extent.size == 1);
11790
11791         if (rii->node_count != 1) {
11792                 fprintf(stderr,
11793                         "Error: could not find btree root extent for root %llu\n",
11794                         root_id);
11795                 return -ENOENT;
11796         }
11797
11798         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11799         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11800
11801         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11802             btrfs_root_level(&ri) != rii->level ||
11803             btrfs_root_generation(&ri) != rii->gen) {
11804
11805                 /*
11806                  * If we're in repair mode but our caller told us to not update
11807                  * the root item, i.e. just check if it needs to be updated, don't
11808                  * print this message, since the caller will call us again shortly
11809                  * for the same root item without read only mode (the caller will
11810                  * open a transaction first).
11811                  */
11812                 if (!(read_only_mode && repair))
11813                         fprintf(stderr,
11814                                 "%sroot item for root %llu,"
11815                                 " current bytenr %llu, current gen %llu, current level %u,"
11816                                 " new bytenr %llu, new gen %llu, new level %u\n",
11817                                 (read_only_mode ? "" : "fixing "),
11818                                 root_id,
11819                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11820                                 btrfs_root_level(&ri),
11821                                 rii->bytenr, rii->gen, rii->level);
11822
11823                 if (btrfs_root_generation(&ri) > rii->gen) {
11824                         fprintf(stderr,
11825                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11826                                 root_id, btrfs_root_generation(&ri), rii->gen);
11827                         return -EINVAL;
11828                 }
11829
11830                 if (!read_only_mode) {
11831                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11832                         btrfs_set_root_level(&ri, rii->level);
11833                         btrfs_set_root_generation(&ri, rii->gen);
11834                         write_extent_buffer(path->nodes[0], &ri,
11835                                             offset, sizeof(ri));
11836                 }
11837
11838                 return 1;
11839         }
11840
11841         return 0;
11842 }
11843
11844 /*
11845  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11846  * caused read-only snapshots to be corrupted if they were created at a moment
11847  * when the source subvolume/snapshot had orphan items. The issue was that the
11848  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11849  * node instead of the post orphan cleanup root node.
11850  * So this function, and its callees, just detects and fixes those cases. Even
11851  * though the regression was for read-only snapshots, this function applies to
11852  * any snapshot/subvolume root.
11853  * This must be run before any other repair code - not doing it so, makes other
11854  * repair code delete or modify backrefs in the extent tree for example, which
11855  * will result in an inconsistent fs after repairing the root items.
11856  */
11857 static int repair_root_items(struct btrfs_fs_info *info)
11858 {
11859         struct btrfs_path path;
11860         struct btrfs_key key;
11861         struct extent_buffer *leaf;
11862         struct btrfs_trans_handle *trans = NULL;
11863         int ret = 0;
11864         int bad_roots = 0;
11865         int need_trans = 0;
11866
11867         btrfs_init_path(&path);
11868
11869         ret = build_roots_info_cache(info);
11870         if (ret)
11871                 goto out;
11872
11873         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11874         key.type = BTRFS_ROOT_ITEM_KEY;
11875         key.offset = 0;
11876
11877 again:
11878         /*
11879          * Avoid opening and committing transactions if a leaf doesn't have
11880          * any root items that need to be fixed, so that we avoid rotating
11881          * backup roots unnecessarily.
11882          */
11883         if (need_trans) {
11884                 trans = btrfs_start_transaction(info->tree_root, 1);
11885                 if (IS_ERR(trans)) {
11886                         ret = PTR_ERR(trans);
11887                         goto out;
11888                 }
11889         }
11890
11891         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11892                                 0, trans ? 1 : 0);
11893         if (ret < 0)
11894                 goto out;
11895         leaf = path.nodes[0];
11896
11897         while (1) {
11898                 struct btrfs_key found_key;
11899
11900                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11901                         int no_more_keys = find_next_key(&path, &key);
11902
11903                         btrfs_release_path(&path);
11904                         if (trans) {
11905                                 ret = btrfs_commit_transaction(trans,
11906                                                                info->tree_root);
11907                                 trans = NULL;
11908                                 if (ret < 0)
11909                                         goto out;
11910                         }
11911                         need_trans = 0;
11912                         if (no_more_keys)
11913                                 break;
11914                         goto again;
11915                 }
11916
11917                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11918
11919                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11920                         goto next;
11921                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11922                         goto next;
11923
11924                 ret = maybe_repair_root_item(info, &path, &found_key,
11925                                              trans ? 0 : 1);
11926                 if (ret < 0)
11927                         goto out;
11928                 if (ret) {
11929                         if (!trans && repair) {
11930                                 need_trans = 1;
11931                                 key = found_key;
11932                                 btrfs_release_path(&path);
11933                                 goto again;
11934                         }
11935                         bad_roots++;
11936                 }
11937 next:
11938                 path.slots[0]++;
11939         }
11940         ret = 0;
11941 out:
11942         free_roots_info_cache();
11943         btrfs_release_path(&path);
11944         if (trans)
11945                 btrfs_commit_transaction(trans, info->tree_root);
11946         if (ret < 0)
11947                 return ret;
11948
11949         return bad_roots;
11950 }
11951
11952 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11953 {
11954         struct btrfs_trans_handle *trans;
11955         struct btrfs_block_group_cache *bg_cache;
11956         u64 current = 0;
11957         int ret = 0;
11958
11959         /* Clear all free space cache inodes and its extent data */
11960         while (1) {
11961                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11962                 if (!bg_cache)
11963                         break;
11964                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11965                 if (ret < 0)
11966                         return ret;
11967                 current = bg_cache->key.objectid + bg_cache->key.offset;
11968         }
11969
11970         /* Don't forget to set cache_generation to -1 */
11971         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11972         if (IS_ERR(trans)) {
11973                 error("failed to update super block cache generation");
11974                 return PTR_ERR(trans);
11975         }
11976         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11977         btrfs_commit_transaction(trans, fs_info->tree_root);
11978
11979         return ret;
11980 }
11981
/*
 * Help text for "btrfs check", as a NULL-terminated array of lines.
 *
 * The first entry is the synopsis and the second a one-line summary. The
 * following entries up to the empty string form the long description, and
 * the entries after the empty string describe the individual options.
 */
const char *const cmd_check_usage[] = {
	/* synopsis and one-line summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* super block / root selection and repair switches */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",

	/* checker implementation selection */
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",

	/* additional checks and reports */
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",

	/* explicit root locations, progress and space cache handling */
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	/* terminator */
	NULL
};
12012
12013 int cmd_check(int argc, char **argv)
12014 {
12015         struct cache_tree root_cache;
12016         struct btrfs_root *root;
12017         struct btrfs_fs_info *info;
12018         u64 bytenr = 0;
12019         u64 subvolid = 0;
12020         u64 tree_root_bytenr = 0;
12021         u64 chunk_root_bytenr = 0;
12022         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12023         int ret;
12024         u64 num;
12025         int init_csum_tree = 0;
12026         int readonly = 0;
12027         int clear_space_cache = 0;
12028         int qgroup_report = 0;
12029         int qgroups_repaired = 0;
12030         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12031
12032         while(1) {
12033                 int c;
12034                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12035                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12036                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12037                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12038                 static const struct option long_options[] = {
12039                         { "super", required_argument, NULL, 's' },
12040                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12041                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12042                         { "init-csum-tree", no_argument, NULL,
12043                                 GETOPT_VAL_INIT_CSUM },
12044                         { "init-extent-tree", no_argument, NULL,
12045                                 GETOPT_VAL_INIT_EXTENT },
12046                         { "check-data-csum", no_argument, NULL,
12047                                 GETOPT_VAL_CHECK_CSUM },
12048                         { "backup", no_argument, NULL, 'b' },
12049                         { "subvol-extents", required_argument, NULL, 'E' },
12050                         { "qgroup-report", no_argument, NULL, 'Q' },
12051                         { "tree-root", required_argument, NULL, 'r' },
12052                         { "chunk-root", required_argument, NULL,
12053                                 GETOPT_VAL_CHUNK_TREE },
12054                         { "progress", no_argument, NULL, 'p' },
12055                         { "mode", required_argument, NULL,
12056                                 GETOPT_VAL_MODE },
12057                         { "clear-space-cache", required_argument, NULL,
12058                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12059                         { NULL, 0, NULL, 0}
12060                 };
12061
12062                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12063                 if (c < 0)
12064                         break;
12065                 switch(c) {
12066                         case 'a': /* ignored */ break;
12067                         case 'b':
12068                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12069                                 break;
12070                         case 's':
12071                                 num = arg_strtou64(optarg);
12072                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12073                                         error(
12074                                         "super mirror should be less than %d",
12075                                                 BTRFS_SUPER_MIRROR_MAX);
12076                                         exit(1);
12077                                 }
12078                                 bytenr = btrfs_sb_offset(((int)num));
12079                                 printf("using SB copy %llu, bytenr %llu\n", num,
12080                                        (unsigned long long)bytenr);
12081                                 break;
12082                         case 'Q':
12083                                 qgroup_report = 1;
12084                                 break;
12085                         case 'E':
12086                                 subvolid = arg_strtou64(optarg);
12087                                 break;
12088                         case 'r':
12089                                 tree_root_bytenr = arg_strtou64(optarg);
12090                                 break;
12091                         case GETOPT_VAL_CHUNK_TREE:
12092                                 chunk_root_bytenr = arg_strtou64(optarg);
12093                                 break;
12094                         case 'p':
12095                                 ctx.progress_enabled = true;
12096                                 break;
12097                         case '?':
12098                         case 'h':
12099                                 usage(cmd_check_usage);
12100                         case GETOPT_VAL_REPAIR:
12101                                 printf("enabling repair mode\n");
12102                                 repair = 1;
12103                                 ctree_flags |= OPEN_CTREE_WRITES;
12104                                 break;
12105                         case GETOPT_VAL_READONLY:
12106                                 readonly = 1;
12107                                 break;
12108                         case GETOPT_VAL_INIT_CSUM:
12109                                 printf("Creating a new CRC tree\n");
12110                                 init_csum_tree = 1;
12111                                 repair = 1;
12112                                 ctree_flags |= OPEN_CTREE_WRITES;
12113                                 break;
12114                         case GETOPT_VAL_INIT_EXTENT:
12115                                 init_extent_tree = 1;
12116                                 ctree_flags |= (OPEN_CTREE_WRITES |
12117                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12118                                 repair = 1;
12119                                 break;
12120                         case GETOPT_VAL_CHECK_CSUM:
12121                                 check_data_csum = 1;
12122                                 break;
12123                         case GETOPT_VAL_MODE:
12124                                 check_mode = parse_check_mode(optarg);
12125                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12126                                         error("unknown mode: %s", optarg);
12127                                         exit(1);
12128                                 }
12129                                 break;
12130                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12131                                 if (strcmp(optarg, "v1") == 0) {
12132                                         clear_space_cache = 1;
12133                                 } else if (strcmp(optarg, "v2") == 0) {
12134                                         clear_space_cache = 2;
12135                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12136                                 } else {
12137                                         error(
12138                 "invalid argument to --clear-space-cache, must be v1 or v2");
12139                                         exit(1);
12140                                 }
12141                                 ctree_flags |= OPEN_CTREE_WRITES;
12142                                 break;
12143                 }
12144         }
12145
12146         if (check_argc_exact(argc - optind, 1))
12147                 usage(cmd_check_usage);
12148
12149         if (ctx.progress_enabled) {
12150                 ctx.tp = TASK_NOTHING;
12151                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12152         }
12153
12154         /* This check is the only reason for --readonly to exist */
12155         if (readonly && repair) {
12156                 error("repair options are not compatible with --readonly");
12157                 exit(1);
12158         }
12159
12160         /*
12161          * Not supported yet
12162          */
12163         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12164                 error("low memory mode doesn't support repair yet");
12165                 exit(1);
12166         }
12167
12168         radix_tree_init();
12169         cache_tree_init(&root_cache);
12170
12171         if((ret = check_mounted(argv[optind])) < 0) {
12172                 error("could not check mount status: %s", strerror(-ret));
12173                 goto err_out;
12174         } else if(ret) {
12175                 error("%s is currently mounted, aborting", argv[optind]);
12176                 ret = -EBUSY;
12177                 goto err_out;
12178         }
12179
12180         /* only allow partial opening under repair mode */
12181         if (repair)
12182                 ctree_flags |= OPEN_CTREE_PARTIAL;
12183
12184         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12185                                   chunk_root_bytenr, ctree_flags);
12186         if (!info) {
12187                 error("cannot open file system");
12188                 ret = -EIO;
12189                 goto err_out;
12190         }
12191
12192         global_info = info;
12193         root = info->fs_root;
12194         if (clear_space_cache == 1) {
12195                 if (btrfs_fs_compat_ro(info,
12196                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12197                         error(
12198                 "free space cache v2 detected, use --clear-space-cache v2");
12199                         ret = 1;
12200                         goto close_out;
12201                 }
12202                 printf("Clearing free space cache\n");
12203                 ret = clear_free_space_cache(info);
12204                 if (ret) {
12205                         error("failed to clear free space cache");
12206                         ret = 1;
12207                 } else {
12208                         printf("Free space cache cleared\n");
12209                 }
12210                 goto close_out;
12211         } else if (clear_space_cache == 2) {
12212                 if (!btrfs_fs_compat_ro(info,
12213                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12214                         printf("no free space cache v2 to clear\n");
12215                         ret = 0;
12216                         goto close_out;
12217                 }
12218                 printf("Clear free space cache v2\n");
12219                 ret = btrfs_clear_free_space_tree(info);
12220                 if (ret) {
12221                         error("failed to clear free space cache v2: %d", ret);
12222                         ret = 1;
12223                 } else {
12224                         printf("free space cache v2 cleared\n");
12225                 }
12226                 goto close_out;
12227         }
12228
12229         /*
12230          * repair mode will force us to commit transaction which
12231          * will make us fail to load log tree when mounting.
12232          */
12233         if (repair && btrfs_super_log_root(info->super_copy)) {
12234                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12235                 if (!ret) {
12236                         ret = 1;
12237                         goto close_out;
12238                 }
12239                 ret = zero_log_tree(root);
12240                 if (ret) {
12241                         error("failed to zero log tree: %d", ret);
12242                         goto close_out;
12243                 }
12244         }
12245
12246         uuid_unparse(info->super_copy->fsid, uuidbuf);
12247         if (qgroup_report) {
12248                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12249                        uuidbuf);
12250                 ret = qgroup_verify_all(info);
12251                 if (ret == 0)
12252                         report_qgroups(1);
12253                 goto close_out;
12254         }
12255         if (subvolid) {
12256                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12257                        subvolid, argv[optind], uuidbuf);
12258                 ret = print_extent_state(info, subvolid);
12259                 goto close_out;
12260         }
12261         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12262
12263         if (!extent_buffer_uptodate(info->tree_root->node) ||
12264             !extent_buffer_uptodate(info->dev_root->node) ||
12265             !extent_buffer_uptodate(info->chunk_root->node)) {
12266                 error("critical roots corrupted, unable to check the filesystem");
12267                 ret = -EIO;
12268                 goto close_out;
12269         }
12270
12271         if (init_extent_tree || init_csum_tree) {
12272                 struct btrfs_trans_handle *trans;
12273
12274                 trans = btrfs_start_transaction(info->extent_root, 0);
12275                 if (IS_ERR(trans)) {
12276                         error("error starting transaction");
12277                         ret = PTR_ERR(trans);
12278                         goto close_out;
12279                 }
12280
12281                 if (init_extent_tree) {
12282                         printf("Creating a new extent tree\n");
12283                         ret = reinit_extent_tree(trans, info);
12284                         if (ret)
12285                                 goto close_out;
12286                 }
12287
12288                 if (init_csum_tree) {
12289                         printf("Reinitialize checksum tree\n");
12290                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12291                         if (ret) {
12292                                 error("checksum tree initialization failed: %d",
12293                                                 ret);
12294                                 ret = -EIO;
12295                                 goto close_out;
12296                         }
12297
12298                         ret = fill_csum_tree(trans, info->csum_root,
12299                                              init_extent_tree);
12300                         if (ret) {
12301                                 error("checksum tree refilling failed: %d", ret);
12302                                 return -EIO;
12303                         }
12304                 }
12305                 /*
12306                  * Ok now we commit and run the normal fsck, which will add
12307                  * extent entries for all of the items it finds.
12308                  */
12309                 ret = btrfs_commit_transaction(trans, info->extent_root);
12310                 if (ret)
12311                         goto close_out;
12312         }
12313         if (!extent_buffer_uptodate(info->extent_root->node)) {
12314                 error("critical: extent_root, unable to check the filesystem");
12315                 ret = -EIO;
12316                 goto close_out;
12317         }
12318         if (!extent_buffer_uptodate(info->csum_root->node)) {
12319                 error("critical: csum_root, unable to check the filesystem");
12320                 ret = -EIO;
12321                 goto close_out;
12322         }
12323
12324         if (!ctx.progress_enabled)
12325                 fprintf(stderr, "checking extents\n");
12326         if (check_mode == CHECK_MODE_LOWMEM)
12327                 ret = check_chunks_and_extents_v2(root);
12328         else
12329                 ret = check_chunks_and_extents(root);
12330         if (ret)
12331                 error(
12332                 "errors found in extent allocation tree or chunk allocation");
12333
12334         ret = repair_root_items(info);
12335         if (ret < 0)
12336                 goto close_out;
12337         if (repair) {
12338                 fprintf(stderr, "Fixed %d roots.\n", ret);
12339                 ret = 0;
12340         } else if (ret > 0) {
12341                 fprintf(stderr,
12342                        "Found %d roots with an outdated root item.\n",
12343                        ret);
12344                 fprintf(stderr,
12345                         "Please run a filesystem check with the option --repair to fix them.\n");
12346                 ret = 1;
12347                 goto close_out;
12348         }
12349
12350         if (!ctx.progress_enabled) {
12351                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12352                         fprintf(stderr, "checking free space tree\n");
12353                 else
12354                         fprintf(stderr, "checking free space cache\n");
12355         }
12356         ret = check_space_cache(root);
12357         if (ret)
12358                 goto out;
12359
12360         /*
12361          * We used to have to have these hole extents in between our real
12362          * extents so if we don't have this flag set we need to make sure there
12363          * are no gaps in the file extents for inodes, otherwise we can just
12364          * ignore it when this happens.
12365          */
12366         no_holes = btrfs_fs_incompat(root->fs_info,
12367                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12368         if (!ctx.progress_enabled)
12369                 fprintf(stderr, "checking fs roots\n");
12370         ret = check_fs_roots(root, &root_cache);
12371         if (ret)
12372                 goto out;
12373
12374         fprintf(stderr, "checking csums\n");
12375         ret = check_csums(root);
12376         if (ret)
12377                 goto out;
12378
12379         fprintf(stderr, "checking root refs\n");
12380         ret = check_root_refs(root, &root_cache);
12381         if (ret)
12382                 goto out;
12383
12384         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12385                 struct extent_buffer *eb;
12386
12387                 eb = list_first_entry(&root->fs_info->recow_ebs,
12388                                       struct extent_buffer, recow);
12389                 list_del_init(&eb->recow);
12390                 ret = recow_extent_buffer(root, eb);
12391                 if (ret)
12392                         break;
12393         }
12394
12395         while (!list_empty(&delete_items)) {
12396                 struct bad_item *bad;
12397
12398                 bad = list_first_entry(&delete_items, struct bad_item, list);
12399                 list_del_init(&bad->list);
12400                 if (repair)
12401                         ret = delete_bad_item(root, bad);
12402                 free(bad);
12403         }
12404
12405         if (info->quota_enabled) {
12406                 int err;
12407                 fprintf(stderr, "checking quota groups\n");
12408                 err = qgroup_verify_all(info);
12409                 if (err)
12410                         goto out;
12411                 report_qgroups(0);
12412                 err = repair_qgroups(info, &qgroups_repaired);
12413                 if (err)
12414                         goto out;
12415         }
12416
12417         if (!list_empty(&root->fs_info->recow_ebs)) {
12418                 error("transid errors in file system");
12419                 ret = 1;
12420         }
12421 out:
12422         /* Don't override original ret */
12423         if (!ret && qgroups_repaired)
12424                 ret = qgroups_repaired;
12425
12426         if (found_old_backref) { /*
12427                  * there was a disk format change when mixed
12428                  * backref was in testing tree. The old format
12429                  * existed about one week.
12430                  */
12431                 printf("\n * Found old mixed backref format. "
12432                        "The old format is not supported! *"
12433                        "\n * Please mount the FS in readonly mode, "
12434                        "backup data and re-format the FS. *\n\n");
12435                 ret = 1;
12436         }
12437         printf("found %llu bytes used err is %d\n",
12438                (unsigned long long)bytes_used, ret);
12439         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12440         printf("total tree bytes: %llu\n",
12441                (unsigned long long)total_btree_bytes);
12442         printf("total fs tree bytes: %llu\n",
12443                (unsigned long long)total_fs_tree_bytes);
12444         printf("total extent tree bytes: %llu\n",
12445                (unsigned long long)total_extent_tree_bytes);
12446         printf("btree space waste bytes: %llu\n",
12447                (unsigned long long)btree_space_waste);
12448         printf("file data blocks allocated: %llu\n referenced %llu\n",
12449                 (unsigned long long)data_bytes_allocated,
12450                 (unsigned long long)data_bytes_referenced);
12451
12452         free_qgroup_counts();
12453         free_root_recs_tree(&root_cache);
12454 close_out:
12455         close_ctree(root);
12456 err_out:
12457         if (ctx.progress_enabled)
12458                 task_deinit(ctx.info);
12459
12460         return ret;
12461 }