18eecca806aabe47695c5a7e8df164b26a25e081
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases reported by the progress indicator.
 *
 * The values are used as indexes into task_position_string[] in
 * print_status_check(), so the order here has to match the order of the
 * strings there.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/* State handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
        int progress_enabled;
        /* phase whose label print_status_check() prints */
        enum task_position tp;

        /* handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementations that can be selected by name, see
 * parse_check_mode().  CHECK_MODE_UNKNOWN is what parse_check_mode()
 * returns for a name it does not recognise; CHECK_MODE_DEFAULT is an alias
 * for CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Common part of a back reference.  It is embedded as the 'node' member of
 * struct data_backref and struct tree_backref; to_data_backref() and
 * to_tree_backref() recover the containing structure from it.
 */
struct extent_backref {
        /* list link, mapped back to this struct by to_extent_backref() */
        struct list_head list;
        /* presumably tells data_backref and tree_backref containers apart - verify */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
/*
 * Back reference with data-extent specific fields.  The common part lives
 * in 'node'; use to_data_backref() to get from the common part to this
 * structure.
 */
struct data_backref {
        struct extent_backref node;
        /* 'parent' and 'root' share storage, only one of them is meaningful */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* a full u32, distinct from the one-bit node.found_ref flag */
        u32 found_ref;
};
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 *
 * During the extent scan the entries are stored in
 * root->orphan_data_extent.  During the fs tree scan they are moved to
 * inode_rec->orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* printed as the inode number by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
134
/*
 * Back reference for a tree block.  The common part lives in 'node'; use
 * to_tree_backref() to get from the common part to this structure.
 */
struct tree_backref {
        struct extent_backref node;
        /* 'parent' and 'root' share storage, only one of them is meaningful */
        union {
                u64 parent;
                u64 root;
        };
};
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Explicit "not set" value for extent_record::flag_block_full_backref.
 * That bitfield is two bits wide, so it can hold 2 in addition to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
150
/*
 * Bookkeeping kept for one extent.
 *
 * NOTE(review): most members are only used outside the code visible next
 * to this definition; the comments below cover what can be seen here.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* list link, mapped back to this struct by to_extent_record() */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can hold FLAG_UNSET (2) besides 0 and 1 */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
/*
 * One name referring to an inode, linked into inode_record::backrefs.
 *
 * The name is stored directly behind the structure: clone_inode_rec()
 * copies sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        /* compared against imode_to_type(inode mode) in maybe_free_inode_rec() */
        u8 filetype;
        u8 ref_type;
        /* REF_ERR_* bits */
        int errors;
        u64 dir;
        u64 index;
        u16 namelen;
        /* trailing storage for the name, see above */
        char name[0];
};
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
/*
 * Values remembered for one root item.
 *
 * NOTE(review): the users of this structure are outside the code visible
 * next to this definition; presumably the fields mirror the root item
 * fields of the same name - verify against the code that fills them in.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
212
/*
 * Bits of the 'errors' mask of a back reference (for example
 * inode_backref::errors).  print_ref_error() turns a mask into text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
226
/*
 * A hole in a file, covering the byte range [start, start + len).
 * Holes are kept in an rb-tree (inode_record::holes) ordered by
 * compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
232
/*
 * State collected for one inode.
 *
 * Records are reference counted through 'refs': get_inode_rec() clones a
 * record before handing it out for modification while refs > 1, and
 * free_inode_rec() releases the record once the count drops to zero.
 */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* compared against nlink in can_free_inode_rec() */
        u32 found_link;
        u64 found_size;
        /* initialised to (u64)-1 by get_inode_rec() */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* reference count, see above */
        u32 refs;
};
260
/*
 * Bits of inode_record::errors.  print_inode_error() turns a mask into
 * text.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
276
/*
 * One reference to a root, linked into root_record::backrefs.
 *
 * NOTE(review): like struct inode_backref this ends in a zero-length
 * 'name' array, so the name is presumably allocated right behind the
 * structure - verify against the allocation site.
 */
struct root_backref {
        /* list link, mapped back to this struct by to_root_backref() */
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
/*
 * State collected for one root.
 * NOTE(review): 'backrefs' presumably holds struct root_backref entries -
 * verify against the code that fills the list.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
304
/*
 * Cache entry carrying an untyped payload.  get_inode_rec() keys such an
 * entry by inode number (cache.start = ino, cache.size = 1) and stores the
 * struct inode_record pointer in 'data'.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
309
/*
 * Per-node bundle of a root cache and an inode cache, itself a cache entry.
 * NOTE(review): the users are outside the code visible next to this
 * definition; presumably one exists per shared tree block - verify.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
317
/* Start and size of a block (units not visible here - presumably bytes, verify). */
struct block_info {
        u64 start;
        u32 size;
};
322
/*
 * State of a tree walk.
 * NOTE(review): 'nodes' has BTRFS_MAX_LEVEL slots, presumably one per tree
 * level with 'active_node' selecting the current one - verify against the
 * walk code.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
329
/* Key of an item together with the id of the root it belongs to; list element. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
335
/*
 * List element describing a (bytenr, bytes) range with a counter.
 * NOTE(review): the meaning of 'count' and 'broken' is set by code outside
 * the part visible next to this definition.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
343
/* Information remembered about the node(s) found for a root; a cache entry. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
353
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally to classify errors.
 *
 * Every error has its own bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two indistinguishable
 * in a combined mask; it now uses the next free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Finish the status line written by print_status_check(): emit a newline
 * on stdout and flush it.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
/* Forward declaration; the function is defined later in this file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         struct rb_node *rb;
663         size_t size;
664         int ret;
665
666         rec = malloc(sizeof(*rec));
667         if (!rec)
668                 return ERR_PTR(-ENOMEM);
669         memcpy(rec, orig_rec, sizeof(*rec));
670         rec->refs = 1;
671         INIT_LIST_HEAD(&rec->backrefs);
672         INIT_LIST_HEAD(&rec->orphan_extents);
673         rec->holes = RB_ROOT;
674
675         list_for_each_entry(orig, &orig_rec->backrefs, list) {
676                 size = sizeof(*orig) + orig->namelen + 1;
677                 backref = malloc(size);
678                 if (!backref) {
679                         ret = -ENOMEM;
680                         goto cleanup;
681                 }
682                 memcpy(backref, orig, size);
683                 list_add_tail(&backref->list, &rec->backrefs);
684         }
685         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686                 dst_orphan = malloc(sizeof(*dst_orphan));
687                 if (!dst_orphan) {
688                         ret = -ENOMEM;
689                         goto cleanup;
690                 }
691                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693         }
694         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
695         if (ret < 0)
696                 goto cleanup_rb;
697
698         return rec;
699
700 cleanup_rb:
701         rb = rb_first(&rec->holes);
702         while (rb) {
703                 struct file_extent_hole *hole;
704
705                 hole = rb_entry(rb, struct file_extent_hole, node);
706                 rb = rb_next(rb);
707                 free(hole);
708         }
709
710 cleanup:
711         if (!list_empty(&rec->backrefs))
712                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713                         list_del(&orig->list);
714                         free(orig);
715                 }
716
717         if (!list_empty(&rec->orphan_extents))
718                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719                         list_del(&orig->list);
720                         free(orig);
721                 }
722
723         free(rec);
724
725         return ERR_PTR(ret);
726 }
727
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
729                                       u64 objectid)
730 {
731         struct orphan_data_extent *orphan;
732
733         if (list_empty(orphan_extents))
734                 return;
735         printf("The following data extent is lost in tree %llu:\n",
736                objectid);
737         list_for_each_entry(orphan, orphan_extents, list) {
738                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
740                        orphan->disk_len);
741         }
742 }
743
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
745 {
746         u64 root_objectid = root->root_key.objectid;
747         int errors = rec->errors;
748
749         if (!errors)
750                 return;
751         /* reloc root errors, we print its corresponding fs root objectid*/
752         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753                 root_objectid = root->root_key.offset;
754                 fprintf(stderr, "reloc");
755         }
756         fprintf(stderr, "root %llu inode %llu errors %x",
757                 (unsigned long long) root_objectid,
758                 (unsigned long long) rec->ino, rec->errors);
759
760         if (errors & I_ERR_NO_INODE_ITEM)
761                 fprintf(stderr, ", no inode item");
762         if (errors & I_ERR_NO_ORPHAN_ITEM)
763                 fprintf(stderr, ", no orphan item");
764         if (errors & I_ERR_DUP_INODE_ITEM)
765                 fprintf(stderr, ", dup inode item");
766         if (errors & I_ERR_DUP_DIR_INDEX)
767                 fprintf(stderr, ", dup dir index");
768         if (errors & I_ERR_ODD_DIR_ITEM)
769                 fprintf(stderr, ", odd dir item");
770         if (errors & I_ERR_ODD_FILE_EXTENT)
771                 fprintf(stderr, ", odd file extent");
772         if (errors & I_ERR_BAD_FILE_EXTENT)
773                 fprintf(stderr, ", bad file extent");
774         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775                 fprintf(stderr, ", file extent overlap");
776         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777                 fprintf(stderr, ", file extent discount");
778         if (errors & I_ERR_DIR_ISIZE_WRONG)
779                 fprintf(stderr, ", dir isize wrong");
780         if (errors & I_ERR_FILE_NBYTES_WRONG)
781                 fprintf(stderr, ", nbytes wrong");
782         if (errors & I_ERR_ODD_CSUM_ITEM)
783                 fprintf(stderr, ", odd csum item");
784         if (errors & I_ERR_SOME_CSUM_MISSING)
785                 fprintf(stderr, ", some csum missing");
786         if (errors & I_ERR_LINK_COUNT_WRONG)
787                 fprintf(stderr, ", link count wrong");
788         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789                 fprintf(stderr, ", orphan file extent");
790         fprintf(stderr, "\n");
791         /* Print the orphan extents if needed */
792         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
794
795         /* Print the holes if needed */
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797                 struct file_extent_hole *hole;
798                 struct rb_node *node;
799                 int found = 0;
800
801                 node = rb_first(&rec->holes);
802                 fprintf(stderr, "Found file extent holes:\n");
803                 while (node) {
804                         found = 1;
805                         hole = rb_entry(node, struct file_extent_hole, node);
806                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
807                                 hole->start, hole->len);
808                         node = rb_next(node);
809                 }
810                 if (!found)
811                         fprintf(stderr, "\tstart: 0, len: %llu\n",
812                                 round_up(rec->isize, root->sectorsize));
813         }
814 }
815
816 static void print_ref_error(int errors)
817 {
818         if (errors & REF_ERR_NO_DIR_ITEM)
819                 fprintf(stderr, ", no dir item");
820         if (errors & REF_ERR_NO_DIR_INDEX)
821                 fprintf(stderr, ", no dir index");
822         if (errors & REF_ERR_NO_INODE_REF)
823                 fprintf(stderr, ", no inode ref");
824         if (errors & REF_ERR_DUP_DIR_ITEM)
825                 fprintf(stderr, ", dup dir item");
826         if (errors & REF_ERR_DUP_DIR_INDEX)
827                 fprintf(stderr, ", dup dir index");
828         if (errors & REF_ERR_DUP_INODE_REF)
829                 fprintf(stderr, ", dup inode ref");
830         if (errors & REF_ERR_INDEX_UNMATCH)
831                 fprintf(stderr, ", index mismatch");
832         if (errors & REF_ERR_FILETYPE_UNMATCH)
833                 fprintf(stderr, ", filetype mismatch");
834         if (errors & REF_ERR_NAME_TOO_LONG)
835                 fprintf(stderr, ", name too long");
836         if (errors & REF_ERR_NO_ROOT_REF)
837                 fprintf(stderr, ", no root ref");
838         if (errors & REF_ERR_NO_ROOT_BACKREF)
839                 fprintf(stderr, ", no root backref");
840         if (errors & REF_ERR_DUP_ROOT_REF)
841                 fprintf(stderr, ", dup root ref");
842         if (errors & REF_ERR_DUP_ROOT_BACKREF)
843                 fprintf(stderr, ", dup root backref");
844         fprintf(stderr, "\n");
845 }
846
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
848                                           u64 ino, int mod)
849 {
850         struct ptr_node *node;
851         struct cache_extent *cache;
852         struct inode_record *rec = NULL;
853         int ret;
854
855         cache = lookup_cache_extent(inode_cache, ino, 1);
856         if (cache) {
857                 node = container_of(cache, struct ptr_node, cache);
858                 rec = node->data;
859                 if (mod && rec->refs > 1) {
860                         node->data = clone_inode_rec(rec);
861                         if (IS_ERR(node->data))
862                                 return node->data;
863                         rec->refs--;
864                         rec = node->data;
865                 }
866         } else if (mod) {
867                 rec = calloc(1, sizeof(*rec));
868                 if (!rec)
869                         return ERR_PTR(-ENOMEM);
870                 rec->ino = ino;
871                 rec->extent_start = (u64)-1;
872                 rec->refs = 1;
873                 INIT_LIST_HEAD(&rec->backrefs);
874                 INIT_LIST_HEAD(&rec->orphan_extents);
875                 rec->holes = RB_ROOT;
876
877                 node = malloc(sizeof(*node));
878                 if (!node) {
879                         free(rec);
880                         return ERR_PTR(-ENOMEM);
881                 }
882                 node->cache.start = ino;
883                 node->cache.size = 1;
884                 node->data = rec;
885
886                 if (ino == BTRFS_FREE_INO_OBJECTID)
887                         rec->found_link = 1;
888
889                 ret = insert_cache_extent(inode_cache, &node->cache);
890                 if (ret)
891                         return ERR_PTR(-EEXIST);
892         }
893         return rec;
894 }
895
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
897 {
898         struct orphan_data_extent *orphan;
899
900         while (!list_empty(orphan_extents)) {
901                 orphan = list_entry(orphan_extents->next,
902                                     struct orphan_data_extent, list);
903                 list_del(&orphan->list);
904                 free(orphan);
905         }
906 }
907
908 static void free_inode_rec(struct inode_record *rec)
909 {
910         struct inode_backref *backref;
911
912         if (--rec->refs > 0)
913                 return;
914
915         while (!list_empty(&rec->backrefs)) {
916                 backref = to_inode_backref(rec->backrefs.next);
917                 list_del(&backref->list);
918                 free(backref);
919         }
920         free_orphan_data_extents(&rec->orphan_extents);
921         free_file_extent_holes(&rec->holes);
922         free(rec);
923 }
924
925 static int can_free_inode_rec(struct inode_record *rec)
926 {
927         if (!rec->errors && rec->checked && rec->found_inode_item &&
928             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
929                 return 1;
930         return 0;
931 }
932
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934                                  struct inode_record *rec)
935 {
936         struct cache_extent *cache;
937         struct inode_backref *tmp, *backref;
938         struct ptr_node *node;
939         u8 filetype;
940
941         if (!rec->found_inode_item)
942                 return;
943
944         filetype = imode_to_type(rec->imode);
945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946                 if (backref->found_dir_item && backref->found_dir_index) {
947                         if (backref->filetype != filetype)
948                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949                         if (!backref->errors && backref->found_inode_ref &&
950                             rec->nlink == rec->found_link) {
951                                 list_del(&backref->list);
952                                 free(backref);
953                         }
954                 }
955         }
956
957         if (!rec->checked || rec->merging)
958                 return;
959
960         if (S_ISDIR(rec->imode)) {
961                 if (rec->found_size != rec->isize)
962                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963                 if (rec->found_file_extent)
964                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
965         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966                 if (rec->found_dir_item)
967                         rec->errors |= I_ERR_ODD_DIR_ITEM;
968                 if (rec->found_size != rec->nbytes)
969                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970                 if (rec->nlink > 0 && !no_holes &&
971                     (rec->extent_end < rec->isize ||
972                      first_extent_gap(&rec->holes) < rec->isize))
973                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
974         }
975
976         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977                 if (rec->found_csum_item && rec->nodatasum)
978                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
979                 if (rec->some_csum_missing && !rec->nodatasum)
980                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
981         }
982
983         BUG_ON(rec->refs != 1);
984         if (can_free_inode_rec(rec)) {
985                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986                 node = container_of(cache, struct ptr_node, cache);
987                 BUG_ON(node->data != rec);
988                 remove_cache_extent(inode_cache, &node->cache);
989                 free(node);
990                 free_inode_rec(rec);
991         }
992 }
993
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
995 {
996         struct btrfs_path path;
997         struct btrfs_key key;
998         int ret;
999
1000         key.objectid = BTRFS_ORPHAN_OBJECTID;
1001         key.type = BTRFS_ORPHAN_ITEM_KEY;
1002         key.offset = ino;
1003
1004         btrfs_init_path(&path);
1005         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006         btrfs_release_path(&path);
1007         if (ret > 0)
1008                 ret = -ENOENT;
1009         return ret;
1010 }
1011
1012 static int process_inode_item(struct extent_buffer *eb,
1013                               int slot, struct btrfs_key *key,
1014                               struct shared_node *active_node)
1015 {
1016         struct inode_record *rec;
1017         struct btrfs_inode_item *item;
1018
1019         rec = active_node->current;
1020         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021         if (rec->found_inode_item) {
1022                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1023                 return 1;
1024         }
1025         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026         rec->nlink = btrfs_inode_nlink(eb, item);
1027         rec->isize = btrfs_inode_size(eb, item);
1028         rec->nbytes = btrfs_inode_nbytes(eb, item);
1029         rec->imode = btrfs_inode_mode(eb, item);
1030         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1031                 rec->nodatasum = 1;
1032         rec->found_inode_item = 1;
1033         if (rec->nlink == 0)
1034                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035         maybe_free_inode_rec(&active_node->inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1040                                                 const char *name,
1041                                                 int namelen, u64 dir)
1042 {
1043         struct inode_backref *backref;
1044
1045         list_for_each_entry(backref, &rec->backrefs, list) {
1046                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1047                         break;
1048                 if (backref->dir != dir || backref->namelen != namelen)
1049                         continue;
1050                 if (memcmp(name, backref->name, namelen))
1051                         continue;
1052                 return backref;
1053         }
1054
1055         backref = malloc(sizeof(*backref) + namelen + 1);
1056         if (!backref)
1057                 return NULL;
1058         memset(backref, 0, sizeof(*backref));
1059         backref->dir = dir;
1060         backref->namelen = namelen;
1061         memcpy(backref->name, name, namelen);
1062         backref->name[namelen] = '\0';
1063         list_add_tail(&backref->list, &rec->backrefs);
1064         return backref;
1065 }
1066
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068                              u64 ino, u64 dir, u64 index,
1069                              const char *name, int namelen,
1070                              u8 filetype, u8 itemtype, int errors)
1071 {
1072         struct inode_record *rec;
1073         struct inode_backref *backref;
1074
1075         rec = get_inode_rec(inode_cache, ino, 1);
1076         BUG_ON(IS_ERR(rec));
1077         backref = get_inode_backref(rec, name, namelen, dir);
1078         BUG_ON(!backref);
1079         if (errors)
1080                 backref->errors |= errors;
1081         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082                 if (backref->found_dir_index)
1083                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084                 if (backref->found_inode_ref && backref->index != index)
1085                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1086                 if (backref->found_dir_item && backref->filetype != filetype)
1087                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1088
1089                 backref->index = index;
1090                 backref->filetype = filetype;
1091                 backref->found_dir_index = 1;
1092         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1093                 rec->found_link++;
1094                 if (backref->found_dir_item)
1095                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096                 if (backref->found_dir_index && backref->filetype != filetype)
1097                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1098
1099                 backref->filetype = filetype;
1100                 backref->found_dir_item = 1;
1101         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103                 if (backref->found_inode_ref)
1104                         backref->errors |= REF_ERR_DUP_INODE_REF;
1105                 if (backref->found_dir_index && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 else
1108                         backref->index = index;
1109
1110                 backref->ref_type = itemtype;
1111                 backref->found_inode_ref = 1;
1112         } else {
1113                 BUG_ON(1);
1114         }
1115
1116         maybe_free_inode_rec(inode_cache, rec);
1117         return 0;
1118 }
1119
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121                             struct cache_tree *dst_cache)
1122 {
1123         struct inode_backref *backref;
1124         u32 dir_count = 0;
1125         int ret = 0;
1126
1127         dst->merging = 1;
1128         list_for_each_entry(backref, &src->backrefs, list) {
1129                 if (backref->found_dir_index) {
1130                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1131                                         backref->index, backref->name,
1132                                         backref->namelen, backref->filetype,
1133                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1134                 }
1135                 if (backref->found_dir_item) {
1136                         dir_count++;
1137                         add_inode_backref(dst_cache, dst->ino,
1138                                         backref->dir, 0, backref->name,
1139                                         backref->namelen, backref->filetype,
1140                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1141                 }
1142                 if (backref->found_inode_ref) {
1143                         add_inode_backref(dst_cache, dst->ino,
1144                                         backref->dir, backref->index,
1145                                         backref->name, backref->namelen, 0,
1146                                         backref->ref_type, backref->errors);
1147                 }
1148         }
1149
1150         if (src->found_dir_item)
1151                 dst->found_dir_item = 1;
1152         if (src->found_file_extent)
1153                 dst->found_file_extent = 1;
1154         if (src->found_csum_item)
1155                 dst->found_csum_item = 1;
1156         if (src->some_csum_missing)
1157                 dst->some_csum_missing = 1;
1158         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1160                 if (ret < 0)
1161                         return ret;
1162         }
1163
1164         BUG_ON(src->found_link < dir_count);
1165         dst->found_link += src->found_link - dir_count;
1166         dst->found_size += src->found_size;
1167         if (src->extent_start != (u64)-1) {
1168                 if (dst->extent_start == (u64)-1) {
1169                         dst->extent_start = src->extent_start;
1170                         dst->extent_end = src->extent_end;
1171                 } else {
1172                         if (dst->extent_end > src->extent_start)
1173                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174                         else if (dst->extent_end < src->extent_start) {
1175                                 ret = add_file_extent_hole(&dst->holes,
1176                                         dst->extent_end,
1177                                         src->extent_start - dst->extent_end);
1178                         }
1179                         if (dst->extent_end < src->extent_end)
1180                                 dst->extent_end = src->extent_end;
1181                 }
1182         }
1183
1184         dst->errors |= src->errors;
1185         if (src->found_inode_item) {
1186                 if (!dst->found_inode_item) {
1187                         dst->nlink = src->nlink;
1188                         dst->isize = src->isize;
1189                         dst->nbytes = src->nbytes;
1190                         dst->imode = src->imode;
1191                         dst->nodatasum = src->nodatasum;
1192                         dst->found_inode_item = 1;
1193                 } else {
1194                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1195                 }
1196         }
1197         dst->merging = 0;
1198
1199         return 0;
1200 }
1201
1202 static int splice_shared_node(struct shared_node *src_node,
1203                               struct shared_node *dst_node)
1204 {
1205         struct cache_extent *cache;
1206         struct ptr_node *node, *ins;
1207         struct cache_tree *src, *dst;
1208         struct inode_record *rec, *conflict;
1209         u64 current_ino = 0;
1210         int splice = 0;
1211         int ret;
1212
1213         if (--src_node->refs == 0)
1214                 splice = 1;
1215         if (src_node->current)
1216                 current_ino = src_node->current->ino;
1217
1218         src = &src_node->root_cache;
1219         dst = &dst_node->root_cache;
1220 again:
1221         cache = search_cache_extent(src, 0);
1222         while (cache) {
1223                 node = container_of(cache, struct ptr_node, cache);
1224                 rec = node->data;
1225                 cache = next_cache_extent(cache);
1226
1227                 if (splice) {
1228                         remove_cache_extent(src, &node->cache);
1229                         ins = node;
1230                 } else {
1231                         ins = malloc(sizeof(*ins));
1232                         BUG_ON(!ins);
1233                         ins->cache.start = node->cache.start;
1234                         ins->cache.size = node->cache.size;
1235                         ins->data = rec;
1236                         rec->refs++;
1237                 }
1238                 ret = insert_cache_extent(dst, &ins->cache);
1239                 if (ret == -EEXIST) {
1240                         conflict = get_inode_rec(dst, rec->ino, 1);
1241                         BUG_ON(IS_ERR(conflict));
1242                         merge_inode_recs(rec, conflict, dst);
1243                         if (rec->checked) {
1244                                 conflict->checked = 1;
1245                                 if (dst_node->current == conflict)
1246                                         dst_node->current = NULL;
1247                         }
1248                         maybe_free_inode_rec(dst, conflict);
1249                         free_inode_rec(rec);
1250                         free(ins);
1251                 } else {
1252                         BUG_ON(ret);
1253                 }
1254         }
1255
1256         if (src == &src_node->root_cache) {
1257                 src = &src_node->inode_cache;
1258                 dst = &dst_node->inode_cache;
1259                 goto again;
1260         }
1261
1262         if (current_ino > 0 && (!dst_node->current ||
1263             current_ino > dst_node->current->ino)) {
1264                 if (dst_node->current) {
1265                         dst_node->current->checked = 1;
1266                         maybe_free_inode_rec(dst, dst_node->current);
1267                 }
1268                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269                 BUG_ON(IS_ERR(dst_node->current));
1270         }
1271         return 0;
1272 }
1273
1274 static void free_inode_ptr(struct cache_extent *cache)
1275 {
1276         struct ptr_node *node;
1277         struct inode_record *rec;
1278
1279         node = container_of(cache, struct ptr_node, cache);
1280         rec = node->data;
1281         free_inode_rec(rec);
1282         free(node);
1283 }
1284
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1286
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1288                                             u64 bytenr)
1289 {
1290         struct cache_extent *cache;
1291         struct shared_node *node;
1292
1293         cache = lookup_cache_extent(shared, bytenr, 1);
1294         if (cache) {
1295                 node = container_of(cache, struct shared_node, cache);
1296                 return node;
1297         }
1298         return NULL;
1299 }
1300
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1302 {
1303         int ret;
1304         struct shared_node *node;
1305
1306         node = calloc(1, sizeof(*node));
1307         if (!node)
1308                 return -ENOMEM;
1309         node->cache.start = bytenr;
1310         node->cache.size = 1;
1311         cache_tree_init(&node->root_cache);
1312         cache_tree_init(&node->inode_cache);
1313         node->refs = refs;
1314
1315         ret = insert_cache_extent(shared, &node->cache);
1316
1317         return ret;
1318 }
1319
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321                              struct walk_control *wc, int level)
1322 {
1323         struct shared_node *node;
1324         struct shared_node *dest;
1325         int ret;
1326
1327         if (level == wc->active_node)
1328                 return 0;
1329
1330         BUG_ON(wc->active_node <= level);
1331         node = find_shared_node(&wc->shared, bytenr);
1332         if (!node) {
1333                 ret = add_shared_node(&wc->shared, bytenr, refs);
1334                 BUG_ON(ret);
1335                 node = find_shared_node(&wc->shared, bytenr);
1336                 wc->nodes[level] = node;
1337                 wc->active_node = level;
1338                 return 0;
1339         }
1340
1341         if (wc->root_level == wc->active_node &&
1342             btrfs_root_refs(&root->root_item) == 0) {
1343                 if (--node->refs == 0) {
1344                         free_inode_recs_tree(&node->root_cache);
1345                         free_inode_recs_tree(&node->inode_cache);
1346                         remove_cache_extent(&wc->shared, &node->cache);
1347                         free(node);
1348                 }
1349                 return 1;
1350         }
1351
1352         dest = wc->nodes[wc->active_node];
1353         splice_shared_node(node, dest);
1354         if (node->refs == 0) {
1355                 remove_cache_extent(&wc->shared, &node->cache);
1356                 free(node);
1357         }
1358         return 1;
1359 }
1360
1361 static int leave_shared_node(struct btrfs_root *root,
1362                              struct walk_control *wc, int level)
1363 {
1364         struct shared_node *node;
1365         struct shared_node *dest;
1366         int i;
1367
1368         if (level == wc->root_level)
1369                 return 0;
1370
1371         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1372                 if (wc->nodes[i])
1373                         break;
1374         }
1375         BUG_ON(i >= BTRFS_MAX_LEVEL);
1376
1377         node = wc->nodes[wc->active_node];
1378         wc->nodes[wc->active_node] = NULL;
1379         wc->active_node = i;
1380
1381         dest = wc->nodes[wc->active_node];
1382         if (wc->active_node < wc->root_level ||
1383             btrfs_root_refs(&root->root_item) > 0) {
1384                 BUG_ON(node->refs <= 1);
1385                 splice_shared_node(node, dest);
1386         } else {
1387                 BUG_ON(node->refs < 2);
1388                 node->refs--;
1389         }
1390         return 0;
1391 }
1392
1393 /*
1394  * Returns:
1395  * < 0 - on error
1396  * 1   - if the root with id child_root_id is a child of root parent_root_id
1397  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1398  *       has other root(s) as parent(s)
1399  * 2   - if the root child_root_id doesn't have any parent roots
1400  */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1402                          u64 child_root_id)
1403 {
1404         struct btrfs_path path;
1405         struct btrfs_key key;
1406         struct extent_buffer *leaf;
1407         int has_parent = 0;
1408         int ret;
1409
1410         btrfs_init_path(&path);
1411
1412         key.objectid = parent_root_id;
1413         key.type = BTRFS_ROOT_REF_KEY;
1414         key.offset = child_root_id;
1415         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1416                                 0, 0);
1417         if (ret < 0)
1418                 return ret;
1419         btrfs_release_path(&path);
1420         if (!ret)
1421                 return 1;
1422
1423         key.objectid = child_root_id;
1424         key.type = BTRFS_ROOT_BACKREF_KEY;
1425         key.offset = 0;
1426         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1427                                 0, 0);
1428         if (ret < 0)
1429                 goto out;
1430
1431         while (1) {
1432                 leaf = path.nodes[0];
1433                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1435                         if (ret)
1436                                 break;
1437                         leaf = path.nodes[0];
1438                 }
1439
1440                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441                 if (key.objectid != child_root_id ||
1442                     key.type != BTRFS_ROOT_BACKREF_KEY)
1443                         break;
1444
1445                 has_parent = 1;
1446
1447                 if (key.offset == parent_root_id) {
1448                         btrfs_release_path(&path);
1449                         return 1;
1450                 }
1451
1452                 path.slots[0]++;
1453         }
1454 out:
1455         btrfs_release_path(&path);
1456         if (ret < 0)
1457                 return ret;
1458         return has_parent ? 0 : 2;
1459 }
1460
1461 static int process_dir_item(struct btrfs_root *root,
1462                             struct extent_buffer *eb,
1463                             int slot, struct btrfs_key *key,
1464                             struct shared_node *active_node)
1465 {
1466         u32 total;
1467         u32 cur = 0;
1468         u32 len;
1469         u32 name_len;
1470         u32 data_len;
1471         int error;
1472         int nritems = 0;
1473         u8 filetype;
1474         struct btrfs_dir_item *di;
1475         struct inode_record *rec;
1476         struct cache_tree *root_cache;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_key location;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         root_cache = &active_node->root_cache;
1482         inode_cache = &active_node->inode_cache;
1483         rec = active_node->current;
1484         rec->found_dir_item = 1;
1485
1486         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487         total = btrfs_item_size_nr(eb, slot);
1488         while (cur < total) {
1489                 nritems++;
1490                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491                 name_len = btrfs_dir_name_len(eb, di);
1492                 data_len = btrfs_dir_data_len(eb, di);
1493                 filetype = btrfs_dir_type(eb, di);
1494
1495                 rec->found_size += name_len;
1496                 if (name_len <= BTRFS_NAME_LEN) {
1497                         len = name_len;
1498                         error = 0;
1499                 } else {
1500                         len = BTRFS_NAME_LEN;
1501                         error = REF_ERR_NAME_TOO_LONG;
1502                 }
1503                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1504
1505                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506                         add_inode_backref(inode_cache, location.objectid,
1507                                           key->objectid, key->offset, namebuf,
1508                                           len, filetype, key->type, error);
1509                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510                         add_inode_backref(root_cache, location.objectid,
1511                                           key->objectid, key->offset,
1512                                           namebuf, len, filetype,
1513                                           key->type, error);
1514                 } else {
1515                         fprintf(stderr, "invalid location in dir item %u\n",
1516                                 location.type);
1517                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518                                           key->objectid, key->offset, namebuf,
1519                                           len, filetype, key->type, error);
1520                 }
1521
1522                 len = sizeof(*di) + name_len + data_len;
1523                 di = (struct btrfs_dir_item *)((char *)di + len);
1524                 cur += len;
1525         }
1526         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1528
1529         return 0;
1530 }
1531
1532 static int process_inode_ref(struct extent_buffer *eb,
1533                              int slot, struct btrfs_key *key,
1534                              struct shared_node *active_node)
1535 {
1536         u32 total;
1537         u32 cur = 0;
1538         u32 len;
1539         u32 name_len;
1540         u64 index;
1541         int error;
1542         struct cache_tree *inode_cache;
1543         struct btrfs_inode_ref *ref;
1544         char namebuf[BTRFS_NAME_LEN];
1545
1546         inode_cache = &active_node->inode_cache;
1547
1548         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549         total = btrfs_item_size_nr(eb, slot);
1550         while (cur < total) {
1551                 name_len = btrfs_inode_ref_name_len(eb, ref);
1552                 index = btrfs_inode_ref_index(eb, ref);
1553                 if (name_len <= BTRFS_NAME_LEN) {
1554                         len = name_len;
1555                         error = 0;
1556                 } else {
1557                         len = BTRFS_NAME_LEN;
1558                         error = REF_ERR_NAME_TOO_LONG;
1559                 }
1560                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561                 add_inode_backref(inode_cache, key->objectid, key->offset,
1562                                   index, namebuf, len, 0, key->type, error);
1563
1564                 len = sizeof(*ref) + name_len;
1565                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1566                 cur += len;
1567         }
1568         return 0;
1569 }
1570
1571 static int process_inode_extref(struct extent_buffer *eb,
1572                                 int slot, struct btrfs_key *key,
1573                                 struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u64 index;
1580         u64 parent;
1581         int error;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_inode_extref *extref;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         inode_cache = &active_node->inode_cache;
1587
1588         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589         total = btrfs_item_size_nr(eb, slot);
1590         while (cur < total) {
1591                 name_len = btrfs_inode_extref_name_len(eb, extref);
1592                 index = btrfs_inode_extref_index(eb, extref);
1593                 parent = btrfs_inode_extref_parent(eb, extref);
1594                 if (name_len <= BTRFS_NAME_LEN) {
1595                         len = name_len;
1596                         error = 0;
1597                 } else {
1598                         len = BTRFS_NAME_LEN;
1599                         error = REF_ERR_NAME_TOO_LONG;
1600                 }
1601                 read_extent_buffer(eb, namebuf,
1602                                    (unsigned long)(extref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, parent,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*extref) + name_len;
1607                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611
1612 }
1613
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615                             u64 len, u64 *found)
1616 {
1617         struct btrfs_key key;
1618         struct btrfs_path path;
1619         struct extent_buffer *leaf;
1620         int ret;
1621         size_t size;
1622         *found = 0;
1623         u64 csum_end;
1624         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1625
1626         btrfs_init_path(&path);
1627
1628         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1629         key.offset = start;
1630         key.type = BTRFS_EXTENT_CSUM_KEY;
1631
1632         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1633                                 &key, &path, 0, 0);
1634         if (ret < 0)
1635                 goto out;
1636         if (ret > 0 && path.slots[0] > 0) {
1637                 leaf = path.nodes[0];
1638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640                     key.type == BTRFS_EXTENT_CSUM_KEY)
1641                         path.slots[0]--;
1642         }
1643
1644         while (len > 0) {
1645                 leaf = path.nodes[0];
1646                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1648                         if (ret > 0)
1649                                 break;
1650                         else if (ret < 0)
1651                                 goto out;
1652                         leaf = path.nodes[0];
1653                 }
1654
1655                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657                     key.type != BTRFS_EXTENT_CSUM_KEY)
1658                         break;
1659
1660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661                 if (key.offset >= start + len)
1662                         break;
1663
1664                 if (key.offset > start)
1665                         start = key.offset;
1666
1667                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669                 if (csum_end > start) {
1670                         size = min(csum_end - start, len);
1671                         len -= size;
1672                         start += size;
1673                         *found += size;
1674                 }
1675
1676                 path.slots[0]++;
1677         }
1678 out:
1679         btrfs_release_path(&path);
1680         if (ret < 0)
1681                 return ret;
1682         return 0;
1683 }
1684
1685 static int process_file_extent(struct btrfs_root *root,
1686                                 struct extent_buffer *eb,
1687                                 int slot, struct btrfs_key *key,
1688                                 struct shared_node *active_node)
1689 {
1690         struct inode_record *rec;
1691         struct btrfs_file_extent_item *fi;
1692         u64 num_bytes = 0;
1693         u64 disk_bytenr = 0;
1694         u64 extent_offset = 0;
1695         u64 mask = root->sectorsize - 1;
1696         int extent_type;
1697         int ret;
1698
1699         rec = active_node->current;
1700         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701         rec->found_file_extent = 1;
1702
1703         if (rec->extent_start == (u64)-1) {
1704                 rec->extent_start = key->offset;
1705                 rec->extent_end = key->offset;
1706         }
1707
1708         if (rec->extent_end > key->offset)
1709                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710         else if (rec->extent_end < key->offset) {
1711                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712                                            key->offset - rec->extent_end);
1713                 if (ret < 0)
1714                         return ret;
1715         }
1716
1717         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718         extent_type = btrfs_file_extent_type(eb, fi);
1719
1720         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1722                 if (num_bytes == 0)
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 rec->found_size += num_bytes;
1725                 num_bytes = (num_bytes + mask) & ~mask;
1726         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730                 extent_offset = btrfs_file_extent_offset(eb, fi);
1731                 if (num_bytes == 0 || (num_bytes & mask))
1732                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733                 if (num_bytes + extent_offset >
1734                     btrfs_file_extent_ram_bytes(eb, fi))
1735                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1736                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737                     (btrfs_file_extent_compression(eb, fi) ||
1738                      btrfs_file_extent_encryption(eb, fi) ||
1739                      btrfs_file_extent_other_encoding(eb, fi)))
1740                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1741                 if (disk_bytenr > 0)
1742                         rec->found_size += num_bytes;
1743         } else {
1744                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1745         }
1746         rec->extent_end = key->offset + num_bytes;
1747
1748         /*
1749          * The data reloc tree will copy full extents into its inode and then
1750          * copy the corresponding csums.  Because the extent it copied could be
1751          * a preallocated extent that hasn't been written to yet there may be no
1752          * csums to copy, ergo we won't have csums for our file extent.  This is
1753          * ok so just don't bother checking csums if the inode belongs to the
1754          * data reloc tree.
1755          */
1756         if (disk_bytenr > 0 &&
1757             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1758                 u64 found;
1759                 if (btrfs_file_extent_compression(eb, fi))
1760                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1761                 else
1762                         disk_bytenr += extent_offset;
1763
1764                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1765                 if (ret < 0)
1766                         return ret;
1767                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1768                         if (found > 0)
1769                                 rec->found_csum_item = 1;
1770                         if (found < num_bytes)
1771                                 rec->some_csum_missing = 1;
1772                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1773                         if (found > 0)
1774                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1775                 }
1776         }
1777         return 0;
1778 }
1779
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781                             struct walk_control *wc)
1782 {
1783         struct btrfs_key key;
1784         u32 nritems;
1785         int i;
1786         int ret = 0;
1787         struct cache_tree *inode_cache;
1788         struct shared_node *active_node;
1789
1790         if (wc->root_level == wc->active_node &&
1791             btrfs_root_refs(&root->root_item) == 0)
1792                 return 0;
1793
1794         active_node = wc->nodes[wc->active_node];
1795         inode_cache = &active_node->inode_cache;
1796         nritems = btrfs_header_nritems(eb);
1797         for (i = 0; i < nritems; i++) {
1798                 btrfs_item_key_to_cpu(eb, &key, i);
1799
1800                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1801                         continue;
1802                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1803                         continue;
1804
1805                 if (active_node->current == NULL ||
1806                     active_node->current->ino < key.objectid) {
1807                         if (active_node->current) {
1808                                 active_node->current->checked = 1;
1809                                 maybe_free_inode_rec(inode_cache,
1810                                                      active_node->current);
1811                         }
1812                         active_node->current = get_inode_rec(inode_cache,
1813                                                              key.objectid, 1);
1814                         BUG_ON(IS_ERR(active_node->current));
1815                 }
1816                 switch (key.type) {
1817                 case BTRFS_DIR_ITEM_KEY:
1818                 case BTRFS_DIR_INDEX_KEY:
1819                         ret = process_dir_item(root, eb, i, &key, active_node);
1820                         break;
1821                 case BTRFS_INODE_REF_KEY:
1822                         ret = process_inode_ref(eb, i, &key, active_node);
1823                         break;
1824                 case BTRFS_INODE_EXTREF_KEY:
1825                         ret = process_inode_extref(eb, i, &key, active_node);
1826                         break;
1827                 case BTRFS_INODE_ITEM_KEY:
1828                         ret = process_inode_item(eb, i, &key, active_node);
1829                         break;
1830                 case BTRFS_EXTENT_DATA_KEY:
1831                         ret = process_file_extent(root, eb, i, &key,
1832                                                   active_node);
1833                         break;
1834                 default:
1835                         break;
1836                 };
1837         }
1838         return ret;
1839 }
1840
1841 static void reada_walk_down(struct btrfs_root *root,
1842                             struct extent_buffer *node, int slot)
1843 {
1844         u64 bytenr;
1845         u64 ptr_gen;
1846         u32 nritems;
1847         u32 blocksize;
1848         int i;
1849         int level;
1850
1851         level = btrfs_header_level(node);
1852         if (level != 1)
1853                 return;
1854
1855         nritems = btrfs_header_nritems(node);
1856         blocksize = root->nodesize;
1857         for (i = slot; i < nritems; i++) {
1858                 bytenr = btrfs_node_blockptr(node, i);
1859                 ptr_gen = btrfs_node_ptr_generation(node, i);
1860                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1861         }
1862 }
1863
1864 /*
1865  * Check the child node/leaf by the following condition:
1866  * 1. the first item key of the node/leaf should be the same with the one
1867  *    in parent.
1868  * 2. block in parent node should match the child node/leaf.
1869  * 3. generation of parent node and child's header should be consistent.
1870  *
1871  * Or the child node/leaf pointed by the key in parent is not valid.
1872  *
1873  * We hope to check leaf owner too, but since subvol may share leaves,
1874  * which makes leaf owner check not so strong, key check should be
1875  * sufficient enough for that case.
1876  */
1877 static int check_child_node(struct btrfs_root *root,
1878                             struct extent_buffer *parent, int slot,
1879                             struct extent_buffer *child)
1880 {
1881         struct btrfs_key parent_key;
1882         struct btrfs_key child_key;
1883         int ret = 0;
1884
1885         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886         if (btrfs_header_level(child) == 0)
1887                 btrfs_item_key_to_cpu(child, &child_key, 0);
1888         else
1889                 btrfs_node_key_to_cpu(child, &child_key, 0);
1890
1891         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1892                 ret = -EINVAL;
1893                 fprintf(stderr,
1894                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895                         parent_key.objectid, parent_key.type, parent_key.offset,
1896                         child_key.objectid, child_key.type, child_key.offset);
1897         }
1898         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1899                 ret = -EINVAL;
1900                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901                         btrfs_node_blockptr(parent, slot),
1902                         btrfs_header_bytenr(child));
1903         }
1904         if (btrfs_node_ptr_generation(parent, slot) !=
1905             btrfs_header_generation(child)) {
1906                 ret = -EINVAL;
1907                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908                         btrfs_header_generation(child),
1909                         btrfs_node_ptr_generation(parent, slot));
1910         }
1911         return ret;
1912 }
1913
/*
 * Per-level cache of the last extent reference lookup made by
 * walk_down_tree(), so the same block is not looked up again.
 */
struct node_refs {
        /* start of the tree block last looked up at each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count btrfs_lookup_extent_info() returned for it */
        u64 refs[BTRFS_MAX_LEVEL];
};
1918
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920                           struct walk_control *wc, int *level,
1921                           struct node_refs *nrefs)
1922 {
1923         enum btrfs_tree_block_status status;
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         struct extent_buffer *next;
1927         struct extent_buffer *cur;
1928         u32 blocksize;
1929         int ret, err = 0;
1930         u64 refs;
1931
1932         WARN_ON(*level < 0);
1933         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1934
1935         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936                 refs = nrefs->refs[*level];
1937                 ret = 0;
1938         } else {
1939                 ret = btrfs_lookup_extent_info(NULL, root,
1940                                        path->nodes[*level]->start,
1941                                        *level, 1, &refs, NULL);
1942                 if (ret < 0) {
1943                         err = ret;
1944                         goto out;
1945                 }
1946                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947                 nrefs->refs[*level] = refs;
1948         }
1949
1950         if (refs > 1) {
1951                 ret = enter_shared_node(root, path->nodes[*level]->start,
1952                                         refs, wc, *level);
1953                 if (ret > 0) {
1954                         err = ret;
1955                         goto out;
1956                 }
1957         }
1958
1959         while (*level >= 0) {
1960                 WARN_ON(*level < 0);
1961                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962                 cur = path->nodes[*level];
1963
1964                 if (btrfs_header_level(cur) != *level)
1965                         WARN_ON(1);
1966
1967                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1968                         break;
1969                 if (*level == 0) {
1970                         ret = process_one_leaf(root, cur, wc);
1971                         if (ret < 0)
1972                                 err = ret;
1973                         break;
1974                 }
1975                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977                 blocksize = root->nodesize;
1978
1979                 if (bytenr == nrefs->bytenr[*level - 1]) {
1980                         refs = nrefs->refs[*level - 1];
1981                 } else {
1982                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983                                         *level - 1, 1, &refs, NULL);
1984                         if (ret < 0) {
1985                                 refs = 0;
1986                         } else {
1987                                 nrefs->bytenr[*level - 1] = bytenr;
1988                                 nrefs->refs[*level - 1] = refs;
1989                         }
1990                 }
1991
1992                 if (refs > 1) {
1993                         ret = enter_shared_node(root, bytenr, refs,
1994                                                 wc, *level - 1);
1995                         if (ret > 0) {
1996                                 path->slots[*level]++;
1997                                 continue;
1998                         }
1999                 }
2000
2001                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003                         free_extent_buffer(next);
2004                         reada_walk_down(root, cur, path->slots[*level]);
2005                         next = read_tree_block(root, bytenr, blocksize,
2006                                                ptr_gen);
2007                         if (!extent_buffer_uptodate(next)) {
2008                                 struct btrfs_key node_key;
2009
2010                                 btrfs_node_key_to_cpu(path->nodes[*level],
2011                                                       &node_key,
2012                                                       path->slots[*level]);
2013                                 btrfs_add_corrupt_extent_record(root->fs_info,
2014                                                 &node_key,
2015                                                 path->nodes[*level]->start,
2016                                                 root->nodesize, *level);
2017                                 err = -EIO;
2018                                 goto out;
2019                         }
2020                 }
2021
2022                 ret = check_child_node(root, cur, path->slots[*level], next);
2023                 if (ret) {
2024                         err = ret;
2025                         goto out;
2026                 }
2027
2028                 if (btrfs_is_leaf(next))
2029                         status = btrfs_check_leaf(root, NULL, next);
2030                 else
2031                         status = btrfs_check_node(root, NULL, next);
2032                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033                         free_extent_buffer(next);
2034                         err = -EIO;
2035                         goto out;
2036                 }
2037
2038                 *level = *level - 1;
2039                 free_extent_buffer(path->nodes[*level]);
2040                 path->nodes[*level] = next;
2041                 path->slots[*level] = 0;
2042         }
2043 out:
2044         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2045         return err;
2046 }
2047
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049                         struct walk_control *wc, int *level)
2050 {
2051         int i;
2052         struct extent_buffer *leaf;
2053
2054         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055                 leaf = path->nodes[i];
2056                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2057                         path->slots[i]++;
2058                         *level = i;
2059                         return 0;
2060                 } else {
2061                         free_extent_buffer(path->nodes[*level]);
2062                         path->nodes[*level] = NULL;
2063                         BUG_ON(*level > wc->active_node);
2064                         if (*level == wc->active_node)
2065                                 leave_shared_node(root, wc, *level);
2066                         *level = i + 1;
2067                 }
2068         }
2069         return 1;
2070 }
2071
2072 static int check_root_dir(struct inode_record *rec)
2073 {
2074         struct inode_backref *backref;
2075         int ret = -1;
2076
2077         if (!rec->found_inode_item || rec->errors)
2078                 goto out;
2079         if (rec->nlink != 1 || rec->found_link != 0)
2080                 goto out;
2081         if (list_empty(&rec->backrefs))
2082                 goto out;
2083         backref = to_inode_backref(rec->backrefs.next);
2084         if (!backref->found_inode_ref)
2085                 goto out;
2086         if (backref->index != 0 || backref->namelen != 2 ||
2087             memcmp(backref->name, "..", 2))
2088                 goto out;
2089         if (backref->found_dir_index || backref->found_dir_item)
2090                 goto out;
2091         ret = 0;
2092 out:
2093         return ret;
2094 }
2095
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097                               struct btrfs_root *root, struct btrfs_path *path,
2098                               struct inode_record *rec)
2099 {
2100         struct btrfs_inode_item *ei;
2101         struct btrfs_key key;
2102         int ret;
2103
2104         key.objectid = rec->ino;
2105         key.type = BTRFS_INODE_ITEM_KEY;
2106         key.offset = (u64)-1;
2107
2108         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2109         if (ret < 0)
2110                 goto out;
2111         if (ret) {
2112                 if (!path->slots[0]) {
2113                         ret = -ENOENT;
2114                         goto out;
2115                 }
2116                 path->slots[0]--;
2117                 ret = 0;
2118         }
2119         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120         if (key.objectid != rec->ino) {
2121                 ret = -ENOENT;
2122                 goto out;
2123         }
2124
2125         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126                             struct btrfs_inode_item);
2127         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128         btrfs_mark_buffer_dirty(path->nodes[0]);
2129         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131                root->root_key.objectid);
2132 out:
2133         btrfs_release_path(path);
2134         return ret;
2135 }
2136
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138                                     struct btrfs_root *root,
2139                                     struct btrfs_path *path,
2140                                     struct inode_record *rec)
2141 {
2142         int ret;
2143
2144         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145         btrfs_release_path(path);
2146         if (!ret)
2147                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2148         return ret;
2149 }
2150
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152                                struct btrfs_root *root,
2153                                struct btrfs_path *path,
2154                                struct inode_record *rec)
2155 {
2156         struct btrfs_inode_item *ei;
2157         struct btrfs_key key;
2158         int ret = 0;
2159
2160         key.objectid = rec->ino;
2161         key.type = BTRFS_INODE_ITEM_KEY;
2162         key.offset = 0;
2163
2164         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2165         if (ret) {
2166                 if (ret > 0)
2167                         ret = -ENOENT;
2168                 goto out;
2169         }
2170
2171         /* Since ret == 0, no need to check anything */
2172         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173                             struct btrfs_inode_item);
2174         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175         btrfs_mark_buffer_dirty(path->nodes[0]);
2176         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177         printf("reset nbytes for ino %llu root %llu\n",
2178                rec->ino, root->root_key.objectid);
2179 out:
2180         btrfs_release_path(path);
2181         return ret;
2182 }
2183
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185                                  struct cache_tree *inode_cache,
2186                                  struct inode_record *rec,
2187                                  struct inode_backref *backref)
2188 {
2189         struct btrfs_path path;
2190         struct btrfs_trans_handle *trans;
2191         struct btrfs_dir_item *dir_item;
2192         struct extent_buffer *leaf;
2193         struct btrfs_key key;
2194         struct btrfs_disk_key disk_key;
2195         struct inode_record *dir_rec;
2196         unsigned long name_ptr;
2197         u32 data_size = sizeof(*dir_item) + backref->namelen;
2198         int ret;
2199
2200         trans = btrfs_start_transaction(root, 1);
2201         if (IS_ERR(trans))
2202                 return PTR_ERR(trans);
2203
2204         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205                 (unsigned long long)rec->ino);
2206
2207         btrfs_init_path(&path);
2208         key.objectid = backref->dir;
2209         key.type = BTRFS_DIR_INDEX_KEY;
2210         key.offset = backref->index;
2211         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2212         BUG_ON(ret);
2213
2214         leaf = path.nodes[0];
2215         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2216
2217         disk_key.objectid = cpu_to_le64(rec->ino);
2218         disk_key.type = BTRFS_INODE_ITEM_KEY;
2219         disk_key.offset = 0;
2220
2221         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223         btrfs_set_dir_data_len(leaf, dir_item, 0);
2224         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225         name_ptr = (unsigned long)(dir_item + 1);
2226         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227         btrfs_mark_buffer_dirty(leaf);
2228         btrfs_release_path(&path);
2229         btrfs_commit_transaction(trans, root);
2230
2231         backref->found_dir_index = 1;
2232         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233         BUG_ON(IS_ERR(dir_rec));
2234         if (!dir_rec)
2235                 return 0;
2236         dir_rec->found_size += backref->namelen;
2237         if (dir_rec->found_size == dir_rec->isize &&
2238             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240         if (dir_rec->found_size != dir_rec->isize)
2241                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2242
2243         return 0;
2244 }
2245
2246 static int delete_dir_index(struct btrfs_root *root,
2247                             struct cache_tree *inode_cache,
2248                             struct inode_record *rec,
2249                             struct inode_backref *backref)
2250 {
2251         struct btrfs_trans_handle *trans;
2252         struct btrfs_dir_item *di;
2253         struct btrfs_path path;
2254         int ret = 0;
2255
2256         trans = btrfs_start_transaction(root, 1);
2257         if (IS_ERR(trans))
2258                 return PTR_ERR(trans);
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         btrfs_init_path(&path);
2266         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267                                     backref->name, backref->namelen,
2268                                     backref->index, -1);
2269         if (IS_ERR(di)) {
2270                 ret = PTR_ERR(di);
2271                 btrfs_release_path(&path);
2272                 btrfs_commit_transaction(trans, root);
2273                 if (ret == -ENOENT)
2274                         return 0;
2275                 return ret;
2276         }
2277
2278         if (!di)
2279                 ret = btrfs_del_item(trans, root, &path);
2280         else
2281                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2282         BUG_ON(ret);
2283         btrfs_release_path(&path);
2284         btrfs_commit_transaction(trans, root);
2285         return ret;
2286 }
2287
2288 static int create_inode_item(struct btrfs_root *root,
2289                              struct inode_record *rec,
2290                              struct inode_backref *backref, int root_dir)
2291 {
2292         struct btrfs_trans_handle *trans;
2293         struct btrfs_inode_item inode_item;
2294         time_t now = time(NULL);
2295         int ret;
2296
2297         trans = btrfs_start_transaction(root, 1);
2298         if (IS_ERR(trans)) {
2299                 ret = PTR_ERR(trans);
2300                 return ret;
2301         }
2302
2303         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304                 "be incomplete, please check permissions and content after "
2305                 "the fsck completes.\n", (unsigned long long)root->objectid,
2306                 (unsigned long long)rec->ino);
2307
2308         memset(&inode_item, 0, sizeof(inode_item));
2309         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2310         if (root_dir)
2311                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2312         else
2313                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2315         if (rec->found_dir_item) {
2316                 if (rec->found_file_extent)
2317                         fprintf(stderr, "root %llu inode %llu has both a dir "
2318                                 "item and extents, unsure if it is a dir or a "
2319                                 "regular file so setting it as a directory\n",
2320                                 (unsigned long long)root->objectid,
2321                                 (unsigned long long)rec->ino);
2322                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2324         } else if (!rec->found_dir_item) {
2325                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2327         }
2328         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2336
2337         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2338         BUG_ON(ret);
2339         btrfs_commit_transaction(trans, root);
2340         return 0;
2341 }
2342
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344                                  struct inode_record *rec,
2345                                  struct cache_tree *inode_cache,
2346                                  int delete)
2347 {
2348         struct inode_backref *tmp, *backref;
2349         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2350         int ret = 0;
2351         int repaired = 0;
2352
2353         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354                 if (!delete && rec->ino == root_dirid) {
2355                         if (!rec->found_inode_item) {
2356                                 ret = create_inode_item(root, rec, backref, 1);
2357                                 if (ret)
2358                                         break;
2359                                 repaired++;
2360                         }
2361                 }
2362
2363                 /* Index 0 for root dir's are special, don't mess with it */
2364                 if (rec->ino == root_dirid && backref->index == 0)
2365                         continue;
2366
2367                 if (delete &&
2368                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2369                      (backref->found_dir_index && backref->found_inode_ref &&
2370                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371                         ret = delete_dir_index(root, inode_cache, rec, backref);
2372                         if (ret)
2373                                 break;
2374                         repaired++;
2375                         list_del(&backref->list);
2376                         free(backref);
2377                 }
2378
2379                 if (!delete && !backref->found_dir_index &&
2380                     backref->found_dir_item && backref->found_inode_ref) {
2381                         ret = add_missing_dir_index(root, inode_cache, rec,
2382                                                     backref);
2383                         if (ret)
2384                                 break;
2385                         repaired++;
2386                         if (backref->found_dir_item &&
2387                             backref->found_dir_index &&
2388                             backref->found_dir_index) {
2389                                 if (!backref->errors &&
2390                                     backref->found_inode_ref) {
2391                                         list_del(&backref->list);
2392                                         free(backref);
2393                                 }
2394                         }
2395                 }
2396
2397                 if (!delete && (!backref->found_dir_index &&
2398                                 !backref->found_dir_item &&
2399                                 backref->found_inode_ref)) {
2400                         struct btrfs_trans_handle *trans;
2401                         struct btrfs_key location;
2402
2403                         ret = check_dir_conflict(root, backref->name,
2404                                                  backref->namelen,
2405                                                  backref->dir,
2406                                                  backref->index);
2407                         if (ret) {
2408                                 /*
2409                                  * let nlink fixing routine to handle it,
2410                                  * which can do it better.
2411                                  */
2412                                 ret = 0;
2413                                 break;
2414                         }
2415                         location.objectid = rec->ino;
2416                         location.type = BTRFS_INODE_ITEM_KEY;
2417                         location.offset = 0;
2418
2419                         trans = btrfs_start_transaction(root, 1);
2420                         if (IS_ERR(trans)) {
2421                                 ret = PTR_ERR(trans);
2422                                 break;
2423                         }
2424                         fprintf(stderr, "adding missing dir index/item pair "
2425                                 "for inode %llu\n",
2426                                 (unsigned long long)rec->ino);
2427                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2428                                                     backref->namelen,
2429                                                     backref->dir, &location,
2430                                                     imode_to_type(rec->imode),
2431                                                     backref->index);
2432                         BUG_ON(ret);
2433                         btrfs_commit_transaction(trans, root);
2434                         repaired++;
2435                 }
2436
2437                 if (!delete && (backref->found_inode_ref &&
2438                                 backref->found_dir_index &&
2439                                 backref->found_dir_item &&
2440                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441                                 !rec->found_inode_item)) {
2442                         ret = create_inode_item(root, rec, backref, 0);
2443                         if (ret)
2444                                 break;
2445                         repaired++;
2446                 }
2447
2448         }
2449         return ret ? ret : repaired;
2450 }
2451
2452 /*
2453  * To determine the file type for nlink/inode_item repair
2454  *
2455  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456  * Return -ENOENT if file type is not found.
2457  */
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2459 {
2460         struct inode_backref *backref;
2461
2462         /* For inode item recovered case */
2463         if (rec->found_inode_item) {
2464                 *type = imode_to_type(rec->imode);
2465                 return 0;
2466         }
2467
2468         list_for_each_entry(backref, &rec->backrefs, list) {
2469                 if (backref->found_dir_index || backref->found_dir_item) {
2470                         *type = backref->filetype;
2471                         return 0;
2472                 }
2473         }
2474         return -ENOENT;
2475 }
2476
2477 /*
2478  * To determine the file name for nlink repair
2479  *
2480  * Return 0 if file name is found, set name and namelen.
2481  * Return -ENOENT if file name is not found.
2482  */
2483 static int find_file_name(struct inode_record *rec,
2484                           char *name, int *namelen)
2485 {
2486         struct inode_backref *backref;
2487
2488         list_for_each_entry(backref, &rec->backrefs, list) {
2489                 if (backref->found_dir_index || backref->found_dir_item ||
2490                     backref->found_inode_ref) {
2491                         memcpy(name, backref->name, backref->namelen);
2492                         *namelen = backref->namelen;
2493                         return 0;
2494                 }
2495         }
2496         return -ENOENT;
2497 }
2498
2499 /* Reset the nlink of the inode to the correct one */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501                        struct btrfs_root *root,
2502                        struct btrfs_path *path,
2503                        struct inode_record *rec)
2504 {
2505         struct inode_backref *backref;
2506         struct inode_backref *tmp;
2507         struct btrfs_key key;
2508         struct btrfs_inode_item *inode_item;
2509         int ret = 0;
2510
2511         /* We don't believe this either, reset it and iterate backref */
2512         rec->found_link = 0;
2513
2514         /* Remove all backref including the valid ones */
2515         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517                                    backref->index, backref->name,
2518                                    backref->namelen, 0);
2519                 if (ret < 0)
2520                         goto out;
2521
2522                 /* remove invalid backref, so it won't be added back */
2523                 if (!(backref->found_dir_index &&
2524                       backref->found_dir_item &&
2525                       backref->found_inode_ref)) {
2526                         list_del(&backref->list);
2527                         free(backref);
2528                 } else {
2529                         rec->found_link++;
2530                 }
2531         }
2532
2533         /* Set nlink to 0 */
2534         key.objectid = rec->ino;
2535         key.type = BTRFS_INODE_ITEM_KEY;
2536         key.offset = 0;
2537         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2538         if (ret < 0)
2539                 goto out;
2540         if (ret > 0) {
2541                 ret = -ENOENT;
2542                 goto out;
2543         }
2544         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545                                     struct btrfs_inode_item);
2546         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547         btrfs_mark_buffer_dirty(path->nodes[0]);
2548         btrfs_release_path(path);
2549
2550         /*
2551          * Add back valid inode_ref/dir_item/dir_index,
2552          * add_link() will handle the nlink inc, so new nlink must be correct
2553          */
2554         list_for_each_entry(backref, &rec->backrefs, list) {
2555                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556                                      backref->name, backref->namelen,
2557                                      backref->filetype, &backref->index, 1);
2558                 if (ret < 0)
2559                         goto out;
2560         }
2561 out:
2562         btrfs_release_path(path);
2563         return ret;
2564 }
2565
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567                                struct btrfs_root *root,
2568                                struct btrfs_path *path,
2569                                struct inode_record *rec)
2570 {
2571         char *dir_name = "lost+found";
2572         char namebuf[BTRFS_NAME_LEN] = {0};
2573         u64 lost_found_ino;
2574         u32 mode = 0700;
2575         u8 type = 0;
2576         int namelen = 0;
2577         int name_recovered = 0;
2578         int type_recovered = 0;
2579         int ret = 0;
2580
2581         /*
2582          * Get file name and type first before these invalid inode ref
2583          * are deleted by remove_all_invalid_backref()
2584          */
2585         name_recovered = !find_file_name(rec, namebuf, &namelen);
2586         type_recovered = !find_file_type(rec, &type);
2587
2588         if (!name_recovered) {
2589                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590                        rec->ino, rec->ino);
2591                 namelen = count_digits(rec->ino);
2592                 sprintf(namebuf, "%llu", rec->ino);
2593                 name_recovered = 1;
2594         }
2595         if (!type_recovered) {
2596                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2597                        rec->ino);
2598                 type = BTRFS_FT_REG_FILE;
2599                 type_recovered = 1;
2600         }
2601
2602         ret = reset_nlink(trans, root, path, rec);
2603         if (ret < 0) {
2604                 fprintf(stderr,
2605                         "Failed to reset nlink for inode %llu: %s\n",
2606                         rec->ino, strerror(-ret));
2607                 goto out;
2608         }
2609
2610         if (rec->found_link == 0) {
2611                 lost_found_ino = root->highest_inode;
2612                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2613                         ret = -EOVERFLOW;
2614                         goto out;
2615                 }
2616                 lost_found_ino++;
2617                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2619                                   mode);
2620                 if (ret < 0) {
2621                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622                                 dir_name, strerror(-ret));
2623                         goto out;
2624                 }
2625                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626                                      namebuf, namelen, type, NULL, 1);
2627                 /*
2628                  * Add ".INO" suffix several times to handle case where
2629                  * "FILENAME.INO" is already taken by another file.
2630                  */
2631                 while (ret == -EEXIST) {
2632                         /*
2633                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2634                          */
2635                         if (namelen + count_digits(rec->ino) + 1 >
2636                             BTRFS_NAME_LEN) {
2637                                 ret = -EFBIG;
2638                                 goto out;
2639                         }
2640                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2641                                  ".%llu", rec->ino);
2642                         namelen += count_digits(rec->ino) + 1;
2643                         ret = btrfs_add_link(trans, root, rec->ino,
2644                                              lost_found_ino, namebuf,
2645                                              namelen, type, NULL, 1);
2646                 }
2647                 if (ret < 0) {
2648                         fprintf(stderr,
2649                                 "Failed to link the inode %llu to %s dir: %s\n",
2650                                 rec->ino, dir_name, strerror(-ret));
2651                         goto out;
2652                 }
2653                 /*
2654                  * Just increase the found_link, don't actually add the
2655                  * backref. This will make things easier and this inode
2656                  * record will be freed after the repair is done.
2657                  * So fsck will not report problem about this inode.
2658                  */
2659                 rec->found_link++;
2660                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661                        namelen, namebuf, dir_name);
2662         }
2663         printf("Fixed the nlink of inode %llu\n", rec->ino);
2664 out:
2665         /*
2666          * Clear the flag anyway, or we will loop forever for the same inode
2667          * as it will not be removed from the bad inode list and the dead loop
2668          * happens.
2669          */
2670         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671         btrfs_release_path(path);
2672         return ret;
2673 }
2674
2675 /*
2676  * Check if there is any normal(reg or prealloc) file extent for given
2677  * ino.
2678  * This is used to determine the file type when neither its dir_index/item or
2679  * inode_item exists.
2680  *
2681  * This will *NOT* report error, if any error happens, just consider it does
2682  * not have any normal file extent.
2683  */
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2685 {
2686         struct btrfs_path path;
2687         struct btrfs_key key;
2688         struct btrfs_key found_key;
2689         struct btrfs_file_extent_item *fi;
2690         u8 type;
2691         int ret = 0;
2692
2693         btrfs_init_path(&path);
2694         key.objectid = ino;
2695         key.type = BTRFS_EXTENT_DATA_KEY;
2696         key.offset = 0;
2697
2698         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2699         if (ret < 0) {
2700                 ret = 0;
2701                 goto out;
2702         }
2703         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2704                 ret = btrfs_next_leaf(root, &path);
2705                 if (ret) {
2706                         ret = 0;
2707                         goto out;
2708                 }
2709         }
2710         while (1) {
2711                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2712                                       path.slots[0]);
2713                 if (found_key.objectid != ino ||
2714                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2715                         break;
2716                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2717                                     struct btrfs_file_extent_item);
2718                 type = btrfs_file_extent_type(path.nodes[0], fi);
2719                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2720                         ret = 1;
2721                         goto out;
2722                 }
2723         }
2724 out:
2725         btrfs_release_path(&path);
2726         return ret;
2727 }
2728
2729 static u32 btrfs_type_to_imode(u8 type)
2730 {
2731         static u32 imode_by_btrfs_type[] = {
2732                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2733                 [BTRFS_FT_DIR]          = S_IFDIR,
2734                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2735                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2736                 [BTRFS_FT_FIFO]         = S_IFIFO,
2737                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2738                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2739         };
2740
2741         return imode_by_btrfs_type[(type)];
2742 }
2743
2744 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2745                                 struct btrfs_root *root,
2746                                 struct btrfs_path *path,
2747                                 struct inode_record *rec)
2748 {
2749         u8 filetype;
2750         u32 mode = 0700;
2751         int type_recovered = 0;
2752         int ret = 0;
2753
2754         printf("Trying to rebuild inode:%llu\n", rec->ino);
2755
2756         type_recovered = !find_file_type(rec, &filetype);
2757
2758         /*
2759          * Try to determine inode type if type not found.
2760          *
2761          * For found regular file extent, it must be FILE.
2762          * For found dir_item/index, it must be DIR.
2763          *
2764          * For undetermined one, use FILE as fallback.
2765          *
2766          * TODO:
2767          * 1. If found backref(inode_index/item is already handled) to it,
2768          *    it must be DIR.
2769          *    Need new inode-inode ref structure to allow search for that.
2770          */
2771         if (!type_recovered) {
2772                 if (rec->found_file_extent &&
2773                     find_normal_file_extent(root, rec->ino)) {
2774                         type_recovered = 1;
2775                         filetype = BTRFS_FT_REG_FILE;
2776                 } else if (rec->found_dir_item) {
2777                         type_recovered = 1;
2778                         filetype = BTRFS_FT_DIR;
2779                 } else if (!list_empty(&rec->orphan_extents)) {
2780                         type_recovered = 1;
2781                         filetype = BTRFS_FT_REG_FILE;
2782                 } else{
2783                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2784                                rec->ino);
2785                         type_recovered = 1;
2786                         filetype = BTRFS_FT_REG_FILE;
2787                 }
2788         }
2789
2790         ret = btrfs_new_inode(trans, root, rec->ino,
2791                               mode | btrfs_type_to_imode(filetype));
2792         if (ret < 0)
2793                 goto out;
2794
2795         /*
2796          * Here inode rebuild is done, we only rebuild the inode item,
2797          * don't repair the nlink(like move to lost+found).
2798          * That is the job of nlink repair.
2799          *
2800          * We just fill the record and return
2801          */
2802         rec->found_dir_item = 1;
2803         rec->imode = mode | btrfs_type_to_imode(filetype);
2804         rec->nlink = 0;
2805         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2806         /* Ensure the inode_nlinks repair function will be called */
2807         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2808 out:
2809         return ret;
2810 }
2811
2812 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2813                                       struct btrfs_root *root,
2814                                       struct btrfs_path *path,
2815                                       struct inode_record *rec)
2816 {
2817         struct orphan_data_extent *orphan;
2818         struct orphan_data_extent *tmp;
2819         int ret = 0;
2820
2821         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2822                 /*
2823                  * Check for conflicting file extents
2824                  *
2825                  * Here we don't know whether the extents is compressed or not,
2826                  * so we can only assume it not compressed nor data offset,
2827                  * and use its disk_len as extent length.
2828                  */
2829                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2830                                        orphan->offset, orphan->disk_len, 0);
2831                 btrfs_release_path(path);
2832                 if (ret < 0)
2833                         goto out;
2834                 if (!ret) {
2835                         fprintf(stderr,
2836                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2837                                 orphan->disk_bytenr, orphan->disk_len);
2838                         ret = btrfs_free_extent(trans,
2839                                         root->fs_info->extent_root,
2840                                         orphan->disk_bytenr, orphan->disk_len,
2841                                         0, root->objectid, orphan->objectid,
2842                                         orphan->offset);
2843                         if (ret < 0)
2844                                 goto out;
2845                 }
2846                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2847                                 orphan->offset, orphan->disk_bytenr,
2848                                 orphan->disk_len, orphan->disk_len);
2849                 if (ret < 0)
2850                         goto out;
2851
2852                 /* Update file size info */
2853                 rec->found_size += orphan->disk_len;
2854                 if (rec->found_size == rec->nbytes)
2855                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2856
2857                 /* Update the file extent hole info too */
2858                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2859                                            orphan->disk_len);
2860                 if (ret < 0)
2861                         goto out;
2862                 if (RB_EMPTY_ROOT(&rec->holes))
2863                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2864
2865                 list_del(&orphan->list);
2866                 free(orphan);
2867         }
2868         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2869 out:
2870         return ret;
2871 }
2872
2873 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2874                                         struct btrfs_root *root,
2875                                         struct btrfs_path *path,
2876                                         struct inode_record *rec)
2877 {
2878         struct rb_node *node;
2879         struct file_extent_hole *hole;
2880         int found = 0;
2881         int ret = 0;
2882
2883         node = rb_first(&rec->holes);
2884
2885         while (node) {
2886                 found = 1;
2887                 hole = rb_entry(node, struct file_extent_hole, node);
2888                 ret = btrfs_punch_hole(trans, root, rec->ino,
2889                                        hole->start, hole->len);
2890                 if (ret < 0)
2891                         goto out;
2892                 ret = del_file_extent_hole(&rec->holes, hole->start,
2893                                            hole->len);
2894                 if (ret < 0)
2895                         goto out;
2896                 if (RB_EMPTY_ROOT(&rec->holes))
2897                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2898                 node = rb_first(&rec->holes);
2899         }
2900         /* special case for a file losing all its file extent */
2901         if (!found) {
2902                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2903                                        round_up(rec->isize, root->sectorsize));
2904                 if (ret < 0)
2905                         goto out;
2906         }
2907         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2908                rec->ino, root->objectid);
2909 out:
2910         return ret;
2911 }
2912
2913 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2914 {
2915         struct btrfs_trans_handle *trans;
2916         struct btrfs_path path;
2917         int ret = 0;
2918
2919         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2920                              I_ERR_NO_ORPHAN_ITEM |
2921                              I_ERR_LINK_COUNT_WRONG |
2922                              I_ERR_NO_INODE_ITEM |
2923                              I_ERR_FILE_EXTENT_ORPHAN |
2924                              I_ERR_FILE_EXTENT_DISCOUNT|
2925                              I_ERR_FILE_NBYTES_WRONG)))
2926                 return rec->errors;
2927
2928         /*
2929          * For nlink repair, it may create a dir and add link, so
2930          * 2 for parent(256)'s dir_index and dir_item
2931          * 2 for lost+found dir's inode_item and inode_ref
2932          * 1 for the new inode_ref of the file
2933          * 2 for lost+found dir's dir_index and dir_item for the file
2934          */
2935         trans = btrfs_start_transaction(root, 7);
2936         if (IS_ERR(trans))
2937                 return PTR_ERR(trans);
2938
2939         btrfs_init_path(&path);
2940         if (rec->errors & I_ERR_NO_INODE_ITEM)
2941                 ret = repair_inode_no_item(trans, root, &path, rec);
2942         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2943                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2944         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2945                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2946         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2947                 ret = repair_inode_isize(trans, root, &path, rec);
2948         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2949                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2950         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2951                 ret = repair_inode_nlinks(trans, root, &path, rec);
2952         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2953                 ret = repair_inode_nbytes(trans, root, &path, rec);
2954         btrfs_commit_transaction(trans, root);
2955         btrfs_release_path(&path);
2956         return ret;
2957 }
2958
2959 static int check_inode_recs(struct btrfs_root *root,
2960                             struct cache_tree *inode_cache)
2961 {
2962         struct cache_extent *cache;
2963         struct ptr_node *node;
2964         struct inode_record *rec;
2965         struct inode_backref *backref;
2966         int stage = 0;
2967         int ret = 0;
2968         int err = 0;
2969         u64 error = 0;
2970         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2971
2972         if (btrfs_root_refs(&root->root_item) == 0) {
2973                 if (!cache_tree_empty(inode_cache))
2974                         fprintf(stderr, "warning line %d\n", __LINE__);
2975                 return 0;
2976         }
2977
2978         /*
2979          * We need to record the highest inode number for later 'lost+found'
2980          * dir creation.
2981          * We must select an ino not used/referred by any existing inode, or
2982          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2983          * this may cause 'lost+found' dir has wrong nlinks.
2984          */
2985         cache = last_cache_extent(inode_cache);
2986         if (cache) {
2987                 node = container_of(cache, struct ptr_node, cache);
2988                 rec = node->data;
2989                 if (rec->ino > root->highest_inode)
2990                         root->highest_inode = rec->ino;
2991         }
2992
2993         /*
2994          * We need to repair backrefs first because we could change some of the
2995          * errors in the inode recs.
2996          *
2997          * We also need to go through and delete invalid backrefs first and then
2998          * add the correct ones second.  We do this because we may get EEXIST
2999          * when adding back the correct index because we hadn't yet deleted the
3000          * invalid index.
3001          *
3002          * For example, if we were missing a dir index then the directories
3003          * isize would be wrong, so if we fixed the isize to what we thought it
3004          * would be and then fixed the backref we'd still have a invalid fs, so
3005          * we need to add back the dir index and then check to see if the isize
3006          * is still wrong.
3007          */
3008         while (stage < 3) {
3009                 stage++;
3010                 if (stage == 3 && !err)
3011                         break;
3012
3013                 cache = search_cache_extent(inode_cache, 0);
3014                 while (repair && cache) {
3015                         node = container_of(cache, struct ptr_node, cache);
3016                         rec = node->data;
3017                         cache = next_cache_extent(cache);
3018
3019                         /* Need to free everything up and rescan */
3020                         if (stage == 3) {
3021                                 remove_cache_extent(inode_cache, &node->cache);
3022                                 free(node);
3023                                 free_inode_rec(rec);
3024                                 continue;
3025                         }
3026
3027                         if (list_empty(&rec->backrefs))
3028                                 continue;
3029
3030                         ret = repair_inode_backrefs(root, rec, inode_cache,
3031                                                     stage == 1);
3032                         if (ret < 0) {
3033                                 err = ret;
3034                                 stage = 2;
3035                                 break;
3036                         } if (ret > 0) {
3037                                 err = -EAGAIN;
3038                         }
3039                 }
3040         }
3041         if (err)
3042                 return err;
3043
3044         rec = get_inode_rec(inode_cache, root_dirid, 0);
3045         BUG_ON(IS_ERR(rec));
3046         if (rec) {
3047                 ret = check_root_dir(rec);
3048                 if (ret) {
3049                         fprintf(stderr, "root %llu root dir %llu error\n",
3050                                 (unsigned long long)root->root_key.objectid,
3051                                 (unsigned long long)root_dirid);
3052                         print_inode_error(root, rec);
3053                         error++;
3054                 }
3055         } else {
3056                 if (repair) {
3057                         struct btrfs_trans_handle *trans;
3058
3059                         trans = btrfs_start_transaction(root, 1);
3060                         if (IS_ERR(trans)) {
3061                                 err = PTR_ERR(trans);
3062                                 return err;
3063                         }
3064
3065                         fprintf(stderr,
3066                                 "root %llu missing its root dir, recreating\n",
3067                                 (unsigned long long)root->objectid);
3068
3069                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3070                         BUG_ON(ret);
3071
3072                         btrfs_commit_transaction(trans, root);
3073                         return -EAGAIN;
3074                 }
3075
3076                 fprintf(stderr, "root %llu root dir %llu not found\n",
3077                         (unsigned long long)root->root_key.objectid,
3078                         (unsigned long long)root_dirid);
3079         }
3080
3081         while (1) {
3082                 cache = search_cache_extent(inode_cache, 0);
3083                 if (!cache)
3084                         break;
3085                 node = container_of(cache, struct ptr_node, cache);
3086                 rec = node->data;
3087                 remove_cache_extent(inode_cache, &node->cache);
3088                 free(node);
3089                 if (rec->ino == root_dirid ||
3090                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3091                         free_inode_rec(rec);
3092                         continue;
3093                 }
3094
3095                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3096                         ret = check_orphan_item(root, rec->ino);
3097                         if (ret == 0)
3098                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3099                         if (can_free_inode_rec(rec)) {
3100                                 free_inode_rec(rec);
3101                                 continue;
3102                         }
3103                 }
3104
3105                 if (!rec->found_inode_item)
3106                         rec->errors |= I_ERR_NO_INODE_ITEM;
3107                 if (rec->found_link != rec->nlink)
3108                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3109                 if (repair) {
3110                         ret = try_repair_inode(root, rec);
3111                         if (ret == 0 && can_free_inode_rec(rec)) {
3112                                 free_inode_rec(rec);
3113                                 continue;
3114                         }
3115                         ret = 0;
3116                 }
3117
3118                 if (!(repair && ret == 0))
3119                         error++;
3120                 print_inode_error(root, rec);
3121                 list_for_each_entry(backref, &rec->backrefs, list) {
3122                         if (!backref->found_dir_item)
3123                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3124                         if (!backref->found_dir_index)
3125                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3126                         if (!backref->found_inode_ref)
3127                                 backref->errors |= REF_ERR_NO_INODE_REF;
3128                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3129                                 " namelen %u name %s filetype %d errors %x",
3130                                 (unsigned long long)backref->dir,
3131                                 (unsigned long long)backref->index,
3132                                 backref->namelen, backref->name,
3133                                 backref->filetype, backref->errors);
3134                         print_ref_error(backref->errors);
3135                 }
3136                 free_inode_rec(rec);
3137         }
3138         return (error > 0) ? -1 : 0;
3139 }
3140
3141 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3142                                         u64 objectid)
3143 {
3144         struct cache_extent *cache;
3145         struct root_record *rec = NULL;
3146         int ret;
3147
3148         cache = lookup_cache_extent(root_cache, objectid, 1);
3149         if (cache) {
3150                 rec = container_of(cache, struct root_record, cache);
3151         } else {
3152                 rec = calloc(1, sizeof(*rec));
3153                 if (!rec)
3154                         return ERR_PTR(-ENOMEM);
3155                 rec->objectid = objectid;
3156                 INIT_LIST_HEAD(&rec->backrefs);
3157                 rec->cache.start = objectid;
3158                 rec->cache.size = 1;
3159
3160                 ret = insert_cache_extent(root_cache, &rec->cache);
3161                 if (ret)
3162                         return ERR_PTR(-EEXIST);
3163         }
3164         return rec;
3165 }
3166
3167 static struct root_backref *get_root_backref(struct root_record *rec,
3168                                              u64 ref_root, u64 dir, u64 index,
3169                                              const char *name, int namelen)
3170 {
3171         struct root_backref *backref;
3172
3173         list_for_each_entry(backref, &rec->backrefs, list) {
3174                 if (backref->ref_root != ref_root || backref->dir != dir ||
3175                     backref->namelen != namelen)
3176                         continue;
3177                 if (memcmp(name, backref->name, namelen))
3178                         continue;
3179                 return backref;
3180         }
3181
3182         backref = calloc(1, sizeof(*backref) + namelen + 1);
3183         if (!backref)
3184                 return NULL;
3185         backref->ref_root = ref_root;
3186         backref->dir = dir;
3187         backref->index = index;
3188         backref->namelen = namelen;
3189         memcpy(backref->name, name, namelen);
3190         backref->name[namelen] = '\0';
3191         list_add_tail(&backref->list, &rec->backrefs);
3192         return backref;
3193 }
3194
3195 static void free_root_record(struct cache_extent *cache)
3196 {
3197         struct root_record *rec;
3198         struct root_backref *backref;
3199
3200         rec = container_of(cache, struct root_record, cache);
3201         while (!list_empty(&rec->backrefs)) {
3202                 backref = to_root_backref(rec->backrefs.next);
3203                 list_del(&backref->list);
3204                 free(backref);
3205         }
3206
3207         free(rec);
3208 }
3209
3210 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3211
3212 static int add_root_backref(struct cache_tree *root_cache,
3213                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3214                             const char *name, int namelen,
3215                             int item_type, int errors)
3216 {
3217         struct root_record *rec;
3218         struct root_backref *backref;
3219
3220         rec = get_root_rec(root_cache, root_id);
3221         BUG_ON(IS_ERR(rec));
3222         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3223         BUG_ON(!backref);
3224
3225         backref->errors |= errors;
3226
3227         if (item_type != BTRFS_DIR_ITEM_KEY) {
3228                 if (backref->found_dir_index || backref->found_back_ref ||
3229                     backref->found_forward_ref) {
3230                         if (backref->index != index)
3231                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3232                 } else {
3233                         backref->index = index;
3234                 }
3235         }
3236
3237         if (item_type == BTRFS_DIR_ITEM_KEY) {
3238                 if (backref->found_forward_ref)
3239                         rec->found_ref++;
3240                 backref->found_dir_item = 1;
3241         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3242                 backref->found_dir_index = 1;
3243         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3244                 if (backref->found_forward_ref)
3245                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3246                 else if (backref->found_dir_item)
3247                         rec->found_ref++;
3248                 backref->found_forward_ref = 1;
3249         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3250                 if (backref->found_back_ref)
3251                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3252                 backref->found_back_ref = 1;
3253         } else {
3254                 BUG_ON(1);
3255         }
3256
3257         if (backref->found_forward_ref && backref->found_dir_item)
3258                 backref->reachable = 1;
3259         return 0;
3260 }
3261
3262 static int merge_root_recs(struct btrfs_root *root,
3263                            struct cache_tree *src_cache,
3264                            struct cache_tree *dst_cache)
3265 {
3266         struct cache_extent *cache;
3267         struct ptr_node *node;
3268         struct inode_record *rec;
3269         struct inode_backref *backref;
3270         int ret = 0;
3271
3272         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3273                 free_inode_recs_tree(src_cache);
3274                 return 0;
3275         }
3276
3277         while (1) {
3278                 cache = search_cache_extent(src_cache, 0);
3279                 if (!cache)
3280                         break;
3281                 node = container_of(cache, struct ptr_node, cache);
3282                 rec = node->data;
3283                 remove_cache_extent(src_cache, &node->cache);
3284                 free(node);
3285
3286                 ret = is_child_root(root, root->objectid, rec->ino);
3287                 if (ret < 0)
3288                         break;
3289                 else if (ret == 0)
3290                         goto skip;
3291
3292                 list_for_each_entry(backref, &rec->backrefs, list) {
3293                         BUG_ON(backref->found_inode_ref);
3294                         if (backref->found_dir_item)
3295                                 add_root_backref(dst_cache, rec->ino,
3296                                         root->root_key.objectid, backref->dir,
3297                                         backref->index, backref->name,
3298                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3299                                         backref->errors);
3300                         if (backref->found_dir_index)
3301                                 add_root_backref(dst_cache, rec->ino,
3302                                         root->root_key.objectid, backref->dir,
3303                                         backref->index, backref->name,
3304                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3305                                         backref->errors);
3306                 }
3307 skip:
3308                 free_inode_rec(rec);
3309         }
3310         if (ret < 0)
3311                 return ret;
3312         return 0;
3313 }
3314
3315 static int check_root_refs(struct btrfs_root *root,
3316                            struct cache_tree *root_cache)
3317 {
3318         struct root_record *rec;
3319         struct root_record *ref_root;
3320         struct root_backref *backref;
3321         struct cache_extent *cache;
3322         int loop = 1;
3323         int ret;
3324         int error;
3325         int errors = 0;
3326
3327         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3328         BUG_ON(IS_ERR(rec));
3329         rec->found_ref = 1;
3330
3331         /* fixme: this can not detect circular references */
3332         while (loop) {
3333                 loop = 0;
3334                 cache = search_cache_extent(root_cache, 0);
3335                 while (1) {
3336                         if (!cache)
3337                                 break;
3338                         rec = container_of(cache, struct root_record, cache);
3339                         cache = next_cache_extent(cache);
3340
3341                         if (rec->found_ref == 0)
3342                                 continue;
3343
3344                         list_for_each_entry(backref, &rec->backrefs, list) {
3345                                 if (!backref->reachable)
3346                                         continue;
3347
3348                                 ref_root = get_root_rec(root_cache,
3349                                                         backref->ref_root);
3350                                 BUG_ON(IS_ERR(ref_root));
3351                                 if (ref_root->found_ref > 0)
3352                                         continue;
3353
3354                                 backref->reachable = 0;
3355                                 rec->found_ref--;
3356                                 if (rec->found_ref == 0)
3357                                         loop = 1;
3358                         }
3359                 }
3360         }
3361
3362         cache = search_cache_extent(root_cache, 0);
3363         while (1) {
3364                 if (!cache)
3365                         break;
3366                 rec = container_of(cache, struct root_record, cache);
3367                 cache = next_cache_extent(cache);
3368
3369                 if (rec->found_ref == 0 &&
3370                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3371                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3372                         ret = check_orphan_item(root->fs_info->tree_root,
3373                                                 rec->objectid);
3374                         if (ret == 0)
3375                                 continue;
3376
3377                         /*
3378                          * If we don't have a root item then we likely just have
3379                          * a dir item in a snapshot for this root but no actual
3380                          * ref key or anything so it's meaningless.
3381                          */
3382                         if (!rec->found_root_item)
3383                                 continue;
3384                         errors++;
3385                         fprintf(stderr, "fs tree %llu not referenced\n",
3386                                 (unsigned long long)rec->objectid);
3387                 }
3388
3389                 error = 0;
3390                 if (rec->found_ref > 0 && !rec->found_root_item)
3391                         error = 1;
3392                 list_for_each_entry(backref, &rec->backrefs, list) {
3393                         if (!backref->found_dir_item)
3394                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3395                         if (!backref->found_dir_index)
3396                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3397                         if (!backref->found_back_ref)
3398                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3399                         if (!backref->found_forward_ref)
3400                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3401                         if (backref->reachable && backref->errors)
3402                                 error = 1;
3403                 }
3404                 if (!error)
3405                         continue;
3406
3407                 errors++;
3408                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3409                         (unsigned long long)rec->objectid, rec->found_ref,
3410                          rec->found_root_item ? "" : "not found");
3411
3412                 list_for_each_entry(backref, &rec->backrefs, list) {
3413                         if (!backref->reachable)
3414                                 continue;
3415                         if (!backref->errors && rec->found_root_item)
3416                                 continue;
3417                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3418                                 " index %llu namelen %u name %s errors %x\n",
3419                                 (unsigned long long)backref->ref_root,
3420                                 (unsigned long long)backref->dir,
3421                                 (unsigned long long)backref->index,
3422                                 backref->namelen, backref->name,
3423                                 backref->errors);
3424                         print_ref_error(backref->errors);
3425                 }
3426         }
3427         return errors > 0 ? 1 : 0;
3428 }
3429
3430 static int process_root_ref(struct extent_buffer *eb, int slot,
3431                             struct btrfs_key *key,
3432                             struct cache_tree *root_cache)
3433 {
3434         u64 dirid;
3435         u64 index;
3436         u32 len;
3437         u32 name_len;
3438         struct btrfs_root_ref *ref;
3439         char namebuf[BTRFS_NAME_LEN];
3440         int error;
3441
3442         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3443
3444         dirid = btrfs_root_ref_dirid(eb, ref);
3445         index = btrfs_root_ref_sequence(eb, ref);
3446         name_len = btrfs_root_ref_name_len(eb, ref);
3447
3448         if (name_len <= BTRFS_NAME_LEN) {
3449                 len = name_len;
3450                 error = 0;
3451         } else {
3452                 len = BTRFS_NAME_LEN;
3453                 error = REF_ERR_NAME_TOO_LONG;
3454         }
3455         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3456
3457         if (key->type == BTRFS_ROOT_REF_KEY) {
3458                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3459                                  index, namebuf, len, key->type, error);
3460         } else {
3461                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3462                                  index, namebuf, len, key->type, error);
3463         }
3464         return 0;
3465 }
3466
3467 static void free_corrupt_block(struct cache_extent *cache)
3468 {
3469         struct btrfs_corrupt_block *corrupt;
3470
3471         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3472         free(corrupt);
3473 }
3474
3475 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3476
3477 /*
3478  * Repair the btree of the given root.
3479  *
3480  * The fix is to remove the node key in corrupt_blocks cache_tree.
3481  * and rebalance the tree.
3482  * After the fix, the btree should be writeable.
3483  */
3484 static int repair_btree(struct btrfs_root *root,
3485                         struct cache_tree *corrupt_blocks)
3486 {
3487         struct btrfs_trans_handle *trans;
3488         struct btrfs_path path;
3489         struct btrfs_corrupt_block *corrupt;
3490         struct cache_extent *cache;
3491         struct btrfs_key key;
3492         u64 offset;
3493         int level;
3494         int ret = 0;
3495
3496         if (cache_tree_empty(corrupt_blocks))
3497                 return 0;
3498
3499         trans = btrfs_start_transaction(root, 1);
3500         if (IS_ERR(trans)) {
3501                 ret = PTR_ERR(trans);
3502                 fprintf(stderr, "Error starting transaction: %s\n",
3503                         strerror(-ret));
3504                 return ret;
3505         }
3506         btrfs_init_path(&path);
3507         cache = first_cache_extent(corrupt_blocks);
3508         while (cache) {
3509                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3510                                        cache);
3511                 level = corrupt->level;
3512                 path.lowest_level = level;
3513                 key.objectid = corrupt->key.objectid;
3514                 key.type = corrupt->key.type;
3515                 key.offset = corrupt->key.offset;
3516
3517                 /*
3518                  * Here we don't want to do any tree balance, since it may
3519                  * cause a balance with corrupted brother leaf/node,
3520                  * so ins_len set to 0 here.
3521                  * Balance will be done after all corrupt node/leaf is deleted.
3522                  */
3523                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3524                 if (ret < 0)
3525                         goto out;
3526                 offset = btrfs_node_blockptr(path.nodes[level],
3527                                              path.slots[level]);
3528
3529                 /* Remove the ptr */
3530                 ret = btrfs_del_ptr(trans, root, &path, level,
3531                                     path.slots[level]);
3532                 if (ret < 0)
3533                         goto out;
3534                 /*
3535                  * Remove the corresponding extent
3536                  * return value is not concerned.
3537                  */
3538                 btrfs_release_path(&path);
3539                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3540                                         0, root->root_key.objectid,
3541                                         level - 1, 0);
3542                 cache = next_cache_extent(cache);
3543         }
3544
3545         /* Balance the btree using btrfs_search_slot() */
3546         cache = first_cache_extent(corrupt_blocks);
3547         while (cache) {
3548                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3549                                        cache);
3550                 memcpy(&key, &corrupt->key, sizeof(key));
3551                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3552                 if (ret < 0)
3553                         goto out;
3554                 /* return will always >0 since it won't find the item */
3555                 ret = 0;
3556                 btrfs_release_path(&path);
3557                 cache = next_cache_extent(cache);
3558         }
3559 out:
3560         btrfs_commit_transaction(trans, root);
3561         btrfs_release_path(&path);
3562         return ret;
3563 }
3564
3565 static int check_fs_root(struct btrfs_root *root,
3566                          struct cache_tree *root_cache,
3567                          struct walk_control *wc)
3568 {
3569         int ret = 0;
3570         int err = 0;
3571         int wret;
3572         int level;
3573         struct btrfs_path path;
3574         struct shared_node root_node;
3575         struct root_record *rec;
3576         struct btrfs_root_item *root_item = &root->root_item;
3577         struct cache_tree corrupt_blocks;
3578         struct orphan_data_extent *orphan;
3579         struct orphan_data_extent *tmp;
3580         enum btrfs_tree_block_status status;
3581         struct node_refs nrefs;
3582
3583         /*
3584          * Reuse the corrupt_block cache tree to record corrupted tree block
3585          *
3586          * Unlike the usage in extent tree check, here we do it in a per
3587          * fs/subvol tree base.
3588          */
3589         cache_tree_init(&corrupt_blocks);
3590         root->fs_info->corrupt_blocks = &corrupt_blocks;
3591
3592         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3593                 rec = get_root_rec(root_cache, root->root_key.objectid);
3594                 BUG_ON(IS_ERR(rec));
3595                 if (btrfs_root_refs(root_item) > 0)
3596                         rec->found_root_item = 1;
3597         }
3598
3599         btrfs_init_path(&path);
3600         memset(&root_node, 0, sizeof(root_node));
3601         cache_tree_init(&root_node.root_cache);
3602         cache_tree_init(&root_node.inode_cache);
3603         memset(&nrefs, 0, sizeof(nrefs));
3604
3605         /* Move the orphan extent record to corresponding inode_record */
3606         list_for_each_entry_safe(orphan, tmp,
3607                                  &root->orphan_data_extents, list) {
3608                 struct inode_record *inode;
3609
3610                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3611                                       1);
3612                 BUG_ON(IS_ERR(inode));
3613                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3614                 list_move(&orphan->list, &inode->orphan_extents);
3615         }
3616
3617         level = btrfs_header_level(root->node);
3618         memset(wc->nodes, 0, sizeof(wc->nodes));
3619         wc->nodes[level] = &root_node;
3620         wc->active_node = level;
3621         wc->root_level = level;
3622
3623         /* We may not have checked the root block, lets do that now */
3624         if (btrfs_is_leaf(root->node))
3625                 status = btrfs_check_leaf(root, NULL, root->node);
3626         else
3627                 status = btrfs_check_node(root, NULL, root->node);
3628         if (status != BTRFS_TREE_BLOCK_CLEAN)
3629                 return -EIO;
3630
3631         if (btrfs_root_refs(root_item) > 0 ||
3632             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3633                 path.nodes[level] = root->node;
3634                 extent_buffer_get(root->node);
3635                 path.slots[level] = 0;
3636         } else {
3637                 struct btrfs_key key;
3638                 struct btrfs_disk_key found_key;
3639
3640                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3641                 level = root_item->drop_level;
3642                 path.lowest_level = level;
3643                 if (level > btrfs_header_level(root->node) ||
3644                     level >= BTRFS_MAX_LEVEL) {
3645                         error("ignoring invalid drop level: %u", level);
3646                         goto skip_walking;
3647                 }
3648                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3649                 if (wret < 0)
3650                         goto skip_walking;
3651                 btrfs_node_key(path.nodes[level], &found_key,
3652                                 path.slots[level]);
3653                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3654                                         sizeof(found_key)));
3655         }
3656
3657         while (1) {
3658                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3659                 if (wret < 0)
3660                         ret = wret;
3661                 if (wret != 0)
3662                         break;
3663
3664                 wret = walk_up_tree(root, &path, wc, &level);
3665                 if (wret < 0)
3666                         ret = wret;
3667                 if (wret != 0)
3668                         break;
3669         }
3670 skip_walking:
3671         btrfs_release_path(&path);
3672
3673         if (!cache_tree_empty(&corrupt_blocks)) {
3674                 struct cache_extent *cache;
3675                 struct btrfs_corrupt_block *corrupt;
3676
3677                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3678                        root->root_key.objectid);
3679                 cache = first_cache_extent(&corrupt_blocks);
3680                 while (cache) {
3681                         corrupt = container_of(cache,
3682                                                struct btrfs_corrupt_block,
3683                                                cache);
3684                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3685                                cache->start, corrupt->level,
3686                                corrupt->key.objectid, corrupt->key.type,
3687                                corrupt->key.offset);
3688                         cache = next_cache_extent(cache);
3689                 }
3690                 if (repair) {
3691                         printf("Try to repair the btree for root %llu\n",
3692                                root->root_key.objectid);
3693                         ret = repair_btree(root, &corrupt_blocks);
3694                         if (ret < 0)
3695                                 fprintf(stderr, "Failed to repair btree: %s\n",
3696                                         strerror(-ret));
3697                         if (!ret)
3698                                 printf("Btree for root %llu is fixed\n",
3699                                        root->root_key.objectid);
3700                 }
3701         }
3702
3703         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3704         if (err < 0)
3705                 ret = err;
3706
3707         if (root_node.current) {
3708                 root_node.current->checked = 1;
3709                 maybe_free_inode_rec(&root_node.inode_cache,
3710                                 root_node.current);
3711         }
3712
3713         err = check_inode_recs(root, &root_node.inode_cache);
3714         if (!ret)
3715                 ret = err;
3716
3717         free_corrupt_blocks_tree(&corrupt_blocks);
3718         root->fs_info->corrupt_blocks = NULL;
3719         free_orphan_data_extents(&root->orphan_data_extents);
3720         return ret;
3721 }
3722
3723 static int fs_root_objectid(u64 objectid)
3724 {
3725         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3726             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3727                 return 1;
3728         return is_fstree(objectid);
3729 }
3730
3731 static int check_fs_roots(struct btrfs_root *root,
3732                           struct cache_tree *root_cache)
3733 {
3734         struct btrfs_path path;
3735         struct btrfs_key key;
3736         struct walk_control wc;
3737         struct extent_buffer *leaf, *tree_node;
3738         struct btrfs_root *tmp_root;
3739         struct btrfs_root *tree_root = root->fs_info->tree_root;
3740         int ret;
3741         int err = 0;
3742
3743         if (ctx.progress_enabled) {
3744                 ctx.tp = TASK_FS_ROOTS;
3745                 task_start(ctx.info);
3746         }
3747
3748         /*
3749          * Just in case we made any changes to the extent tree that weren't
3750          * reflected into the free space cache yet.
3751          */
3752         if (repair)
3753                 reset_cached_block_groups(root->fs_info);
3754         memset(&wc, 0, sizeof(wc));
3755         cache_tree_init(&wc.shared);
3756         btrfs_init_path(&path);
3757
3758 again:
3759         key.offset = 0;
3760         key.objectid = 0;
3761         key.type = BTRFS_ROOT_ITEM_KEY;
3762         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3763         if (ret < 0) {
3764                 err = 1;
3765                 goto out;
3766         }
3767         tree_node = tree_root->node;
3768         while (1) {
3769                 if (tree_node != tree_root->node) {
3770                         free_root_recs_tree(root_cache);
3771                         btrfs_release_path(&path);
3772                         goto again;
3773                 }
3774                 leaf = path.nodes[0];
3775                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3776                         ret = btrfs_next_leaf(tree_root, &path);
3777                         if (ret) {
3778                                 if (ret < 0)
3779                                         err = 1;
3780                                 break;
3781                         }
3782                         leaf = path.nodes[0];
3783                 }
3784                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3785                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3786                     fs_root_objectid(key.objectid)) {
3787                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3788                                 tmp_root = btrfs_read_fs_root_no_cache(
3789                                                 root->fs_info, &key);
3790                         } else {
3791                                 key.offset = (u64)-1;
3792                                 tmp_root = btrfs_read_fs_root(
3793                                                 root->fs_info, &key);
3794                         }
3795                         if (IS_ERR(tmp_root)) {
3796                                 err = 1;
3797                                 goto next;
3798                         }
3799                         ret = check_fs_root(tmp_root, root_cache, &wc);
3800                         if (ret == -EAGAIN) {
3801                                 free_root_recs_tree(root_cache);
3802                                 btrfs_release_path(&path);
3803                                 goto again;
3804                         }
3805                         if (ret)
3806                                 err = 1;
3807                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3808                                 btrfs_free_fs_root(tmp_root);
3809                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3810                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3811                         process_root_ref(leaf, path.slots[0], &key,
3812                                          root_cache);
3813                 }
3814 next:
3815                 path.slots[0]++;
3816         }
3817 out:
3818         btrfs_release_path(&path);
3819         if (err)
3820                 free_extent_cache_tree(&wc.shared);
3821         if (!cache_tree_empty(&wc.shared))
3822                 fprintf(stderr, "warning line %d\n", __LINE__);
3823
3824         task_stop(ctx.info);
3825
3826         return err;
3827 }
3828
#define ROOT_DIR_ERROR		(1 << 1)	/* ROOT_DIR has a DIR_ITEM/DIR_INDEX */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM/DIR_INDEX not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM/DIR_INDEX found but does not match */
3832
3833 /*
3834  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3835  * INODE_REF/INODE_EXTREF match.
3836  *
3837  * @root:       the root of the fs/file tree
3838  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
3839  * @key:        the key of the DIR_ITEM/DIR_INDEX
3840  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
3841  *              distinguish root_dir between normal dir/file
3842  * @name:       the name in the INODE_REF/INODE_EXTREF
3843  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
3844  * @mode:       the st_mode of INODE_ITEM
3845  *
3846  * Return 0 if no error occurred.
3847  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3848  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3849  * dir/file.
3850  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3851  * not match for normal dir/file.
3852  */
3853 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3854                          struct btrfs_key *key, u64 index, char *name,
3855                          u32 namelen, u32 mode)
3856 {
3857         struct btrfs_path path;
3858         struct extent_buffer *node;
3859         struct btrfs_dir_item *di;
3860         struct btrfs_key location;
3861         char namebuf[BTRFS_NAME_LEN] = {0};
3862         u32 total;
3863         u32 cur = 0;
3864         u32 len;
3865         u32 name_len;
3866         u32 data_len;
3867         u8 filetype;
3868         int slot;
3869         int ret;
3870
3871         btrfs_init_path(&path);
3872         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3873         if (ret < 0) {
3874                 ret = DIR_ITEM_MISSING;
3875                 goto out;
3876         }
3877
3878         /* Process root dir and goto out*/
3879         if (index == 0) {
3880                 if (ret == 0) {
3881                         ret = ROOT_DIR_ERROR;
3882                         error(
3883                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3884                                 root->objectid,
3885                                 ref_key->type == BTRFS_INODE_REF_KEY ?
3886                                         "REF" : "EXTREF",
3887                                 ref_key->objectid, ref_key->offset,
3888                                 key->type == BTRFS_DIR_ITEM_KEY ?
3889                                         "DIR_ITEM" : "DIR_INDEX");
3890                 } else {
3891                         ret = 0;
3892                 }
3893
3894                 goto out;
3895         }
3896
3897         /* Process normal file/dir */
3898         if (ret > 0) {
3899                 ret = DIR_ITEM_MISSING;
3900                 error(
3901                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3902                         root->objectid,
3903                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3904                         ref_key->objectid, ref_key->offset,
3905                         key->type == BTRFS_DIR_ITEM_KEY ?
3906                                 "DIR_ITEM" : "DIR_INDEX",
3907                         key->objectid, key->offset, namelen, name,
3908                         imode_to_type(mode));
3909                 goto out;
3910         }
3911
3912         /* Check whether inode_id/filetype/name match */
3913         node = path.nodes[0];
3914         slot = path.slots[0];
3915         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3916         total = btrfs_item_size_nr(node, slot);
3917         while (cur < total) {
3918                 ret = DIR_ITEM_MISMATCH;
3919                 name_len = btrfs_dir_name_len(node, di);
3920                 data_len = btrfs_dir_data_len(node, di);
3921
3922                 btrfs_dir_item_key_to_cpu(node, di, &location);
3923                 if (location.objectid != ref_key->objectid ||
3924                     location.type !=  BTRFS_INODE_ITEM_KEY ||
3925                     location.offset != 0)
3926                         goto next;
3927
3928                 filetype = btrfs_dir_type(node, di);
3929                 if (imode_to_type(mode) != filetype)
3930                         goto next;
3931
3932                 if (name_len <= BTRFS_NAME_LEN) {
3933                         len = name_len;
3934                 } else {
3935                         len = BTRFS_NAME_LEN;
3936                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3937                         root->objectid,
3938                         key->type == BTRFS_DIR_ITEM_KEY ?
3939                         "DIR_ITEM" : "DIR_INDEX",
3940                         key->objectid, key->offset, name_len);
3941                 }
3942                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3943                 if (len != namelen || strncmp(namebuf, name, len))
3944                         goto next;
3945
3946                 ret = 0;
3947                 goto out;
3948 next:
3949                 len = sizeof(*di) + name_len + data_len;
3950                 di = (struct btrfs_dir_item *)((char *)di + len);
3951                 cur += len;
3952         }
3953         if (ret == DIR_ITEM_MISMATCH)
3954                 error(
3955                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3956                         root->objectid,
3957                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3958                         ref_key->objectid, ref_key->offset,
3959                         key->type == BTRFS_DIR_ITEM_KEY ?
3960                                 "DIR_ITEM" : "DIR_INDEX",
3961                         key->objectid, key->offset, namelen, name,
3962                         imode_to_type(mode));
3963 out:
3964         btrfs_release_path(&path);
3965         return ret;
3966 }
3967
3968 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3969 {
3970         struct list_head *cur = rec->backrefs.next;
3971         struct extent_backref *back;
3972         struct tree_backref *tback;
3973         struct data_backref *dback;
3974         u64 found = 0;
3975         int err = 0;
3976
3977         while(cur != &rec->backrefs) {
3978                 back = to_extent_backref(cur);
3979                 cur = cur->next;
3980                 if (!back->found_extent_tree) {
3981                         err = 1;
3982                         if (!print_errs)
3983                                 goto out;
3984                         if (back->is_data) {
3985                                 dback = to_data_backref(back);
3986                                 fprintf(stderr, "Backref %llu %s %llu"
3987                                         " owner %llu offset %llu num_refs %lu"
3988                                         " not found in extent tree\n",
3989                                         (unsigned long long)rec->start,
3990                                         back->full_backref ?
3991                                         "parent" : "root",
3992                                         back->full_backref ?
3993                                         (unsigned long long)dback->parent:
3994                                         (unsigned long long)dback->root,
3995                                         (unsigned long long)dback->owner,
3996                                         (unsigned long long)dback->offset,
3997                                         (unsigned long)dback->num_refs);
3998                         } else {
3999                                 tback = to_tree_backref(back);
4000                                 fprintf(stderr, "Backref %llu parent %llu"
4001                                         " root %llu not found in extent tree\n",
4002                                         (unsigned long long)rec->start,
4003                                         (unsigned long long)tback->parent,
4004                                         (unsigned long long)tback->root);
4005                         }
4006                 }
4007                 if (!back->is_data && !back->found_ref) {
4008                         err = 1;
4009                         if (!print_errs)
4010                                 goto out;
4011                         tback = to_tree_backref(back);
4012                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4013                                 (unsigned long long)rec->start,
4014                                 back->full_backref ? "parent" : "root",
4015                                 back->full_backref ?
4016                                 (unsigned long long)tback->parent :
4017                                 (unsigned long long)tback->root, back);
4018                 }
4019                 if (back->is_data) {
4020                         dback = to_data_backref(back);
4021                         if (dback->found_ref != dback->num_refs) {
4022                                 err = 1;
4023                                 if (!print_errs)
4024                                         goto out;
4025                                 fprintf(stderr, "Incorrect local backref count"
4026                                         " on %llu %s %llu owner %llu"
4027                                         " offset %llu found %u wanted %u back %p\n",
4028                                         (unsigned long long)rec->start,
4029                                         back->full_backref ?
4030                                         "parent" : "root",
4031                                         back->full_backref ?
4032                                         (unsigned long long)dback->parent:
4033                                         (unsigned long long)dback->root,
4034                                         (unsigned long long)dback->owner,
4035                                         (unsigned long long)dback->offset,
4036                                         dback->found_ref, dback->num_refs, back);
4037                         }
4038                         if (dback->disk_bytenr != rec->start) {
4039                                 err = 1;
4040                                 if (!print_errs)
4041                                         goto out;
4042                                 fprintf(stderr, "Backref disk bytenr does not"
4043                                         " match extent record, bytenr=%llu, "
4044                                         "ref bytenr=%llu\n",
4045                                         (unsigned long long)rec->start,
4046                                         (unsigned long long)dback->disk_bytenr);
4047                         }
4048
4049                         if (dback->bytes != rec->nr) {
4050                                 err = 1;
4051                                 if (!print_errs)
4052                                         goto out;
4053                                 fprintf(stderr, "Backref bytes do not match "
4054                                         "extent backref, bytenr=%llu, ref "
4055                                         "bytes=%llu, backref bytes=%llu\n",
4056                                         (unsigned long long)rec->start,
4057                                         (unsigned long long)rec->nr,
4058                                         (unsigned long long)dback->bytes);
4059                         }
4060                 }
4061                 if (!back->is_data) {
4062                         found += 1;
4063                 } else {
4064                         dback = to_data_backref(back);
4065                         found += dback->found_ref;
4066                 }
4067         }
4068         if (found != rec->refs) {
4069                 err = 1;
4070                 if (!print_errs)
4071                         goto out;
4072                 fprintf(stderr, "Incorrect global backref count "
4073                         "on %llu found %llu wanted %llu\n",
4074                         (unsigned long long)rec->start,
4075                         (unsigned long long)found,
4076                         (unsigned long long)rec->refs);
4077         }
4078 out:
4079         return err;
4080 }
4081
4082 static int free_all_extent_backrefs(struct extent_record *rec)
4083 {
4084         struct extent_backref *back;
4085         struct list_head *cur;
4086         while (!list_empty(&rec->backrefs)) {
4087                 cur = rec->backrefs.next;
4088                 back = to_extent_backref(cur);
4089                 list_del(cur);
4090                 free(back);
4091         }
4092         return 0;
4093 }
4094
4095 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4096                                      struct cache_tree *extent_cache)
4097 {
4098         struct cache_extent *cache;
4099         struct extent_record *rec;
4100
4101         while (1) {
4102                 cache = first_cache_extent(extent_cache);
4103                 if (!cache)
4104                         break;
4105                 rec = container_of(cache, struct extent_record, cache);
4106                 remove_cache_extent(extent_cache, cache);
4107                 free_all_extent_backrefs(rec);
4108                 free(rec);
4109         }
4110 }
4111
4112 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4113                                  struct extent_record *rec)
4114 {
4115         if (rec->content_checked && rec->owner_ref_checked &&
4116             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4117             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4118             !rec->bad_full_backref && !rec->crossing_stripes &&
4119             !rec->wrong_chunk_type) {
4120                 remove_cache_extent(extent_cache, &rec->cache);
4121                 free_all_extent_backrefs(rec);
4122                 list_del_init(&rec->list);
4123                 free(rec);
4124         }
4125         return 0;
4126 }
4127
4128 static int check_owner_ref(struct btrfs_root *root,
4129                             struct extent_record *rec,
4130                             struct extent_buffer *buf)
4131 {
4132         struct extent_backref *node;
4133         struct tree_backref *back;
4134         struct btrfs_root *ref_root;
4135         struct btrfs_key key;
4136         struct btrfs_path path;
4137         struct extent_buffer *parent;
4138         int level;
4139         int found = 0;
4140         int ret;
4141
4142         list_for_each_entry(node, &rec->backrefs, list) {
4143                 if (node->is_data)
4144                         continue;
4145                 if (!node->found_ref)
4146                         continue;
4147                 if (node->full_backref)
4148                         continue;
4149                 back = to_tree_backref(node);
4150                 if (btrfs_header_owner(buf) == back->root)
4151                         return 0;
4152         }
4153         BUG_ON(rec->is_root);
4154
4155         /* try to find the block by search corresponding fs tree */
4156         key.objectid = btrfs_header_owner(buf);
4157         key.type = BTRFS_ROOT_ITEM_KEY;
4158         key.offset = (u64)-1;
4159
4160         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4161         if (IS_ERR(ref_root))
4162                 return 1;
4163
4164         level = btrfs_header_level(buf);
4165         if (level == 0)
4166                 btrfs_item_key_to_cpu(buf, &key, 0);
4167         else
4168                 btrfs_node_key_to_cpu(buf, &key, 0);
4169
4170         btrfs_init_path(&path);
4171         path.lowest_level = level + 1;
4172         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4173         if (ret < 0)
4174                 return 0;
4175
4176         parent = path.nodes[level + 1];
4177         if (parent && buf->start == btrfs_node_blockptr(parent,
4178                                                         path.slots[level + 1]))
4179                 found = 1;
4180
4181         btrfs_release_path(&path);
4182         return found ? 0 : 1;
4183 }
4184
4185 static int is_extent_tree_record(struct extent_record *rec)
4186 {
4187         struct list_head *cur = rec->backrefs.next;
4188         struct extent_backref *node;
4189         struct tree_backref *back;
4190         int is_extent = 0;
4191
4192         while(cur != &rec->backrefs) {
4193                 node = to_extent_backref(cur);
4194                 cur = cur->next;
4195                 if (node->is_data)
4196                         return 0;
4197                 back = to_tree_backref(node);
4198                 if (node->full_backref)
4199                         return 0;
4200                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4201                         is_extent = 1;
4202         }
4203         return is_extent;
4204 }
4205
4206
4207 static int record_bad_block_io(struct btrfs_fs_info *info,
4208                                struct cache_tree *extent_cache,
4209                                u64 start, u64 len)
4210 {
4211         struct extent_record *rec;
4212         struct cache_extent *cache;
4213         struct btrfs_key key;
4214
4215         cache = lookup_cache_extent(extent_cache, start, len);
4216         if (!cache)
4217                 return 0;
4218
4219         rec = container_of(cache, struct extent_record, cache);
4220         if (!is_extent_tree_record(rec))
4221                 return 0;
4222
4223         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4224         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4225 }
4226
4227 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4228                        struct extent_buffer *buf, int slot)
4229 {
4230         if (btrfs_header_level(buf)) {
4231                 struct btrfs_key_ptr ptr1, ptr2;
4232
4233                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4234                                    sizeof(struct btrfs_key_ptr));
4235                 read_extent_buffer(buf, &ptr2,
4236                                    btrfs_node_key_ptr_offset(slot + 1),
4237                                    sizeof(struct btrfs_key_ptr));
4238                 write_extent_buffer(buf, &ptr1,
4239                                     btrfs_node_key_ptr_offset(slot + 1),
4240                                     sizeof(struct btrfs_key_ptr));
4241                 write_extent_buffer(buf, &ptr2,
4242                                     btrfs_node_key_ptr_offset(slot),
4243                                     sizeof(struct btrfs_key_ptr));
4244                 if (slot == 0) {
4245                         struct btrfs_disk_key key;
4246                         btrfs_node_key(buf, &key, 0);
4247                         btrfs_fixup_low_keys(root, path, &key,
4248                                              btrfs_header_level(buf) + 1);
4249                 }
4250         } else {
4251                 struct btrfs_item *item1, *item2;
4252                 struct btrfs_key k1, k2;
4253                 char *item1_data, *item2_data;
4254                 u32 item1_offset, item2_offset, item1_size, item2_size;
4255
4256                 item1 = btrfs_item_nr(slot);
4257                 item2 = btrfs_item_nr(slot + 1);
4258                 btrfs_item_key_to_cpu(buf, &k1, slot);
4259                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4260                 item1_offset = btrfs_item_offset(buf, item1);
4261                 item2_offset = btrfs_item_offset(buf, item2);
4262                 item1_size = btrfs_item_size(buf, item1);
4263                 item2_size = btrfs_item_size(buf, item2);
4264
4265                 item1_data = malloc(item1_size);
4266                 if (!item1_data)
4267                         return -ENOMEM;
4268                 item2_data = malloc(item2_size);
4269                 if (!item2_data) {
4270                         free(item1_data);
4271                         return -ENOMEM;
4272                 }
4273
4274                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4275                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4276
4277                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4278                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4279                 free(item1_data);
4280                 free(item2_data);
4281
4282                 btrfs_set_item_offset(buf, item1, item2_offset);
4283                 btrfs_set_item_offset(buf, item2, item1_offset);
4284                 btrfs_set_item_size(buf, item1, item2_size);
4285                 btrfs_set_item_size(buf, item2, item1_size);
4286
4287                 path->slots[0] = slot;
4288                 btrfs_set_item_key_unsafe(root, path, &k2);
4289                 path->slots[0] = slot + 1;
4290                 btrfs_set_item_key_unsafe(root, path, &k1);
4291         }
4292         return 0;
4293 }
4294
4295 static int fix_key_order(struct btrfs_trans_handle *trans,
4296                          struct btrfs_root *root,
4297                          struct btrfs_path *path)
4298 {
4299         struct extent_buffer *buf;
4300         struct btrfs_key k1, k2;
4301         int i;
4302         int level = path->lowest_level;
4303         int ret = -EIO;
4304
4305         buf = path->nodes[level];
4306         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4307                 if (level) {
4308                         btrfs_node_key_to_cpu(buf, &k1, i);
4309                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4310                 } else {
4311                         btrfs_item_key_to_cpu(buf, &k1, i);
4312                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4313                 }
4314                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4315                         continue;
4316                 ret = swap_values(root, path, buf, i);
4317                 if (ret)
4318                         break;
4319                 btrfs_mark_buffer_dirty(buf);
4320                 i = 0;
4321         }
4322         return ret;
4323 }
4324
4325 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4326                              struct btrfs_root *root,
4327                              struct btrfs_path *path,
4328                              struct extent_buffer *buf, int slot)
4329 {
4330         struct btrfs_key key;
4331         int nritems = btrfs_header_nritems(buf);
4332
4333         btrfs_item_key_to_cpu(buf, &key, slot);
4334
4335         /* These are all the keys we can deal with missing. */
4336         if (key.type != BTRFS_DIR_INDEX_KEY &&
4337             key.type != BTRFS_EXTENT_ITEM_KEY &&
4338             key.type != BTRFS_METADATA_ITEM_KEY &&
4339             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4340             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4341                 return -1;
4342
4343         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4344                (unsigned long long)key.objectid, key.type,
4345                (unsigned long long)key.offset, slot, buf->start);
4346         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4347                               btrfs_item_nr_offset(slot + 1),
4348                               sizeof(struct btrfs_item) *
4349                               (nritems - slot - 1));
4350         btrfs_set_header_nritems(buf, nritems - 1);
4351         if (slot == 0) {
4352                 struct btrfs_disk_key disk_key;
4353
4354                 btrfs_item_key(buf, &disk_key, 0);
4355                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4356         }
4357         btrfs_mark_buffer_dirty(buf);
4358         return 0;
4359 }
4360
4361 static int fix_item_offset(struct btrfs_trans_handle *trans,
4362                            struct btrfs_root *root,
4363                            struct btrfs_path *path)
4364 {
4365         struct extent_buffer *buf;
4366         int i;
4367         int ret = 0;
4368
4369         /* We should only get this for leaves */
4370         BUG_ON(path->lowest_level);
4371         buf = path->nodes[0];
4372 again:
4373         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4374                 unsigned int shift = 0, offset;
4375
4376                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4377                     BTRFS_LEAF_DATA_SIZE(root)) {
4378                         if (btrfs_item_end_nr(buf, i) >
4379                             BTRFS_LEAF_DATA_SIZE(root)) {
4380                                 ret = delete_bogus_item(trans, root, path,
4381                                                         buf, i);
4382                                 if (!ret)
4383                                         goto again;
4384                                 fprintf(stderr, "item is off the end of the "
4385                                         "leaf, can't fix\n");
4386                                 ret = -EIO;
4387                                 break;
4388                         }
4389                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4390                                 btrfs_item_end_nr(buf, i);
4391                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4392                            btrfs_item_offset_nr(buf, i - 1)) {
4393                         if (btrfs_item_end_nr(buf, i) >
4394                             btrfs_item_offset_nr(buf, i - 1)) {
4395                                 ret = delete_bogus_item(trans, root, path,
4396                                                         buf, i);
4397                                 if (!ret)
4398                                         goto again;
4399                                 fprintf(stderr, "items overlap, can't fix\n");
4400                                 ret = -EIO;
4401                                 break;
4402                         }
4403                         shift = btrfs_item_offset_nr(buf, i - 1) -
4404                                 btrfs_item_end_nr(buf, i);
4405                 }
4406                 if (!shift)
4407                         continue;
4408
4409                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4410                        i, shift, (unsigned long long)buf->start);
4411                 offset = btrfs_item_offset_nr(buf, i);
4412                 memmove_extent_buffer(buf,
4413                                       btrfs_leaf_data(buf) + offset + shift,
4414                                       btrfs_leaf_data(buf) + offset,
4415                                       btrfs_item_size_nr(buf, i));
4416                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4417                                       offset + shift);
4418                 btrfs_mark_buffer_dirty(buf);
4419         }
4420
4421         /*
4422          * We may have moved things, in which case we want to exit so we don't
4423          * write those changes out.  Once we have proper abort functionality in
4424          * progs this can be changed to something nicer.
4425          */
4426         BUG_ON(ret);
4427         return ret;
4428 }
4429
4430 /*
4431  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4432  * then just return -EIO.
4433  */
4434 static int try_to_fix_bad_block(struct btrfs_root *root,
4435                                 struct extent_buffer *buf,
4436                                 enum btrfs_tree_block_status status)
4437 {
4438         struct btrfs_trans_handle *trans;
4439         struct ulist *roots;
4440         struct ulist_node *node;
4441         struct btrfs_root *search_root;
4442         struct btrfs_path path;
4443         struct ulist_iterator iter;
4444         struct btrfs_key root_key, key;
4445         int ret;
4446
4447         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4448             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4449                 return -EIO;
4450
4451         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4452         if (ret)
4453                 return -EIO;
4454
4455         btrfs_init_path(&path);
4456         ULIST_ITER_INIT(&iter);
4457         while ((node = ulist_next(roots, &iter))) {
4458                 root_key.objectid = node->val;
4459                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4460                 root_key.offset = (u64)-1;
4461
4462                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4463                 if (IS_ERR(root)) {
4464                         ret = -EIO;
4465                         break;
4466                 }
4467
4468
4469                 trans = btrfs_start_transaction(search_root, 0);
4470                 if (IS_ERR(trans)) {
4471                         ret = PTR_ERR(trans);
4472                         break;
4473                 }
4474
4475                 path.lowest_level = btrfs_header_level(buf);
4476                 path.skip_check_block = 1;
4477                 if (path.lowest_level)
4478                         btrfs_node_key_to_cpu(buf, &key, 0);
4479                 else
4480                         btrfs_item_key_to_cpu(buf, &key, 0);
4481                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4482                 if (ret) {
4483                         ret = -EIO;
4484                         btrfs_commit_transaction(trans, search_root);
4485                         break;
4486                 }
4487                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4488                         ret = fix_key_order(trans, search_root, &path);
4489                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4490                         ret = fix_item_offset(trans, search_root, &path);
4491                 if (ret) {
4492                         btrfs_commit_transaction(trans, search_root);
4493                         break;
4494                 }
4495                 btrfs_release_path(&path);
4496                 btrfs_commit_transaction(trans, search_root);
4497         }
4498         ulist_free(roots);
4499         btrfs_release_path(&path);
4500         return ret;
4501 }
4502
4503 static int check_block(struct btrfs_root *root,
4504                        struct cache_tree *extent_cache,
4505                        struct extent_buffer *buf, u64 flags)
4506 {
4507         struct extent_record *rec;
4508         struct cache_extent *cache;
4509         struct btrfs_key key;
4510         enum btrfs_tree_block_status status;
4511         int ret = 0;
4512         int level;
4513
4514         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4515         if (!cache)
4516                 return 1;
4517         rec = container_of(cache, struct extent_record, cache);
4518         rec->generation = btrfs_header_generation(buf);
4519
4520         level = btrfs_header_level(buf);
4521         if (btrfs_header_nritems(buf) > 0) {
4522
4523                 if (level == 0)
4524                         btrfs_item_key_to_cpu(buf, &key, 0);
4525                 else
4526                         btrfs_node_key_to_cpu(buf, &key, 0);
4527
4528                 rec->info_objectid = key.objectid;
4529         }
4530         rec->info_level = level;
4531
4532         if (btrfs_is_leaf(buf))
4533                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4534         else
4535                 status = btrfs_check_node(root, &rec->parent_key, buf);
4536
4537         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4538                 if (repair)
4539                         status = try_to_fix_bad_block(root, buf, status);
4540                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4541                         ret = -EIO;
4542                         fprintf(stderr, "bad block %llu\n",
4543                                 (unsigned long long)buf->start);
4544                 } else {
4545                         /*
4546                          * Signal to callers we need to start the scan over
4547                          * again since we'll have cowed blocks.
4548                          */
4549                         ret = -EAGAIN;
4550                 }
4551         } else {
4552                 rec->content_checked = 1;
4553                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4554                         rec->owner_ref_checked = 1;
4555                 else {
4556                         ret = check_owner_ref(root, rec, buf);
4557                         if (!ret)
4558                                 rec->owner_ref_checked = 1;
4559                 }
4560         }
4561         if (!ret)
4562                 maybe_free_extent_rec(extent_cache, rec);
4563         return ret;
4564 }
4565
4566 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4567                                                 u64 parent, u64 root)
4568 {
4569         struct list_head *cur = rec->backrefs.next;
4570         struct extent_backref *node;
4571         struct tree_backref *back;
4572
4573         while(cur != &rec->backrefs) {
4574                 node = to_extent_backref(cur);
4575                 cur = cur->next;
4576                 if (node->is_data)
4577                         continue;
4578                 back = to_tree_backref(node);
4579                 if (parent > 0) {
4580                         if (!node->full_backref)
4581                                 continue;
4582                         if (parent == back->parent)
4583                                 return back;
4584                 } else {
4585                         if (node->full_backref)
4586                                 continue;
4587                         if (back->root == root)
4588                                 return back;
4589                 }
4590         }
4591         return NULL;
4592 }
4593
4594 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4595                                                 u64 parent, u64 root)
4596 {
4597         struct tree_backref *ref = malloc(sizeof(*ref));
4598
4599         if (!ref)
4600                 return NULL;
4601         memset(&ref->node, 0, sizeof(ref->node));
4602         if (parent > 0) {
4603                 ref->parent = parent;
4604                 ref->node.full_backref = 1;
4605         } else {
4606                 ref->root = root;
4607                 ref->node.full_backref = 0;
4608         }
4609         list_add_tail(&ref->node.list, &rec->backrefs);
4610
4611         return ref;
4612 }
4613
4614 static struct data_backref *find_data_backref(struct extent_record *rec,
4615                                                 u64 parent, u64 root,
4616                                                 u64 owner, u64 offset,
4617                                                 int found_ref,
4618                                                 u64 disk_bytenr, u64 bytes)
4619 {
4620         struct list_head *cur = rec->backrefs.next;
4621         struct extent_backref *node;
4622         struct data_backref *back;
4623
4624         while(cur != &rec->backrefs) {
4625                 node = to_extent_backref(cur);
4626                 cur = cur->next;
4627                 if (!node->is_data)
4628                         continue;
4629                 back = to_data_backref(node);
4630                 if (parent > 0) {
4631                         if (!node->full_backref)
4632                                 continue;
4633                         if (parent == back->parent)
4634                                 return back;
4635                 } else {
4636                         if (node->full_backref)
4637                                 continue;
4638                         if (back->root == root && back->owner == owner &&
4639                             back->offset == offset) {
4640                                 if (found_ref && node->found_ref &&
4641                                     (back->bytes != bytes ||
4642                                     back->disk_bytenr != disk_bytenr))
4643                                         continue;
4644                                 return back;
4645                         }
4646                 }
4647         }
4648         return NULL;
4649 }
4650
4651 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4652                                                 u64 parent, u64 root,
4653                                                 u64 owner, u64 offset,
4654                                                 u64 max_size)
4655 {
4656         struct data_backref *ref = malloc(sizeof(*ref));
4657
4658         if (!ref)
4659                 return NULL;
4660         memset(&ref->node, 0, sizeof(ref->node));
4661         ref->node.is_data = 1;
4662
4663         if (parent > 0) {
4664                 ref->parent = parent;
4665                 ref->owner = 0;
4666                 ref->offset = 0;
4667                 ref->node.full_backref = 1;
4668         } else {
4669                 ref->root = root;
4670                 ref->owner = owner;
4671                 ref->offset = offset;
4672                 ref->node.full_backref = 0;
4673         }
4674         ref->bytes = max_size;
4675         ref->found_ref = 0;
4676         ref->num_refs = 0;
4677         list_add_tail(&ref->node.list, &rec->backrefs);
4678         if (max_size > rec->max_size)
4679                 rec->max_size = max_size;
4680         return ref;
4681 }
4682
4683 /* Check if the type of extent matches with its chunk */
4684 static void check_extent_type(struct extent_record *rec)
4685 {
4686         struct btrfs_block_group_cache *bg_cache;
4687
4688         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4689         if (!bg_cache)
4690                 return;
4691
4692         /* data extent, check chunk directly*/
4693         if (!rec->metadata) {
4694                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4695                         rec->wrong_chunk_type = 1;
4696                 return;
4697         }
4698
4699         /* metadata extent, check the obvious case first */
4700         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4701                                  BTRFS_BLOCK_GROUP_METADATA))) {
4702                 rec->wrong_chunk_type = 1;
4703                 return;
4704         }
4705
4706         /*
4707          * Check SYSTEM extent, as it's also marked as metadata, we can only
4708          * make sure it's a SYSTEM extent by its backref
4709          */
4710         if (!list_empty(&rec->backrefs)) {
4711                 struct extent_backref *node;
4712                 struct tree_backref *tback;
4713                 u64 bg_type;
4714
4715                 node = to_extent_backref(rec->backrefs.next);
4716                 if (node->is_data) {
4717                         /* tree block shouldn't have data backref */
4718                         rec->wrong_chunk_type = 1;
4719                         return;
4720                 }
4721                 tback = container_of(node, struct tree_backref, node);
4722
4723                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4724                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4725                 else
4726                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4727                 if (!(bg_cache->flags & bg_type))
4728                         rec->wrong_chunk_type = 1;
4729         }
4730 }
4731
4732 /*
4733  * Allocate a new extent record, fill default values from @tmpl and insert int
4734  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4735  * the cache, otherwise it fails.
4736  */
4737 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4738                 struct extent_record *tmpl)
4739 {
4740         struct extent_record *rec;
4741         int ret = 0;
4742
4743         rec = malloc(sizeof(*rec));
4744         if (!rec)
4745                 return -ENOMEM;
4746         rec->start = tmpl->start;
4747         rec->max_size = tmpl->max_size;
4748         rec->nr = max(tmpl->nr, tmpl->max_size);
4749         rec->found_rec = tmpl->found_rec;
4750         rec->content_checked = tmpl->content_checked;
4751         rec->owner_ref_checked = tmpl->owner_ref_checked;
4752         rec->num_duplicates = 0;
4753         rec->metadata = tmpl->metadata;
4754         rec->flag_block_full_backref = FLAG_UNSET;
4755         rec->bad_full_backref = 0;
4756         rec->crossing_stripes = 0;
4757         rec->wrong_chunk_type = 0;
4758         rec->is_root = tmpl->is_root;
4759         rec->refs = tmpl->refs;
4760         rec->extent_item_refs = tmpl->extent_item_refs;
4761         rec->parent_generation = tmpl->parent_generation;
4762         INIT_LIST_HEAD(&rec->backrefs);
4763         INIT_LIST_HEAD(&rec->dups);
4764         INIT_LIST_HEAD(&rec->list);
4765         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4766         rec->cache.start = tmpl->start;
4767         rec->cache.size = tmpl->nr;
4768         ret = insert_cache_extent(extent_cache, &rec->cache);
4769         if (ret) {
4770                 free(rec);
4771                 return ret;
4772         }
4773         bytes_used += rec->nr;
4774
4775         if (tmpl->metadata)
4776                 rec->crossing_stripes = check_crossing_stripes(global_info,
4777                                 rec->start, global_info->tree_root->nodesize);
4778         check_extent_type(rec);
4779         return ret;
4780 }
4781
4782 /*
4783  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4784  * some are hints:
4785  * - refs              - if found, increase refs
4786  * - is_root           - if found, set
4787  * - content_checked   - if found, set
4788  * - owner_ref_checked - if found, set
4789  *
4790  * If not found, create a new one, initialize and insert.
4791  */
4792 static int add_extent_rec(struct cache_tree *extent_cache,
4793                 struct extent_record *tmpl)
4794 {
4795         struct extent_record *rec;
4796         struct cache_extent *cache;
4797         int ret = 0;
4798         int dup = 0;
4799
4800         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4801         if (cache) {
4802                 rec = container_of(cache, struct extent_record, cache);
4803                 if (tmpl->refs)
4804                         rec->refs++;
4805                 if (rec->nr == 1)
4806                         rec->nr = max(tmpl->nr, tmpl->max_size);
4807
4808                 /*
4809                  * We need to make sure to reset nr to whatever the extent
4810                  * record says was the real size, this way we can compare it to
4811                  * the backrefs.
4812                  */
4813                 if (tmpl->found_rec) {
4814                         if (tmpl->start != rec->start || rec->found_rec) {
4815                                 struct extent_record *tmp;
4816
4817                                 dup = 1;
4818                                 if (list_empty(&rec->list))
4819                                         list_add_tail(&rec->list,
4820                                                       &duplicate_extents);
4821
4822                                 /*
4823                                  * We have to do this song and dance in case we
4824                                  * find an extent record that falls inside of
4825                                  * our current extent record but does not have
4826                                  * the same objectid.
4827                                  */
4828                                 tmp = malloc(sizeof(*tmp));
4829                                 if (!tmp)
4830                                         return -ENOMEM;
4831                                 tmp->start = tmpl->start;
4832                                 tmp->max_size = tmpl->max_size;
4833                                 tmp->nr = tmpl->nr;
4834                                 tmp->found_rec = 1;
4835                                 tmp->metadata = tmpl->metadata;
4836                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4837                                 INIT_LIST_HEAD(&tmp->list);
4838                                 list_add_tail(&tmp->list, &rec->dups);
4839                                 rec->num_duplicates++;
4840                         } else {
4841                                 rec->nr = tmpl->nr;
4842                                 rec->found_rec = 1;
4843                         }
4844                 }
4845
4846                 if (tmpl->extent_item_refs && !dup) {
4847                         if (rec->extent_item_refs) {
4848                                 fprintf(stderr, "block %llu rec "
4849                                         "extent_item_refs %llu, passed %llu\n",
4850                                         (unsigned long long)tmpl->start,
4851                                         (unsigned long long)
4852                                                         rec->extent_item_refs,
4853                                         (unsigned long long)tmpl->extent_item_refs);
4854                         }
4855                         rec->extent_item_refs = tmpl->extent_item_refs;
4856                 }
4857                 if (tmpl->is_root)
4858                         rec->is_root = 1;
4859                 if (tmpl->content_checked)
4860                         rec->content_checked = 1;
4861                 if (tmpl->owner_ref_checked)
4862                         rec->owner_ref_checked = 1;
4863                 memcpy(&rec->parent_key, &tmpl->parent_key,
4864                                 sizeof(tmpl->parent_key));
4865                 if (tmpl->parent_generation)
4866                         rec->parent_generation = tmpl->parent_generation;
4867                 if (rec->max_size < tmpl->max_size)
4868                         rec->max_size = tmpl->max_size;
4869
4870                 /*
4871                  * A metadata extent can't cross stripe_len boundary, otherwise
4872                  * kernel scrub won't be able to handle it.
4873                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4874                  * it.
4875                  */
4876                 if (tmpl->metadata)
4877                         rec->crossing_stripes = check_crossing_stripes(
4878                                         global_info, rec->start,
4879                                         global_info->tree_root->nodesize);
4880                 check_extent_type(rec);
4881                 maybe_free_extent_rec(extent_cache, rec);
4882                 return ret;
4883         }
4884
4885         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4886
4887         return ret;
4888 }
4889
4890 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4891                             u64 parent, u64 root, int found_ref)
4892 {
4893         struct extent_record *rec;
4894         struct tree_backref *back;
4895         struct cache_extent *cache;
4896         int ret;
4897
4898         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4899         if (!cache) {
4900                 struct extent_record tmpl;
4901
4902                 memset(&tmpl, 0, sizeof(tmpl));
4903                 tmpl.start = bytenr;
4904                 tmpl.nr = 1;
4905                 tmpl.metadata = 1;
4906
4907                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4908                 if (ret)
4909                         return ret;
4910
4911                 /* really a bug in cache_extent implement now */
4912                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4913                 if (!cache)
4914                         return -ENOENT;
4915         }
4916
4917         rec = container_of(cache, struct extent_record, cache);
4918         if (rec->start != bytenr) {
4919                 /*
4920                  * Several cause, from unaligned bytenr to over lapping extents
4921                  */
4922                 return -EEXIST;
4923         }
4924
4925         back = find_tree_backref(rec, parent, root);
4926         if (!back) {
4927                 back = alloc_tree_backref(rec, parent, root);
4928                 if (!back)
4929                         return -ENOMEM;
4930         }
4931
4932         if (found_ref) {
4933                 if (back->node.found_ref) {
4934                         fprintf(stderr, "Extent back ref already exists "
4935                                 "for %llu parent %llu root %llu \n",
4936                                 (unsigned long long)bytenr,
4937                                 (unsigned long long)parent,
4938                                 (unsigned long long)root);
4939                 }
4940                 back->node.found_ref = 1;
4941         } else {
4942                 if (back->node.found_extent_tree) {
4943                         fprintf(stderr, "Extent back ref already exists "
4944                                 "for %llu parent %llu root %llu \n",
4945                                 (unsigned long long)bytenr,
4946                                 (unsigned long long)parent,
4947                                 (unsigned long long)root);
4948                 }
4949                 back->node.found_extent_tree = 1;
4950         }
4951         check_extent_type(rec);
4952         maybe_free_extent_rec(extent_cache, rec);
4953         return 0;
4954 }
4955
4956 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4957                             u64 parent, u64 root, u64 owner, u64 offset,
4958                             u32 num_refs, int found_ref, u64 max_size)
4959 {
4960         struct extent_record *rec;
4961         struct data_backref *back;
4962         struct cache_extent *cache;
4963         int ret;
4964
4965         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4966         if (!cache) {
4967                 struct extent_record tmpl;
4968
4969                 memset(&tmpl, 0, sizeof(tmpl));
4970                 tmpl.start = bytenr;
4971                 tmpl.nr = 1;
4972                 tmpl.max_size = max_size;
4973
4974                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4975                 if (ret)
4976                         return ret;
4977
4978                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4979                 if (!cache)
4980                         abort();
4981         }
4982
4983         rec = container_of(cache, struct extent_record, cache);
4984         if (rec->max_size < max_size)
4985                 rec->max_size = max_size;
4986
4987         /*
4988          * If found_ref is set then max_size is the real size and must match the
4989          * existing refs.  So if we have already found a ref then we need to
4990          * make sure that this ref matches the existing one, otherwise we need
4991          * to add a new backref so we can notice that the backrefs don't match
4992          * and we need to figure out who is telling the truth.  This is to
4993          * account for that awful fsync bug I introduced where we'd end up with
4994          * a btrfs_file_extent_item that would have its length include multiple
4995          * prealloc extents or point inside of a prealloc extent.
4996          */
4997         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4998                                  bytenr, max_size);
4999         if (!back) {
5000                 back = alloc_data_backref(rec, parent, root, owner, offset,
5001                                           max_size);
5002                 BUG_ON(!back);
5003         }
5004
5005         if (found_ref) {
5006                 BUG_ON(num_refs != 1);
5007                 if (back->node.found_ref)
5008                         BUG_ON(back->bytes != max_size);
5009                 back->node.found_ref = 1;
5010                 back->found_ref += 1;
5011                 back->bytes = max_size;
5012                 back->disk_bytenr = bytenr;
5013                 rec->refs += 1;
5014                 rec->content_checked = 1;
5015                 rec->owner_ref_checked = 1;
5016         } else {
5017                 if (back->node.found_extent_tree) {
5018                         fprintf(stderr, "Extent back ref already exists "
5019                                 "for %llu parent %llu root %llu "
5020                                 "owner %llu offset %llu num_refs %lu\n",
5021                                 (unsigned long long)bytenr,
5022                                 (unsigned long long)parent,
5023                                 (unsigned long long)root,
5024                                 (unsigned long long)owner,
5025                                 (unsigned long long)offset,
5026                                 (unsigned long)num_refs);
5027                 }
5028                 back->num_refs = num_refs;
5029                 back->node.found_extent_tree = 1;
5030         }
5031         maybe_free_extent_rec(extent_cache, rec);
5032         return 0;
5033 }
5034
5035 static int add_pending(struct cache_tree *pending,
5036                        struct cache_tree *seen, u64 bytenr, u32 size)
5037 {
5038         int ret;
5039         ret = add_cache_extent(seen, bytenr, size);
5040         if (ret)
5041                 return ret;
5042         add_cache_extent(pending, bytenr, size);
5043         return 0;
5044 }
5045
5046 static int pick_next_pending(struct cache_tree *pending,
5047                         struct cache_tree *reada,
5048                         struct cache_tree *nodes,
5049                         u64 last, struct block_info *bits, int bits_nr,
5050                         int *reada_bits)
5051 {
5052         unsigned long node_start = last;
5053         struct cache_extent *cache;
5054         int ret;
5055
5056         cache = search_cache_extent(reada, 0);
5057         if (cache) {
5058                 bits[0].start = cache->start;
5059                 bits[0].size = cache->size;
5060                 *reada_bits = 1;
5061                 return 1;
5062         }
5063         *reada_bits = 0;
5064         if (node_start > 32768)
5065                 node_start -= 32768;
5066
5067         cache = search_cache_extent(nodes, node_start);
5068         if (!cache)
5069                 cache = search_cache_extent(nodes, 0);
5070
5071         if (!cache) {
5072                  cache = search_cache_extent(pending, 0);
5073                  if (!cache)
5074                          return 0;
5075                  ret = 0;
5076                  do {
5077                          bits[ret].start = cache->start;
5078                          bits[ret].size = cache->size;
5079                          cache = next_cache_extent(cache);
5080                          ret++;
5081                  } while (cache && ret < bits_nr);
5082                  return ret;
5083         }
5084
5085         ret = 0;
5086         do {
5087                 bits[ret].start = cache->start;
5088                 bits[ret].size = cache->size;
5089                 cache = next_cache_extent(cache);
5090                 ret++;
5091         } while (cache && ret < bits_nr);
5092
5093         if (bits_nr - ret > 8) {
5094                 u64 lookup = bits[0].start + bits[0].size;
5095                 struct cache_extent *next;
5096                 next = search_cache_extent(pending, lookup);
5097                 while(next) {
5098                         if (next->start - lookup > 32768)
5099                                 break;
5100                         bits[ret].start = next->start;
5101                         bits[ret].size = next->size;
5102                         lookup = next->start + next->size;
5103                         ret++;
5104                         if (ret == bits_nr)
5105                                 break;
5106                         next = next_cache_extent(next);
5107                         if (!next)
5108                                 break;
5109                 }
5110         }
5111         return ret;
5112 }
5113
5114 static void free_chunk_record(struct cache_extent *cache)
5115 {
5116         struct chunk_record *rec;
5117
5118         rec = container_of(cache, struct chunk_record, cache);
5119         list_del_init(&rec->list);
5120         list_del_init(&rec->dextents);
5121         free(rec);
5122 }
5123
5124 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5125 {
5126         cache_tree_free_extents(chunk_cache, free_chunk_record);
5127 }
5128
5129 static void free_device_record(struct rb_node *node)
5130 {
5131         struct device_record *rec;
5132
5133         rec = container_of(node, struct device_record, node);
5134         free(rec);
5135 }
5136
5137 FREE_RB_BASED_TREE(device_cache, free_device_record);
5138
5139 int insert_block_group_record(struct block_group_tree *tree,
5140                               struct block_group_record *bg_rec)
5141 {
5142         int ret;
5143
5144         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5145         if (ret)
5146                 return ret;
5147
5148         list_add_tail(&bg_rec->list, &tree->block_groups);
5149         return 0;
5150 }
5151
5152 static void free_block_group_record(struct cache_extent *cache)
5153 {
5154         struct block_group_record *rec;
5155
5156         rec = container_of(cache, struct block_group_record, cache);
5157         list_del_init(&rec->list);
5158         free(rec);
5159 }
5160
5161 void free_block_group_tree(struct block_group_tree *tree)
5162 {
5163         cache_tree_free_extents(&tree->tree, free_block_group_record);
5164 }
5165
5166 int insert_device_extent_record(struct device_extent_tree *tree,
5167                                 struct device_extent_record *de_rec)
5168 {
5169         int ret;
5170
5171         /*
5172          * Device extent is a bit different from the other extents, because
5173          * the extents which belong to the different devices may have the
5174          * same start and size, so we need use the special extent cache
5175          * search/insert functions.
5176          */
5177         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5178         if (ret)
5179                 return ret;
5180
5181         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5182         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5183         return 0;
5184 }
5185
5186 static void free_device_extent_record(struct cache_extent *cache)
5187 {
5188         struct device_extent_record *rec;
5189
5190         rec = container_of(cache, struct device_extent_record, cache);
5191         if (!list_empty(&rec->chunk_list))
5192                 list_del_init(&rec->chunk_list);
5193         if (!list_empty(&rec->device_list))
5194                 list_del_init(&rec->device_list);
5195         free(rec);
5196 }
5197
5198 void free_device_extent_tree(struct device_extent_tree *tree)
5199 {
5200         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5201 }
5202
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref on the extent
 * cache.  The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID becomes
 * a tree backref, anything else a data backref carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
5223
5224 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5225                                             struct btrfs_key *key,
5226                                             int slot)
5227 {
5228         struct btrfs_chunk *ptr;
5229         struct chunk_record *rec;
5230         int num_stripes, i;
5231
5232         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5233         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5234
5235         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5236         if (!rec) {
5237                 fprintf(stderr, "memory allocation failed\n");
5238                 exit(-1);
5239         }
5240
5241         INIT_LIST_HEAD(&rec->list);
5242         INIT_LIST_HEAD(&rec->dextents);
5243         rec->bg_rec = NULL;
5244
5245         rec->cache.start = key->offset;
5246         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5247
5248         rec->generation = btrfs_header_generation(leaf);
5249
5250         rec->objectid = key->objectid;
5251         rec->type = key->type;
5252         rec->offset = key->offset;
5253
5254         rec->length = rec->cache.size;
5255         rec->owner = btrfs_chunk_owner(leaf, ptr);
5256         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5257         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5258         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5259         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5260         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5261         rec->num_stripes = num_stripes;
5262         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5263
5264         for (i = 0; i < rec->num_stripes; ++i) {
5265                 rec->stripes[i].devid =
5266                         btrfs_stripe_devid_nr(leaf, ptr, i);
5267                 rec->stripes[i].offset =
5268                         btrfs_stripe_offset_nr(leaf, ptr, i);
5269                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5270                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5271                                 BTRFS_UUID_SIZE);
5272         }
5273
5274         return rec;
5275 }
5276
5277 static int process_chunk_item(struct cache_tree *chunk_cache,
5278                               struct btrfs_key *key, struct extent_buffer *eb,
5279                               int slot)
5280 {
5281         struct chunk_record *rec;
5282         struct btrfs_chunk *chunk;
5283         int ret = 0;
5284
5285         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5286         /*
5287          * Do extra check for this chunk item,
5288          *
5289          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5290          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5291          * and owner<->key_type check.
5292          */
5293         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5294                                       key->offset);
5295         if (ret < 0) {
5296                 error("chunk(%llu, %llu) is not valid, ignore it",
5297                       key->offset, btrfs_chunk_length(eb, chunk));
5298                 return 0;
5299         }
5300         rec = btrfs_new_chunk_record(eb, key, slot);
5301         ret = insert_cache_extent(chunk_cache, &rec->cache);
5302         if (ret) {
5303                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5304                         rec->offset, rec->length);
5305                 free(rec);
5306         }
5307
5308         return ret;
5309 }
5310
5311 static int process_device_item(struct rb_root *dev_cache,
5312                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5313 {
5314         struct btrfs_dev_item *ptr;
5315         struct device_record *rec;
5316         int ret = 0;
5317
5318         ptr = btrfs_item_ptr(eb,
5319                 slot, struct btrfs_dev_item);
5320
5321         rec = malloc(sizeof(*rec));
5322         if (!rec) {
5323                 fprintf(stderr, "memory allocation failed\n");
5324                 return -ENOMEM;
5325         }
5326
5327         rec->devid = key->offset;
5328         rec->generation = btrfs_header_generation(eb);
5329
5330         rec->objectid = key->objectid;
5331         rec->type = key->type;
5332         rec->offset = key->offset;
5333
5334         rec->devid = btrfs_device_id(eb, ptr);
5335         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5336         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5337
5338         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5339         if (ret) {
5340                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5341                 free(rec);
5342         }
5343
5344         return ret;
5345 }
5346
5347 struct block_group_record *
5348 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5349                              int slot)
5350 {
5351         struct btrfs_block_group_item *ptr;
5352         struct block_group_record *rec;
5353
5354         rec = calloc(1, sizeof(*rec));
5355         if (!rec) {
5356                 fprintf(stderr, "memory allocation failed\n");
5357                 exit(-1);
5358         }
5359
5360         rec->cache.start = key->objectid;
5361         rec->cache.size = key->offset;
5362
5363         rec->generation = btrfs_header_generation(leaf);
5364
5365         rec->objectid = key->objectid;
5366         rec->type = key->type;
5367         rec->offset = key->offset;
5368
5369         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5370         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5371
5372         INIT_LIST_HEAD(&rec->list);
5373
5374         return rec;
5375 }
5376
5377 static int process_block_group_item(struct block_group_tree *block_group_cache,
5378                                     struct btrfs_key *key,
5379                                     struct extent_buffer *eb, int slot)
5380 {
5381         struct block_group_record *rec;
5382         int ret = 0;
5383
5384         rec = btrfs_new_block_group_record(eb, key, slot);
5385         ret = insert_block_group_record(block_group_cache, rec);
5386         if (ret) {
5387                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5388                         rec->objectid, rec->offset);
5389                 free(rec);
5390         }
5391
5392         return ret;
5393 }
5394
5395 struct device_extent_record *
5396 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5397                                struct btrfs_key *key, int slot)
5398 {
5399         struct device_extent_record *rec;
5400         struct btrfs_dev_extent *ptr;
5401
5402         rec = calloc(1, sizeof(*rec));
5403         if (!rec) {
5404                 fprintf(stderr, "memory allocation failed\n");
5405                 exit(-1);
5406         }
5407
5408         rec->cache.objectid = key->objectid;
5409         rec->cache.start = key->offset;
5410
5411         rec->generation = btrfs_header_generation(leaf);
5412
5413         rec->objectid = key->objectid;
5414         rec->type = key->type;
5415         rec->offset = key->offset;
5416
5417         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5418         rec->chunk_objecteid =
5419                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5420         rec->chunk_offset =
5421                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5422         rec->length = btrfs_dev_extent_length(leaf, ptr);
5423         rec->cache.size = rec->length;
5424
5425         INIT_LIST_HEAD(&rec->chunk_list);
5426         INIT_LIST_HEAD(&rec->device_list);
5427
5428         return rec;
5429 }
5430
5431 static int
5432 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5433                            struct btrfs_key *key, struct extent_buffer *eb,
5434                            int slot)
5435 {
5436         struct device_extent_record *rec;
5437         int ret;
5438
5439         rec = btrfs_new_device_extent_record(eb, key, slot);
5440         ret = insert_device_extent_record(dev_extent_cache, rec);
5441         if (ret) {
5442                 fprintf(stderr,
5443                         "Device extent[%llu, %llu, %llu] existed.\n",
5444                         rec->objectid, rec->offset, rec->length);
5445                 free(rec);
5446         }
5447
5448         return ret;
5449 }
5450
5451 static int process_extent_item(struct btrfs_root *root,
5452                                struct cache_tree *extent_cache,
5453                                struct extent_buffer *eb, int slot)
5454 {
5455         struct btrfs_extent_item *ei;
5456         struct btrfs_extent_inline_ref *iref;
5457         struct btrfs_extent_data_ref *dref;
5458         struct btrfs_shared_data_ref *sref;
5459         struct btrfs_key key;
5460         struct extent_record tmpl;
5461         unsigned long end;
5462         unsigned long ptr;
5463         int ret;
5464         int type;
5465         u32 item_size = btrfs_item_size_nr(eb, slot);
5466         u64 refs = 0;
5467         u64 offset;
5468         u64 num_bytes;
5469         int metadata = 0;
5470
5471         btrfs_item_key_to_cpu(eb, &key, slot);
5472
5473         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5474                 metadata = 1;
5475                 num_bytes = root->nodesize;
5476         } else {
5477                 num_bytes = key.offset;
5478         }
5479
5480         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5481                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5482                       key.objectid, root->sectorsize);
5483                 return -EIO;
5484         }
5485         if (item_size < sizeof(*ei)) {
5486 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5487                 struct btrfs_extent_item_v0 *ei0;
5488                 BUG_ON(item_size != sizeof(*ei0));
5489                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5490                 refs = btrfs_extent_refs_v0(eb, ei0);
5491 #else
5492                 BUG();
5493 #endif
5494                 memset(&tmpl, 0, sizeof(tmpl));
5495                 tmpl.start = key.objectid;
5496                 tmpl.nr = num_bytes;
5497                 tmpl.extent_item_refs = refs;
5498                 tmpl.metadata = metadata;
5499                 tmpl.found_rec = 1;
5500                 tmpl.max_size = num_bytes;
5501
5502                 return add_extent_rec(extent_cache, &tmpl);
5503         }
5504
5505         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5506         refs = btrfs_extent_refs(eb, ei);
5507         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5508                 metadata = 1;
5509         else
5510                 metadata = 0;
5511         if (metadata && num_bytes != root->nodesize) {
5512                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5513                       num_bytes, root->nodesize);
5514                 return -EIO;
5515         }
5516         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5517                 error("ignore invalid data extent, length %llu is not aligned to %u",
5518                       num_bytes, root->sectorsize);
5519                 return -EIO;
5520         }
5521
5522         memset(&tmpl, 0, sizeof(tmpl));
5523         tmpl.start = key.objectid;
5524         tmpl.nr = num_bytes;
5525         tmpl.extent_item_refs = refs;
5526         tmpl.metadata = metadata;
5527         tmpl.found_rec = 1;
5528         tmpl.max_size = num_bytes;
5529         add_extent_rec(extent_cache, &tmpl);
5530
5531         ptr = (unsigned long)(ei + 1);
5532         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5533             key.type == BTRFS_EXTENT_ITEM_KEY)
5534                 ptr += sizeof(struct btrfs_tree_block_info);
5535
5536         end = (unsigned long)ei + item_size;
5537         while (ptr < end) {
5538                 iref = (struct btrfs_extent_inline_ref *)ptr;
5539                 type = btrfs_extent_inline_ref_type(eb, iref);
5540                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5541                 switch (type) {
5542                 case BTRFS_TREE_BLOCK_REF_KEY:
5543                         ret = add_tree_backref(extent_cache, key.objectid,
5544                                         0, offset, 0);
5545                         if (ret < 0)
5546                                 error("add_tree_backref failed: %s",
5547                                       strerror(-ret));
5548                         break;
5549                 case BTRFS_SHARED_BLOCK_REF_KEY:
5550                         ret = add_tree_backref(extent_cache, key.objectid,
5551                                         offset, 0, 0);
5552                         if (ret < 0)
5553                                 error("add_tree_backref failed: %s",
5554                                       strerror(-ret));
5555                         break;
5556                 case BTRFS_EXTENT_DATA_REF_KEY:
5557                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5558                         add_data_backref(extent_cache, key.objectid, 0,
5559                                         btrfs_extent_data_ref_root(eb, dref),
5560                                         btrfs_extent_data_ref_objectid(eb,
5561                                                                        dref),
5562                                         btrfs_extent_data_ref_offset(eb, dref),
5563                                         btrfs_extent_data_ref_count(eb, dref),
5564                                         0, num_bytes);
5565                         break;
5566                 case BTRFS_SHARED_DATA_REF_KEY:
5567                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5568                         add_data_backref(extent_cache, key.objectid, offset,
5569                                         0, 0, 0,
5570                                         btrfs_shared_data_ref_count(eb, sref),
5571                                         0, num_bytes);
5572                         break;
5573                 default:
5574                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5575                                 key.objectid, key.type, num_bytes);
5576                         goto out;
5577                 }
5578                 ptr += btrfs_extent_inline_ref_size(type);
5579         }
5580         WARN_ON(ptr > end);
5581 out:
5582         return 0;
5583 }
5584
5585 static int check_cache_range(struct btrfs_root *root,
5586                              struct btrfs_block_group_cache *cache,
5587                              u64 offset, u64 bytes)
5588 {
5589         struct btrfs_free_space *entry;
5590         u64 *logical;
5591         u64 bytenr;
5592         int stripe_len;
5593         int i, nr, ret;
5594
5595         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5596                 bytenr = btrfs_sb_offset(i);
5597                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5598                                        cache->key.objectid, bytenr, 0,
5599                                        &logical, &nr, &stripe_len);
5600                 if (ret)
5601                         return ret;
5602
5603                 while (nr--) {
5604                         if (logical[nr] + stripe_len <= offset)
5605                                 continue;
5606                         if (offset + bytes <= logical[nr])
5607                                 continue;
5608                         if (logical[nr] == offset) {
5609                                 if (stripe_len >= bytes) {
5610                                         free(logical);
5611                                         return 0;
5612                                 }
5613                                 bytes -= stripe_len;
5614                                 offset += stripe_len;
5615                         } else if (logical[nr] < offset) {
5616                                 if (logical[nr] + stripe_len >=
5617                                     offset + bytes) {
5618                                         free(logical);
5619                                         return 0;
5620                                 }
5621                                 bytes = (offset + bytes) -
5622                                         (logical[nr] + stripe_len);
5623                                 offset = logical[nr] + stripe_len;
5624                         } else {
5625                                 /*
5626                                  * Could be tricky, the super may land in the
5627                                  * middle of the area we're checking.  First
5628                                  * check the easiest case, it's at the end.
5629                                  */
5630                                 if (logical[nr] + stripe_len >=
5631                                     bytes + offset) {
5632                                         bytes = logical[nr] - offset;
5633                                         continue;
5634                                 }
5635
5636                                 /* Check the left side */
5637                                 ret = check_cache_range(root, cache,
5638                                                         offset,
5639                                                         logical[nr] - offset);
5640                                 if (ret) {
5641                                         free(logical);
5642                                         return ret;
5643                                 }
5644
5645                                 /* Now we continue with the right side */
5646                                 bytes = (offset + bytes) -
5647                                         (logical[nr] + stripe_len);
5648                                 offset = logical[nr] + stripe_len;
5649                         }
5650                 }
5651
5652                 free(logical);
5653         }
5654
5655         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5656         if (!entry) {
5657                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5658                         offset, offset+bytes);
5659                 return -EINVAL;
5660         }
5661
5662         if (entry->offset != offset) {
5663                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5664                         entry->offset);
5665                 return -EINVAL;
5666         }
5667
5668         if (entry->bytes != bytes) {
5669                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5670                         bytes, entry->bytes, offset);
5671                 return -EINVAL;
5672         }
5673
5674         unlink_free_space(cache->free_space_ctl, entry);
5675         free(entry);
5676         return 0;
5677 }
5678
5679 static int verify_space_cache(struct btrfs_root *root,
5680                               struct btrfs_block_group_cache *cache)
5681 {
5682         struct btrfs_path path;
5683         struct extent_buffer *leaf;
5684         struct btrfs_key key;
5685         u64 last;
5686         int ret = 0;
5687
5688         root = root->fs_info->extent_root;
5689
5690         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5691
5692         btrfs_init_path(&path);
5693         key.objectid = last;
5694         key.offset = 0;
5695         key.type = BTRFS_EXTENT_ITEM_KEY;
5696         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5697         if (ret < 0)
5698                 goto out;
5699         ret = 0;
5700         while (1) {
5701                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5702                         ret = btrfs_next_leaf(root, &path);
5703                         if (ret < 0)
5704                                 goto out;
5705                         if (ret > 0) {
5706                                 ret = 0;
5707                                 break;
5708                         }
5709                 }
5710                 leaf = path.nodes[0];
5711                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5712                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5713                         break;
5714                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5715                     key.type != BTRFS_METADATA_ITEM_KEY) {
5716                         path.slots[0]++;
5717                         continue;
5718                 }
5719
5720                 if (last == key.objectid) {
5721                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5722                                 last = key.objectid + key.offset;
5723                         else
5724                                 last = key.objectid + root->nodesize;
5725                         path.slots[0]++;
5726                         continue;
5727                 }
5728
5729                 ret = check_cache_range(root, cache, last,
5730                                         key.objectid - last);
5731                 if (ret)
5732                         break;
5733                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5734                         last = key.objectid + key.offset;
5735                 else
5736                         last = key.objectid + root->nodesize;
5737                 path.slots[0]++;
5738         }
5739
5740         if (last < cache->key.objectid + cache->key.offset)
5741                 ret = check_cache_range(root, cache, last,
5742                                         cache->key.objectid +
5743                                         cache->key.offset - last);
5744
5745 out:
5746         btrfs_release_path(&path);
5747
5748         if (!ret &&
5749             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5750                 fprintf(stderr, "There are still entries left in the space "
5751                         "cache\n");
5752                 ret = -EINVAL;
5753         }
5754
5755         return ret;
5756 }
5757
5758 static int check_space_cache(struct btrfs_root *root)
5759 {
5760         struct btrfs_block_group_cache *cache;
5761         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5762         int ret;
5763         int error = 0;
5764
5765         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5766             btrfs_super_generation(root->fs_info->super_copy) !=
5767             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5768                 printf("cache and super generation don't match, space cache "
5769                        "will be invalidated\n");
5770                 return 0;
5771         }
5772
5773         if (ctx.progress_enabled) {
5774                 ctx.tp = TASK_FREE_SPACE;
5775                 task_start(ctx.info);
5776         }
5777
5778         while (1) {
5779                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5780                 if (!cache)
5781                         break;
5782
5783                 start = cache->key.objectid + cache->key.offset;
5784                 if (!cache->free_space_ctl) {
5785                         if (btrfs_init_free_space_ctl(cache,
5786                                                       root->sectorsize)) {
5787                                 ret = -ENOMEM;
5788                                 break;
5789                         }
5790                 } else {
5791                         btrfs_remove_free_space_cache(cache);
5792                 }
5793
5794                 if (btrfs_fs_compat_ro(root->fs_info,
5795                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5796                         ret = exclude_super_stripes(root, cache);
5797                         if (ret) {
5798                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5799                                         strerror(-ret));
5800                                 error++;
5801                                 continue;
5802                         }
5803                         ret = load_free_space_tree(root->fs_info, cache);
5804                         free_excluded_extents(root, cache);
5805                         if (ret < 0) {
5806                                 fprintf(stderr, "could not load free space tree: %s\n",
5807                                         strerror(-ret));
5808                                 error++;
5809                                 continue;
5810                         }
5811                         error += ret;
5812                 } else {
5813                         ret = load_free_space_cache(root->fs_info, cache);
5814                         if (!ret)
5815                                 continue;
5816                 }
5817
5818                 ret = verify_space_cache(root, cache);
5819                 if (ret) {
5820                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5821                                 cache->key.objectid);
5822                         error++;
5823                 }
5824         }
5825
5826         task_stop(ctx.info);
5827
5828         return error ? -EINVAL : 0;
5829 }
5830
5831 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5832                         u64 num_bytes, unsigned long leaf_offset,
5833                         struct extent_buffer *eb) {
5834
5835         u64 offset = 0;
5836         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5837         char *data;
5838         unsigned long csum_offset;
5839         u32 csum;
5840         u32 csum_expected;
5841         u64 read_len;
5842         u64 data_checked = 0;
5843         u64 tmp;
5844         int ret = 0;
5845         int mirror;
5846         int num_copies;
5847
5848         if (num_bytes % root->sectorsize)
5849                 return -EINVAL;
5850
5851         data = malloc(num_bytes);
5852         if (!data)
5853                 return -ENOMEM;
5854
5855         while (offset < num_bytes) {
5856                 mirror = 0;
5857 again:
5858                 read_len = num_bytes - offset;
5859                 /* read as much space once a time */
5860                 ret = read_extent_data(root, data + offset,
5861                                 bytenr + offset, &read_len, mirror);
5862                 if (ret)
5863                         goto out;
5864                 data_checked = 0;
5865                 /* verify every 4k data's checksum */
5866                 while (data_checked < read_len) {
5867                         csum = ~(u32)0;
5868                         tmp = offset + data_checked;
5869
5870                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5871                                                csum, root->sectorsize);
5872                         btrfs_csum_final(csum, (u8 *)&csum);
5873
5874                         csum_offset = leaf_offset +
5875                                  tmp / root->sectorsize * csum_size;
5876                         read_extent_buffer(eb, (char *)&csum_expected,
5877                                            csum_offset, csum_size);
5878                         /* try another mirror */
5879                         if (csum != csum_expected) {
5880                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5881                                                 mirror, bytenr + tmp,
5882                                                 csum, csum_expected);
5883                                 num_copies = btrfs_num_copies(
5884                                                 &root->fs_info->mapping_tree,
5885                                                 bytenr, num_bytes);
5886                                 if (mirror < num_copies - 1) {
5887                                         mirror += 1;
5888                                         goto again;
5889                                 }
5890                         }
5891                         data_checked += root->sectorsize;
5892                 }
5893                 offset += read_len;
5894         }
5895 out:
5896         free(data);
5897         return ret;
5898 }
5899
5900 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5901                                u64 num_bytes)
5902 {
5903         struct btrfs_path path;
5904         struct extent_buffer *leaf;
5905         struct btrfs_key key;
5906         int ret;
5907
5908         btrfs_init_path(&path);
5909         key.objectid = bytenr;
5910         key.type = BTRFS_EXTENT_ITEM_KEY;
5911         key.offset = (u64)-1;
5912
5913 again:
5914         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5915                                 0, 0);
5916         if (ret < 0) {
5917                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5918                 btrfs_release_path(&path);
5919                 return ret;
5920         } else if (ret) {
5921                 if (path.slots[0] > 0) {
5922                         path.slots[0]--;
5923                 } else {
5924                         ret = btrfs_prev_leaf(root, &path);
5925                         if (ret < 0) {
5926                                 goto out;
5927                         } else if (ret > 0) {
5928                                 ret = 0;
5929                                 goto out;
5930                         }
5931                 }
5932         }
5933
5934         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5935
5936         /*
5937          * Block group items come before extent items if they have the same
5938          * bytenr, so walk back one more just in case.  Dear future traveller,
5939          * first congrats on mastering time travel.  Now if it's not too much
5940          * trouble could you go back to 2006 and tell Chris to make the
5941          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5942          * EXTENT_ITEM_KEY please?
5943          */
5944         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5945                 if (path.slots[0] > 0) {
5946                         path.slots[0]--;
5947                 } else {
5948                         ret = btrfs_prev_leaf(root, &path);
5949                         if (ret < 0) {
5950                                 goto out;
5951                         } else if (ret > 0) {
5952                                 ret = 0;
5953                                 goto out;
5954                         }
5955                 }
5956                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5957         }
5958
5959         while (num_bytes) {
5960                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5961                         ret = btrfs_next_leaf(root, &path);
5962                         if (ret < 0) {
5963                                 fprintf(stderr, "Error going to next leaf "
5964                                         "%d\n", ret);
5965                                 btrfs_release_path(&path);
5966                                 return ret;
5967                         } else if (ret) {
5968                                 break;
5969                         }
5970                 }
5971                 leaf = path.nodes[0];
5972                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5973                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5974                         path.slots[0]++;
5975                         continue;
5976                 }
5977                 if (key.objectid + key.offset < bytenr) {
5978                         path.slots[0]++;
5979                         continue;
5980                 }
5981                 if (key.objectid > bytenr + num_bytes)
5982                         break;
5983
5984                 if (key.objectid == bytenr) {
5985                         if (key.offset >= num_bytes) {
5986                                 num_bytes = 0;
5987                                 break;
5988                         }
5989                         num_bytes -= key.offset;
5990                         bytenr += key.offset;
5991                 } else if (key.objectid < bytenr) {
5992                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5993                                 num_bytes = 0;
5994                                 break;
5995                         }
5996                         num_bytes = (bytenr + num_bytes) -
5997                                 (key.objectid + key.offset);
5998                         bytenr = key.objectid + key.offset;
5999                 } else {
6000                         if (key.objectid + key.offset < bytenr + num_bytes) {
6001                                 u64 new_start = key.objectid + key.offset;
6002                                 u64 new_bytes = bytenr + num_bytes - new_start;
6003
6004                                 /*
6005                                  * Weird case, the extent is in the middle of
6006                                  * our range, we'll have to search one side
6007                                  * and then the other.  Not sure if this happens
6008                                  * in real life, but no harm in coding it up
6009                                  * anyway just in case.
6010                                  */
6011                                 btrfs_release_path(&path);
6012                                 ret = check_extent_exists(root, new_start,
6013                                                           new_bytes);
6014                                 if (ret) {
6015                                         fprintf(stderr, "Right section didn't "
6016                                                 "have a record\n");
6017                                         break;
6018                                 }
6019                                 num_bytes = key.objectid - bytenr;
6020                                 goto again;
6021                         }
6022                         num_bytes = key.objectid - bytenr;
6023                 }
6024                 path.slots[0]++;
6025         }
6026         ret = 0;
6027
6028 out:
6029         if (num_bytes && !ret) {
6030                 fprintf(stderr, "There are no extents for csum range "
6031                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6032                 ret = 1;
6033         }
6034
6035         btrfs_release_path(&path);
6036         return ret;
6037 }
6038
6039 static int check_csums(struct btrfs_root *root)
6040 {
6041         struct btrfs_path path;
6042         struct extent_buffer *leaf;
6043         struct btrfs_key key;
6044         u64 offset = 0, num_bytes = 0;
6045         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6046         int errors = 0;
6047         int ret;
6048         u64 data_len;
6049         unsigned long leaf_offset;
6050
6051         root = root->fs_info->csum_root;
6052         if (!extent_buffer_uptodate(root->node)) {
6053                 fprintf(stderr, "No valid csum tree found\n");
6054                 return -ENOENT;
6055         }
6056
6057         btrfs_init_path(&path);
6058         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6059         key.type = BTRFS_EXTENT_CSUM_KEY;
6060         key.offset = 0;
6061         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6062         if (ret < 0) {
6063                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6064                 btrfs_release_path(&path);
6065                 return ret;
6066         }
6067
6068         if (ret > 0 && path.slots[0])
6069                 path.slots[0]--;
6070         ret = 0;
6071
6072         while (1) {
6073                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6074                         ret = btrfs_next_leaf(root, &path);
6075                         if (ret < 0) {
6076                                 fprintf(stderr, "Error going to next leaf "
6077                                         "%d\n", ret);
6078                                 break;
6079                         }
6080                         if (ret)
6081                                 break;
6082                 }
6083                 leaf = path.nodes[0];
6084
6085                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6086                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6087                         path.slots[0]++;
6088                         continue;
6089                 }
6090
6091                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6092                               csum_size) * root->sectorsize;
6093                 if (!check_data_csum)
6094                         goto skip_csum_check;
6095                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6096                 ret = check_extent_csums(root, key.offset, data_len,
6097                                          leaf_offset, leaf);
6098                 if (ret)
6099                         break;
6100 skip_csum_check:
6101                 if (!num_bytes) {
6102                         offset = key.offset;
6103                 } else if (key.offset != offset + num_bytes) {
6104                         ret = check_extent_exists(root, offset, num_bytes);
6105                         if (ret) {
6106                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6107                                         "there is no extent record\n",
6108                                         offset, offset+num_bytes);
6109                                 errors++;
6110                         }
6111                         offset = key.offset;
6112                         num_bytes = 0;
6113                 }
6114                 num_bytes += data_len;
6115                 path.slots[0]++;
6116         }
6117
6118         btrfs_release_path(&path);
6119         return errors;
6120 }
6121
6122 static int is_dropped_key(struct btrfs_key *key,
6123                           struct btrfs_key *drop_key) {
6124         if (key->objectid < drop_key->objectid)
6125                 return 1;
6126         else if (key->objectid == drop_key->objectid) {
6127                 if (key->type < drop_key->type)
6128                         return 1;
6129                 else if (key->type == drop_key->type) {
6130                         if (key->offset < drop_key->offset)
6131                                 return 1;
6132                 }
6133         }
6134         return 0;
6135 }
6136
6137 /*
6138  * Here are the rules for FULL_BACKREF.
6139  *
6140  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6141  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6142  *      FULL_BACKREF set.
6143  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6144  *    if it happened after the relocation occurred since we'll have dropped the
6145  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6146  *    have no real way to know for sure.
6147  *
6148  * We process the blocks one root at a time, and we start from the lowest root
6149  * objectid and go to the highest.  So we can just lookup the owner backref for
6150  * the record and if we don't find it then we know it doesn't exist and we have
6151  * a FULL BACKREF.
6152  *
6153  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6154  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6155  * be set or not and then we can check later once we've gathered all the refs.
6156  */
6157 static int calc_extent_flag(struct btrfs_root *root,
6158                            struct cache_tree *extent_cache,
6159                            struct extent_buffer *buf,
6160                            struct root_item_record *ri,
6161                            u64 *flags)
6162 {
6163         struct extent_record *rec;
6164         struct cache_extent *cache;
6165         struct tree_backref *tback;
6166         u64 owner = 0;
6167
6168         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6169         /* we have added this extent before */
6170         if (!cache)
6171                 return -ENOENT;
6172
6173         rec = container_of(cache, struct extent_record, cache);
6174
6175         /*
6176          * Except file/reloc tree, we can not have
6177          * FULL BACKREF MODE
6178          */
6179         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6180                 goto normal;
6181         /*
6182          * root node
6183          */
6184         if (buf->start == ri->bytenr)
6185                 goto normal;
6186
6187         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6188                 goto full_backref;
6189
6190         owner = btrfs_header_owner(buf);
6191         if (owner == ri->objectid)
6192                 goto normal;
6193
6194         tback = find_tree_backref(rec, 0, owner);
6195         if (!tback)
6196                 goto full_backref;
6197 normal:
6198         *flags = 0;
6199         if (rec->flag_block_full_backref != FLAG_UNSET &&
6200             rec->flag_block_full_backref != 0)
6201                 rec->bad_full_backref = 1;
6202         return 0;
6203 full_backref:
6204         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6205         if (rec->flag_block_full_backref != FLAG_UNSET &&
6206             rec->flag_block_full_backref != 1)
6207                 rec->bad_full_backref = 1;
6208         return 0;
6209 }
6210
6211 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6212 {
6213         fprintf(stderr, "Invalid key type(");
6214         print_key_type(stderr, 0, key_type);
6215         fprintf(stderr, ") found in root(");
6216         print_objectid(stderr, rootid, 0);
6217         fprintf(stderr, ")\n");
6218 }
6219
6220 /*
6221  * Check if the key is valid with its extent buffer.
6222  *
6223  * This is a early check in case invalid key exists in a extent buffer
6224  * This is not comprehensive yet, but should prevent wrong key/item passed
6225  * further
6226  */
6227 static int check_type_with_root(u64 rootid, u8 key_type)
6228 {
6229         switch (key_type) {
6230         /* Only valid in chunk tree */
6231         case BTRFS_DEV_ITEM_KEY:
6232         case BTRFS_CHUNK_ITEM_KEY:
6233                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6234                         goto err;
6235                 break;
6236         /* valid in csum and log tree */
6237         case BTRFS_CSUM_TREE_OBJECTID:
6238                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6239                       is_fstree(rootid)))
6240                         goto err;
6241                 break;
6242         case BTRFS_EXTENT_ITEM_KEY:
6243         case BTRFS_METADATA_ITEM_KEY:
6244         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6245                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6246                         goto err;
6247                 break;
6248         case BTRFS_ROOT_ITEM_KEY:
6249                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6250                         goto err;
6251                 break;
6252         case BTRFS_DEV_EXTENT_KEY:
6253                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6254                         goto err;
6255                 break;
6256         }
6257         return 0;
6258 err:
6259         report_mismatch_key_root(key_type, rootid);
6260         return -EINVAL;
6261 }
6262
6263 static int run_next_block(struct btrfs_root *root,
6264                           struct block_info *bits,
6265                           int bits_nr,
6266                           u64 *last,
6267                           struct cache_tree *pending,
6268                           struct cache_tree *seen,
6269                           struct cache_tree *reada,
6270                           struct cache_tree *nodes,
6271                           struct cache_tree *extent_cache,
6272                           struct cache_tree *chunk_cache,
6273                           struct rb_root *dev_cache,
6274                           struct block_group_tree *block_group_cache,
6275                           struct device_extent_tree *dev_extent_cache,
6276                           struct root_item_record *ri)
6277 {
6278         struct extent_buffer *buf;
6279         struct extent_record *rec = NULL;
6280         u64 bytenr;
6281         u32 size;
6282         u64 parent;
6283         u64 owner;
6284         u64 flags;
6285         u64 ptr;
6286         u64 gen = 0;
6287         int ret = 0;
6288         int i;
6289         int nritems;
6290         struct btrfs_key key;
6291         struct cache_extent *cache;
6292         int reada_bits;
6293
6294         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6295                                     bits_nr, &reada_bits);
6296         if (nritems == 0)
6297                 return 1;
6298
6299         if (!reada_bits) {
6300                 for(i = 0; i < nritems; i++) {
6301                         ret = add_cache_extent(reada, bits[i].start,
6302                                                bits[i].size);
6303                         if (ret == -EEXIST)
6304                                 continue;
6305
6306                         /* fixme, get the parent transid */
6307                         readahead_tree_block(root, bits[i].start,
6308                                              bits[i].size, 0);
6309                 }
6310         }
6311         *last = bits[0].start;
6312         bytenr = bits[0].start;
6313         size = bits[0].size;
6314
6315         cache = lookup_cache_extent(pending, bytenr, size);
6316         if (cache) {
6317                 remove_cache_extent(pending, cache);
6318                 free(cache);
6319         }
6320         cache = lookup_cache_extent(reada, bytenr, size);
6321         if (cache) {
6322                 remove_cache_extent(reada, cache);
6323                 free(cache);
6324         }
6325         cache = lookup_cache_extent(nodes, bytenr, size);
6326         if (cache) {
6327                 remove_cache_extent(nodes, cache);
6328                 free(cache);
6329         }
6330         cache = lookup_cache_extent(extent_cache, bytenr, size);
6331         if (cache) {
6332                 rec = container_of(cache, struct extent_record, cache);
6333                 gen = rec->parent_generation;
6334         }
6335
6336         /* fixme, get the real parent transid */
6337         buf = read_tree_block(root, bytenr, size, gen);
6338         if (!extent_buffer_uptodate(buf)) {
6339                 record_bad_block_io(root->fs_info,
6340                                     extent_cache, bytenr, size);
6341                 goto out;
6342         }
6343
6344         nritems = btrfs_header_nritems(buf);
6345
6346         flags = 0;
6347         if (!init_extent_tree) {
6348                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6349                                        btrfs_header_level(buf), 1, NULL,
6350                                        &flags);
6351                 if (ret < 0) {
6352                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6353                         if (ret < 0) {
6354                                 fprintf(stderr, "Couldn't calc extent flags\n");
6355                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6356                         }
6357                 }
6358         } else {
6359                 flags = 0;
6360                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6361                 if (ret < 0) {
6362                         fprintf(stderr, "Couldn't calc extent flags\n");
6363                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6364                 }
6365         }
6366
6367         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6368                 if (ri != NULL &&
6369                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6370                     ri->objectid == btrfs_header_owner(buf)) {
6371                         /*
6372                          * Ok we got to this block from it's original owner and
6373                          * we have FULL_BACKREF set.  Relocation can leave
6374                          * converted blocks over so this is altogether possible,
6375                          * however it's not possible if the generation > the
6376                          * last snapshot, so check for this case.
6377                          */
6378                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6379                             btrfs_header_generation(buf) > ri->last_snapshot) {
6380                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6381                                 rec->bad_full_backref = 1;
6382                         }
6383                 }
6384         } else {
6385                 if (ri != NULL &&
6386                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6387                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6388                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6389                         rec->bad_full_backref = 1;
6390                 }
6391         }
6392
6393         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6394                 rec->flag_block_full_backref = 1;
6395                 parent = bytenr;
6396                 owner = 0;
6397         } else {
6398                 rec->flag_block_full_backref = 0;
6399                 parent = 0;
6400                 owner = btrfs_header_owner(buf);
6401         }
6402
6403         ret = check_block(root, extent_cache, buf, flags);
6404         if (ret)
6405                 goto out;
6406
6407         if (btrfs_is_leaf(buf)) {
6408                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6409                 for (i = 0; i < nritems; i++) {
6410                         struct btrfs_file_extent_item *fi;
6411                         btrfs_item_key_to_cpu(buf, &key, i);
6412                         /*
6413                          * Check key type against the leaf owner.
6414                          * Could filter quite a lot of early error if
6415                          * owner is correct
6416                          */
6417                         if (check_type_with_root(btrfs_header_owner(buf),
6418                                                  key.type)) {
6419                                 fprintf(stderr, "ignoring invalid key\n");
6420                                 continue;
6421                         }
6422                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6423                                 process_extent_item(root, extent_cache, buf,
6424                                                     i);
6425                                 continue;
6426                         }
6427                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6428                                 process_extent_item(root, extent_cache, buf,
6429                                                     i);
6430                                 continue;
6431                         }
6432                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6433                                 total_csum_bytes +=
6434                                         btrfs_item_size_nr(buf, i);
6435                                 continue;
6436                         }
6437                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6438                                 process_chunk_item(chunk_cache, &key, buf, i);
6439                                 continue;
6440                         }
6441                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6442                                 process_device_item(dev_cache, &key, buf, i);
6443                                 continue;
6444                         }
6445                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6446                                 process_block_group_item(block_group_cache,
6447                                         &key, buf, i);
6448                                 continue;
6449                         }
6450                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6451                                 process_device_extent_item(dev_extent_cache,
6452                                         &key, buf, i);
6453                                 continue;
6454
6455                         }
6456                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6457 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6458                                 process_extent_ref_v0(extent_cache, buf, i);
6459 #else
6460                                 BUG();
6461 #endif
6462                                 continue;
6463                         }
6464
6465                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6466                                 ret = add_tree_backref(extent_cache,
6467                                                 key.objectid, 0, key.offset, 0);
6468                                 if (ret < 0)
6469                                         error("add_tree_backref failed: %s",
6470                                               strerror(-ret));
6471                                 continue;
6472                         }
6473                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6474                                 ret = add_tree_backref(extent_cache,
6475                                                 key.objectid, key.offset, 0, 0);
6476                                 if (ret < 0)
6477                                         error("add_tree_backref failed: %s",
6478                                               strerror(-ret));
6479                                 continue;
6480                         }
6481                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6482                                 struct btrfs_extent_data_ref *ref;
6483                                 ref = btrfs_item_ptr(buf, i,
6484                                                 struct btrfs_extent_data_ref);
6485                                 add_data_backref(extent_cache,
6486                                         key.objectid, 0,
6487                                         btrfs_extent_data_ref_root(buf, ref),
6488                                         btrfs_extent_data_ref_objectid(buf,
6489                                                                        ref),
6490                                         btrfs_extent_data_ref_offset(buf, ref),
6491                                         btrfs_extent_data_ref_count(buf, ref),
6492                                         0, root->sectorsize);
6493                                 continue;
6494                         }
6495                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6496                                 struct btrfs_shared_data_ref *ref;
6497                                 ref = btrfs_item_ptr(buf, i,
6498                                                 struct btrfs_shared_data_ref);
6499                                 add_data_backref(extent_cache,
6500                                         key.objectid, key.offset, 0, 0, 0,
6501                                         btrfs_shared_data_ref_count(buf, ref),
6502                                         0, root->sectorsize);
6503                                 continue;
6504                         }
6505                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6506                                 struct bad_item *bad;
6507
6508                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6509                                         continue;
6510                                 if (!owner)
6511                                         continue;
6512                                 bad = malloc(sizeof(struct bad_item));
6513                                 if (!bad)
6514                                         continue;
6515                                 INIT_LIST_HEAD(&bad->list);
6516                                 memcpy(&bad->key, &key,
6517                                        sizeof(struct btrfs_key));
6518                                 bad->root_id = owner;
6519                                 list_add_tail(&bad->list, &delete_items);
6520                                 continue;
6521                         }
6522                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6523                                 continue;
6524                         fi = btrfs_item_ptr(buf, i,
6525                                             struct btrfs_file_extent_item);
6526                         if (btrfs_file_extent_type(buf, fi) ==
6527                             BTRFS_FILE_EXTENT_INLINE)
6528                                 continue;
6529                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6530                                 continue;
6531
6532                         data_bytes_allocated +=
6533                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6534                         if (data_bytes_allocated < root->sectorsize) {
6535                                 abort();
6536                         }
6537                         data_bytes_referenced +=
6538                                 btrfs_file_extent_num_bytes(buf, fi);
6539                         add_data_backref(extent_cache,
6540                                 btrfs_file_extent_disk_bytenr(buf, fi),
6541                                 parent, owner, key.objectid, key.offset -
6542                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6543                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6544                 }
6545         } else {
6546                 int level;
6547                 struct btrfs_key first_key;
6548
6549                 first_key.objectid = 0;
6550
6551                 if (nritems > 0)
6552                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6553                 level = btrfs_header_level(buf);
6554                 for (i = 0; i < nritems; i++) {
6555                         struct extent_record tmpl;
6556
6557                         ptr = btrfs_node_blockptr(buf, i);
6558                         size = root->nodesize;
6559                         btrfs_node_key_to_cpu(buf, &key, i);
6560                         if (ri != NULL) {
6561                                 if ((level == ri->drop_level)
6562                                     && is_dropped_key(&key, &ri->drop_key)) {
6563                                         continue;
6564                                 }
6565                         }
6566
6567                         memset(&tmpl, 0, sizeof(tmpl));
6568                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6569                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6570                         tmpl.start = ptr;
6571                         tmpl.nr = size;
6572                         tmpl.refs = 1;
6573                         tmpl.metadata = 1;
6574                         tmpl.max_size = size;
6575                         ret = add_extent_rec(extent_cache, &tmpl);
6576                         if (ret < 0)
6577                                 goto out;
6578
6579                         ret = add_tree_backref(extent_cache, ptr, parent,
6580                                         owner, 1);
6581                         if (ret < 0) {
6582                                 error("add_tree_backref failed: %s",
6583                                       strerror(-ret));
6584                                 continue;
6585                         }
6586
6587                         if (level > 1) {
6588                                 add_pending(nodes, seen, ptr, size);
6589                         } else {
6590                                 add_pending(pending, seen, ptr, size);
6591                         }
6592                 }
6593                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6594                                       nritems) * sizeof(struct btrfs_key_ptr);
6595         }
6596         total_btree_bytes += buf->len;
6597         if (fs_root_objectid(btrfs_header_owner(buf)))
6598                 total_fs_tree_bytes += buf->len;
6599         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6600                 total_extent_tree_bytes += buf->len;
6601         if (!found_old_backref &&
6602             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6603             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6604             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6605                 found_old_backref = 1;
6606 out:
6607         free_extent_buffer(buf);
6608         return ret;
6609 }
6610
6611 static int add_root_to_pending(struct extent_buffer *buf,
6612                                struct cache_tree *extent_cache,
6613                                struct cache_tree *pending,
6614                                struct cache_tree *seen,
6615                                struct cache_tree *nodes,
6616                                u64 objectid)
6617 {
6618         struct extent_record tmpl;
6619         int ret;
6620
6621         if (btrfs_header_level(buf) > 0)
6622                 add_pending(nodes, seen, buf->start, buf->len);
6623         else
6624                 add_pending(pending, seen, buf->start, buf->len);
6625
6626         memset(&tmpl, 0, sizeof(tmpl));
6627         tmpl.start = buf->start;
6628         tmpl.nr = buf->len;
6629         tmpl.is_root = 1;
6630         tmpl.refs = 1;
6631         tmpl.metadata = 1;
6632         tmpl.max_size = buf->len;
6633         add_extent_rec(extent_cache, &tmpl);
6634
6635         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6636             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6637                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6638                                 0, 1);
6639         else
6640                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6641                                 1);
6642         return ret;
6643 }
6644
6645 /* as we fix the tree, we might be deleting blocks that
6646  * we're tracking for repair.  This hook makes sure we
6647  * remove any backrefs for blocks as we are fixing them.
6648  */
6649 static int free_extent_hook(struct btrfs_trans_handle *trans,
6650                             struct btrfs_root *root,
6651                             u64 bytenr, u64 num_bytes, u64 parent,
6652                             u64 root_objectid, u64 owner, u64 offset,
6653                             int refs_to_drop)
6654 {
6655         struct extent_record *rec;
6656         struct cache_extent *cache;
6657         int is_data;
6658         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6659
6660         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6661         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6662         if (!cache)
6663                 return 0;
6664
6665         rec = container_of(cache, struct extent_record, cache);
6666         if (is_data) {
6667                 struct data_backref *back;
6668                 back = find_data_backref(rec, parent, root_objectid, owner,
6669                                          offset, 1, bytenr, num_bytes);
6670                 if (!back)
6671                         goto out;
6672                 if (back->node.found_ref) {
6673                         back->found_ref -= refs_to_drop;
6674                         if (rec->refs)
6675                                 rec->refs -= refs_to_drop;
6676                 }
6677                 if (back->node.found_extent_tree) {
6678                         back->num_refs -= refs_to_drop;
6679                         if (rec->extent_item_refs)
6680                                 rec->extent_item_refs -= refs_to_drop;
6681                 }
6682                 if (back->found_ref == 0)
6683                         back->node.found_ref = 0;
6684                 if (back->num_refs == 0)
6685                         back->node.found_extent_tree = 0;
6686
6687                 if (!back->node.found_extent_tree && back->node.found_ref) {
6688                         list_del(&back->node.list);
6689                         free(back);
6690                 }
6691         } else {
6692                 struct tree_backref *back;
6693                 back = find_tree_backref(rec, parent, root_objectid);
6694                 if (!back)
6695                         goto out;
6696                 if (back->node.found_ref) {
6697                         if (rec->refs)
6698                                 rec->refs--;
6699                         back->node.found_ref = 0;
6700                 }
6701                 if (back->node.found_extent_tree) {
6702                         if (rec->extent_item_refs)
6703                                 rec->extent_item_refs--;
6704                         back->node.found_extent_tree = 0;
6705                 }
6706                 if (!back->node.found_extent_tree && back->node.found_ref) {
6707                         list_del(&back->node.list);
6708                         free(back);
6709                 }
6710         }
6711         maybe_free_extent_rec(extent_cache, rec);
6712 out:
6713         return 0;
6714 }
6715
6716 static int delete_extent_records(struct btrfs_trans_handle *trans,
6717                                  struct btrfs_root *root,
6718                                  struct btrfs_path *path,
6719                                  u64 bytenr, u64 new_len)
6720 {
6721         struct btrfs_key key;
6722         struct btrfs_key found_key;
6723         struct extent_buffer *leaf;
6724         int ret;
6725         int slot;
6726
6727
6728         key.objectid = bytenr;
6729         key.type = (u8)-1;
6730         key.offset = (u64)-1;
6731
6732         while(1) {
6733                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6734                                         &key, path, 0, 1);
6735                 if (ret < 0)
6736                         break;
6737
6738                 if (ret > 0) {
6739                         ret = 0;
6740                         if (path->slots[0] == 0)
6741                                 break;
6742                         path->slots[0]--;
6743                 }
6744                 ret = 0;
6745
6746                 leaf = path->nodes[0];
6747                 slot = path->slots[0];
6748
6749                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6750                 if (found_key.objectid != bytenr)
6751                         break;
6752
6753                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6754                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6755                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6756                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6757                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6758                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6759                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6760                         btrfs_release_path(path);
6761                         if (found_key.type == 0) {
6762                                 if (found_key.offset == 0)
6763                                         break;
6764                                 key.offset = found_key.offset - 1;
6765                                 key.type = found_key.type;
6766                         }
6767                         key.type = found_key.type - 1;
6768                         key.offset = (u64)-1;
6769                         continue;
6770                 }
6771
6772                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6773                         found_key.objectid, found_key.type, found_key.offset);
6774
6775                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6776                 if (ret)
6777                         break;
6778                 btrfs_release_path(path);
6779
6780                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6781                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6782                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6783                                 found_key.offset : root->nodesize;
6784
6785                         ret = btrfs_update_block_group(trans, root, bytenr,
6786                                                        bytes, 0, 0);
6787                         if (ret)
6788                                 break;
6789                 }
6790         }
6791
6792         btrfs_release_path(path);
6793         return ret;
6794 }
6795
6796 /*
6797  * for a single backref, this will allocate a new extent
6798  * and add the backref to it.
6799  */
6800 static int record_extent(struct btrfs_trans_handle *trans,
6801                          struct btrfs_fs_info *info,
6802                          struct btrfs_path *path,
6803                          struct extent_record *rec,
6804                          struct extent_backref *back,
6805                          int allocated, u64 flags)
6806 {
6807         int ret;
6808         struct btrfs_root *extent_root = info->extent_root;
6809         struct extent_buffer *leaf;
6810         struct btrfs_key ins_key;
6811         struct btrfs_extent_item *ei;
6812         struct data_backref *dback;
6813         struct btrfs_tree_block_info *bi;
6814
6815         if (!back->is_data)
6816                 rec->max_size = max_t(u64, rec->max_size,
6817                                     info->extent_root->nodesize);
6818
6819         if (!allocated) {
6820                 u32 item_size = sizeof(*ei);
6821
6822                 if (!back->is_data)
6823                         item_size += sizeof(*bi);
6824
6825                 ins_key.objectid = rec->start;
6826                 ins_key.offset = rec->max_size;
6827                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6828
6829                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6830                                         &ins_key, item_size);
6831                 if (ret)
6832                         goto fail;
6833
6834                 leaf = path->nodes[0];
6835                 ei = btrfs_item_ptr(leaf, path->slots[0],
6836                                     struct btrfs_extent_item);
6837
6838                 btrfs_set_extent_refs(leaf, ei, 0);
6839                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6840
6841                 if (back->is_data) {
6842                         btrfs_set_extent_flags(leaf, ei,
6843                                                BTRFS_EXTENT_FLAG_DATA);
6844                 } else {
6845                         struct btrfs_disk_key copy_key;;
6846
6847                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6848                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6849                                              sizeof(*bi));
6850
6851                         btrfs_set_disk_key_objectid(&copy_key,
6852                                                     rec->info_objectid);
6853                         btrfs_set_disk_key_type(&copy_key, 0);
6854                         btrfs_set_disk_key_offset(&copy_key, 0);
6855
6856                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6857                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6858
6859                         btrfs_set_extent_flags(leaf, ei,
6860                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6861                 }
6862
6863                 btrfs_mark_buffer_dirty(leaf);
6864                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6865                                                rec->max_size, 1, 0);
6866                 if (ret)
6867                         goto fail;
6868                 btrfs_release_path(path);
6869         }
6870
6871         if (back->is_data) {
6872                 u64 parent;
6873                 int i;
6874
6875                 dback = to_data_backref(back);
6876                 if (back->full_backref)
6877                         parent = dback->parent;
6878                 else
6879                         parent = 0;
6880
6881                 for (i = 0; i < dback->found_ref; i++) {
6882                         /* if parent != 0, we're doing a full backref
6883                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6884                          * just makes the backref allocator create a data
6885                          * backref
6886                          */
6887                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6888                                                    rec->start, rec->max_size,
6889                                                    parent,
6890                                                    dback->root,
6891                                                    parent ?
6892                                                    BTRFS_FIRST_FREE_OBJECTID :
6893                                                    dback->owner,
6894                                                    dback->offset);
6895                         if (ret)
6896                                 break;
6897                 }
6898                 fprintf(stderr, "adding new data backref"
6899                                 " on %llu %s %llu owner %llu"
6900                                 " offset %llu found %d\n",
6901                                 (unsigned long long)rec->start,
6902                                 back->full_backref ?
6903                                 "parent" : "root",
6904                                 back->full_backref ?
6905                                 (unsigned long long)parent :
6906                                 (unsigned long long)dback->root,
6907                                 (unsigned long long)dback->owner,
6908                                 (unsigned long long)dback->offset,
6909                                 dback->found_ref);
6910         } else {
6911                 u64 parent;
6912                 struct tree_backref *tback;
6913
6914                 tback = to_tree_backref(back);
6915                 if (back->full_backref)
6916                         parent = tback->parent;
6917                 else
6918                         parent = 0;
6919
6920                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6921                                            rec->start, rec->max_size,
6922                                            parent, tback->root, 0, 0);
6923                 fprintf(stderr, "adding new tree backref on "
6924                         "start %llu len %llu parent %llu root %llu\n",
6925                         rec->start, rec->max_size, parent, tback->root);
6926         }
6927 fail:
6928         btrfs_release_path(path);
6929         return ret;
6930 }
6931
6932 static struct extent_entry *find_entry(struct list_head *entries,
6933                                        u64 bytenr, u64 bytes)
6934 {
6935         struct extent_entry *entry = NULL;
6936
6937         list_for_each_entry(entry, entries, list) {
6938                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6939                         return entry;
6940         }
6941
6942         return NULL;
6943 }
6944
6945 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6946 {
6947         struct extent_entry *entry, *best = NULL, *prev = NULL;
6948
6949         list_for_each_entry(entry, entries, list) {
6950                 /*
6951                  * If there are as many broken entries as entries then we know
6952                  * not to trust this particular entry.
6953                  */
6954                 if (entry->broken == entry->count)
6955                         continue;
6956
6957                 /*
6958                  * Special case, when there are only two entries and 'best' is
6959                  * the first one
6960                  */
6961                 if (!prev) {
6962                         best = entry;
6963                         prev = entry;
6964                         continue;
6965                 }
6966
6967                 /*
6968                  * If our current entry == best then we can't be sure our best
6969                  * is really the best, so we need to keep searching.
6970                  */
6971                 if (best && best->count == entry->count) {
6972                         prev = entry;
6973                         best = NULL;
6974                         continue;
6975                 }
6976
6977                 /* Prev == entry, not good enough, have to keep searching */
6978                 if (!prev->broken && prev->count == entry->count)
6979                         continue;
6980
6981                 if (!best)
6982                         best = (prev->count > entry->count) ? prev : entry;
6983                 else if (best->count < entry->count)
6984                         best = entry;
6985                 prev = entry;
6986         }
6987
6988         return best;
6989 }
6990
6991 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6992                       struct data_backref *dback, struct extent_entry *entry)
6993 {
6994         struct btrfs_trans_handle *trans;
6995         struct btrfs_root *root;
6996         struct btrfs_file_extent_item *fi;
6997         struct extent_buffer *leaf;
6998         struct btrfs_key key;
6999         u64 bytenr, bytes;
7000         int ret, err;
7001
7002         key.objectid = dback->root;
7003         key.type = BTRFS_ROOT_ITEM_KEY;
7004         key.offset = (u64)-1;
7005         root = btrfs_read_fs_root(info, &key);
7006         if (IS_ERR(root)) {
7007                 fprintf(stderr, "Couldn't find root for our ref\n");
7008                 return -EINVAL;
7009         }
7010
7011         /*
7012          * The backref points to the original offset of the extent if it was
7013          * split, so we need to search down to the offset we have and then walk
7014          * forward until we find the backref we're looking for.
7015          */
7016         key.objectid = dback->owner;
7017         key.type = BTRFS_EXTENT_DATA_KEY;
7018         key.offset = dback->offset;
7019         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7020         if (ret < 0) {
7021                 fprintf(stderr, "Error looking up ref %d\n", ret);
7022                 return ret;
7023         }
7024
7025         while (1) {
7026                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7027                         ret = btrfs_next_leaf(root, path);
7028                         if (ret) {
7029                                 fprintf(stderr, "Couldn't find our ref, next\n");
7030                                 return -EINVAL;
7031                         }
7032                 }
7033                 leaf = path->nodes[0];
7034                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7035                 if (key.objectid != dback->owner ||
7036                     key.type != BTRFS_EXTENT_DATA_KEY) {
7037                         fprintf(stderr, "Couldn't find our ref, search\n");
7038                         return -EINVAL;
7039                 }
7040                 fi = btrfs_item_ptr(leaf, path->slots[0],
7041                                     struct btrfs_file_extent_item);
7042                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7043                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7044
7045                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7046                         break;
7047                 path->slots[0]++;
7048         }
7049
7050         btrfs_release_path(path);
7051
7052         trans = btrfs_start_transaction(root, 1);
7053         if (IS_ERR(trans))
7054                 return PTR_ERR(trans);
7055
7056         /*
7057          * Ok we have the key of the file extent we want to fix, now we can cow
7058          * down to the thing and fix it.
7059          */
7060         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7061         if (ret < 0) {
7062                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7063                         key.objectid, key.type, key.offset, ret);
7064                 goto out;
7065         }
7066         if (ret > 0) {
7067                 fprintf(stderr, "Well that's odd, we just found this key "
7068                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7069                         key.offset);
7070                 ret = -EINVAL;
7071                 goto out;
7072         }
7073         leaf = path->nodes[0];
7074         fi = btrfs_item_ptr(leaf, path->slots[0],
7075                             struct btrfs_file_extent_item);
7076
7077         if (btrfs_file_extent_compression(leaf, fi) &&
7078             dback->disk_bytenr != entry->bytenr) {
7079                 fprintf(stderr, "Ref doesn't match the record start and is "
7080                         "compressed, please take a btrfs-image of this file "
7081                         "system and send it to a btrfs developer so they can "
7082                         "complete this functionality for bytenr %Lu\n",
7083                         dback->disk_bytenr);
7084                 ret = -EINVAL;
7085                 goto out;
7086         }
7087
7088         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7089                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7090         } else if (dback->disk_bytenr > entry->bytenr) {
7091                 u64 off_diff, offset;
7092
7093                 off_diff = dback->disk_bytenr - entry->bytenr;
7094                 offset = btrfs_file_extent_offset(leaf, fi);
7095                 if (dback->disk_bytenr + offset +
7096                     btrfs_file_extent_num_bytes(leaf, fi) >
7097                     entry->bytenr + entry->bytes) {
7098                         fprintf(stderr, "Ref is past the entry end, please "
7099                                 "take a btrfs-image of this file system and "
7100                                 "send it to a btrfs developer, ref %Lu\n",
7101                                 dback->disk_bytenr);
7102                         ret = -EINVAL;
7103                         goto out;
7104                 }
7105                 offset += off_diff;
7106                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7107                 btrfs_set_file_extent_offset(leaf, fi, offset);
7108         } else if (dback->disk_bytenr < entry->bytenr) {
7109                 u64 offset;
7110
7111                 offset = btrfs_file_extent_offset(leaf, fi);
7112                 if (dback->disk_bytenr + offset < entry->bytenr) {
7113                         fprintf(stderr, "Ref is before the entry start, please"
7114                                 " take a btrfs-image of this file system and "
7115                                 "send it to a btrfs developer, ref %Lu\n",
7116                                 dback->disk_bytenr);
7117                         ret = -EINVAL;
7118                         goto out;
7119                 }
7120
7121                 offset += dback->disk_bytenr;
7122                 offset -= entry->bytenr;
7123                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7124                 btrfs_set_file_extent_offset(leaf, fi, offset);
7125         }
7126
7127         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7128
7129         /*
7130          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7131          * only do this if we aren't using compression, otherwise it's a
7132          * trickier case.
7133          */
7134         if (!btrfs_file_extent_compression(leaf, fi))
7135                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7136         else
7137                 printf("ram bytes may be wrong?\n");
7138         btrfs_mark_buffer_dirty(leaf);
7139 out:
7140         err = btrfs_commit_transaction(trans, root);
7141         btrfs_release_path(path);
7142         return ret ? ret : err;
7143 }
7144
7145 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7146                            struct extent_record *rec)
7147 {
7148         struct extent_backref *back;
7149         struct data_backref *dback;
7150         struct extent_entry *entry, *best = NULL;
7151         LIST_HEAD(entries);
7152         int nr_entries = 0;
7153         int broken_entries = 0;
7154         int ret = 0;
7155         short mismatch = 0;
7156
7157         /*
7158          * Metadata is easy and the backrefs should always agree on bytenr and
7159          * size, if not we've got bigger issues.
7160          */
7161         if (rec->metadata)
7162                 return 0;
7163
7164         list_for_each_entry(back, &rec->backrefs, list) {
7165                 if (back->full_backref || !back->is_data)
7166                         continue;
7167
7168                 dback = to_data_backref(back);
7169
7170                 /*
7171                  * We only pay attention to backrefs that we found a real
7172                  * backref for.
7173                  */
7174                 if (dback->found_ref == 0)
7175                         continue;
7176
7177                 /*
7178                  * For now we only catch when the bytes don't match, not the
7179                  * bytenr.  We can easily do this at the same time, but I want
7180                  * to have a fs image to test on before we just add repair
7181                  * functionality willy-nilly so we know we won't screw up the
7182                  * repair.
7183                  */
7184
7185                 entry = find_entry(&entries, dback->disk_bytenr,
7186                                    dback->bytes);
7187                 if (!entry) {
7188                         entry = malloc(sizeof(struct extent_entry));
7189                         if (!entry) {
7190                                 ret = -ENOMEM;
7191                                 goto out;
7192                         }
7193                         memset(entry, 0, sizeof(*entry));
7194                         entry->bytenr = dback->disk_bytenr;
7195                         entry->bytes = dback->bytes;
7196                         list_add_tail(&entry->list, &entries);
7197                         nr_entries++;
7198                 }
7199
7200                 /*
7201                  * If we only have on entry we may think the entries agree when
7202                  * in reality they don't so we have to do some extra checking.
7203                  */
7204                 if (dback->disk_bytenr != rec->start ||
7205                     dback->bytes != rec->nr || back->broken)
7206                         mismatch = 1;
7207
7208                 if (back->broken) {
7209                         entry->broken++;
7210                         broken_entries++;
7211                 }
7212
7213                 entry->count++;
7214         }
7215
7216         /* Yay all the backrefs agree, carry on good sir */
7217         if (nr_entries <= 1 && !mismatch)
7218                 goto out;
7219
7220         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7221                 "%Lu\n", rec->start);
7222
7223         /*
7224          * First we want to see if the backrefs can agree amongst themselves who
7225          * is right, so figure out which one of the entries has the highest
7226          * count.
7227          */
7228         best = find_most_right_entry(&entries);
7229
7230         /*
7231          * Ok so we may have an even split between what the backrefs think, so
7232          * this is where we use the extent ref to see what it thinks.
7233          */
7234         if (!best) {
7235                 entry = find_entry(&entries, rec->start, rec->nr);
7236                 if (!entry && (!broken_entries || !rec->found_rec)) {
7237                         fprintf(stderr, "Backrefs don't agree with each other "
7238                                 "and extent record doesn't agree with anybody,"
7239                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7240                                 rec->start, rec->nr);
7241                         ret = -EINVAL;
7242                         goto out;
7243                 } else if (!entry) {
7244                         /*
7245                          * Ok our backrefs were broken, we'll assume this is the
7246                          * correct value and add an entry for this range.
7247                          */
7248                         entry = malloc(sizeof(struct extent_entry));
7249                         if (!entry) {
7250                                 ret = -ENOMEM;
7251                                 goto out;
7252                         }
7253                         memset(entry, 0, sizeof(*entry));
7254                         entry->bytenr = rec->start;
7255                         entry->bytes = rec->nr;
7256                         list_add_tail(&entry->list, &entries);
7257                         nr_entries++;
7258                 }
7259                 entry->count++;
7260                 best = find_most_right_entry(&entries);
7261                 if (!best) {
7262                         fprintf(stderr, "Backrefs and extent record evenly "
7263                                 "split on who is right, this is going to "
7264                                 "require user input to fix bytenr %Lu bytes "
7265                                 "%Lu\n", rec->start, rec->nr);
7266                         ret = -EINVAL;
7267                         goto out;
7268                 }
7269         }
7270
7271         /*
7272          * I don't think this can happen currently as we'll abort() if we catch
7273          * this case higher up, but in case somebody removes that we still can't
7274          * deal with it properly here yet, so just bail out of that's the case.
7275          */
7276         if (best->bytenr != rec->start) {
7277                 fprintf(stderr, "Extent start and backref starts don't match, "
7278                         "please use btrfs-image on this file system and send "
7279                         "it to a btrfs developer so they can make fsck fix "
7280                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7281                         rec->start, rec->nr);
7282                 ret = -EINVAL;
7283                 goto out;
7284         }
7285
7286         /*
7287          * Ok great we all agreed on an extent record, let's go find the real
7288          * references and fix up the ones that don't match.
7289          */
7290         list_for_each_entry(back, &rec->backrefs, list) {
7291                 if (back->full_backref || !back->is_data)
7292                         continue;
7293
7294                 dback = to_data_backref(back);
7295
7296                 /*
7297                  * Still ignoring backrefs that don't have a real ref attached
7298                  * to them.
7299                  */
7300                 if (dback->found_ref == 0)
7301                         continue;
7302
7303                 if (dback->bytes == best->bytes &&
7304                     dback->disk_bytenr == best->bytenr)
7305                         continue;
7306
7307                 ret = repair_ref(info, path, dback, best);
7308                 if (ret)
7309                         goto out;
7310         }
7311
7312         /*
7313          * Ok we messed with the actual refs, which means we need to drop our
7314          * entire cache and go back and rescan.  I know this is a huge pain and
7315          * adds a lot of extra work, but it's the only way to be safe.  Once all
7316          * the backrefs agree we may not need to do anything to the extent
7317          * record itself.
7318          */
7319         ret = -EAGAIN;
7320 out:
7321         while (!list_empty(&entries)) {
7322                 entry = list_entry(entries.next, struct extent_entry, list);
7323                 list_del_init(&entry->list);
7324                 free(entry);
7325         }
7326         return ret;
7327 }
7328
7329 static int process_duplicates(struct btrfs_root *root,
7330                               struct cache_tree *extent_cache,
7331                               struct extent_record *rec)
7332 {
7333         struct extent_record *good, *tmp;
7334         struct cache_extent *cache;
7335         int ret;
7336
7337         /*
7338          * If we found a extent record for this extent then return, or if we
7339          * have more than one duplicate we are likely going to need to delete
7340          * something.
7341          */
7342         if (rec->found_rec || rec->num_duplicates > 1)
7343                 return 0;
7344
7345         /* Shouldn't happen but just in case */
7346         BUG_ON(!rec->num_duplicates);
7347
7348         /*
7349          * So this happens if we end up with a backref that doesn't match the
7350          * actual extent entry.  So either the backref is bad or the extent
7351          * entry is bad.  Either way we want to have the extent_record actually
7352          * reflect what we found in the extent_tree, so we need to take the
7353          * duplicate out and use that as the extent_record since the only way we
7354          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7355          */
7356         remove_cache_extent(extent_cache, &rec->cache);
7357
7358         good = to_extent_record(rec->dups.next);
7359         list_del_init(&good->list);
7360         INIT_LIST_HEAD(&good->backrefs);
7361         INIT_LIST_HEAD(&good->dups);
7362         good->cache.start = good->start;
7363         good->cache.size = good->nr;
7364         good->content_checked = 0;
7365         good->owner_ref_checked = 0;
7366         good->num_duplicates = 0;
7367         good->refs = rec->refs;
7368         list_splice_init(&rec->backrefs, &good->backrefs);
7369         while (1) {
7370                 cache = lookup_cache_extent(extent_cache, good->start,
7371                                             good->nr);
7372                 if (!cache)
7373                         break;
7374                 tmp = container_of(cache, struct extent_record, cache);
7375
7376                 /*
7377                  * If we find another overlapping extent and it's found_rec is
7378                  * set then it's a duplicate and we need to try and delete
7379                  * something.
7380                  */
7381                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7382                         if (list_empty(&good->list))
7383                                 list_add_tail(&good->list,
7384                                               &duplicate_extents);
7385                         good->num_duplicates += tmp->num_duplicates + 1;
7386                         list_splice_init(&tmp->dups, &good->dups);
7387                         list_del_init(&tmp->list);
7388                         list_add_tail(&tmp->list, &good->dups);
7389                         remove_cache_extent(extent_cache, &tmp->cache);
7390                         continue;
7391                 }
7392
7393                 /*
7394                  * Ok we have another non extent item backed extent rec, so lets
7395                  * just add it to this extent and carry on like we did above.
7396                  */
7397                 good->refs += tmp->refs;
7398                 list_splice_init(&tmp->backrefs, &good->backrefs);
7399                 remove_cache_extent(extent_cache, &tmp->cache);
7400                 free(tmp);
7401         }
7402         ret = insert_cache_extent(extent_cache, &good->cache);
7403         BUG_ON(ret);
7404         free(rec);
7405         return good->num_duplicates ? 0 : 1;
7406 }
7407
7408 static int delete_duplicate_records(struct btrfs_root *root,
7409                                     struct extent_record *rec)
7410 {
7411         struct btrfs_trans_handle *trans;
7412         LIST_HEAD(delete_list);
7413         struct btrfs_path path;
7414         struct extent_record *tmp, *good, *n;
7415         int nr_del = 0;
7416         int ret = 0, err;
7417         struct btrfs_key key;
7418
7419         btrfs_init_path(&path);
7420
7421         good = rec;
7422         /* Find the record that covers all of the duplicates. */
7423         list_for_each_entry(tmp, &rec->dups, list) {
7424                 if (good->start < tmp->start)
7425                         continue;
7426                 if (good->nr > tmp->nr)
7427                         continue;
7428
7429                 if (tmp->start + tmp->nr < good->start + good->nr) {
7430                         fprintf(stderr, "Ok we have overlapping extents that "
7431                                 "aren't completely covered by each other, this "
7432                                 "is going to require more careful thought.  "
7433                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7434                                 tmp->start, tmp->nr, good->start, good->nr);
7435                         abort();
7436                 }
7437                 good = tmp;
7438         }
7439
7440         if (good != rec)
7441                 list_add_tail(&rec->list, &delete_list);
7442
7443         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7444                 if (tmp == good)
7445                         continue;
7446                 list_move_tail(&tmp->list, &delete_list);
7447         }
7448
7449         root = root->fs_info->extent_root;
7450         trans = btrfs_start_transaction(root, 1);
7451         if (IS_ERR(trans)) {
7452                 ret = PTR_ERR(trans);
7453                 goto out;
7454         }
7455
7456         list_for_each_entry(tmp, &delete_list, list) {
7457                 if (tmp->found_rec == 0)
7458                         continue;
7459                 key.objectid = tmp->start;
7460                 key.type = BTRFS_EXTENT_ITEM_KEY;
7461                 key.offset = tmp->nr;
7462
7463                 /* Shouldn't happen but just in case */
7464                 if (tmp->metadata) {
7465                         fprintf(stderr, "Well this shouldn't happen, extent "
7466                                 "record overlaps but is metadata? "
7467                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7468                         abort();
7469                 }
7470
7471                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7472                 if (ret) {
7473                         if (ret > 0)
7474                                 ret = -EINVAL;
7475                         break;
7476                 }
7477                 ret = btrfs_del_item(trans, root, &path);
7478                 if (ret)
7479                         break;
7480                 btrfs_release_path(&path);
7481                 nr_del++;
7482         }
7483         err = btrfs_commit_transaction(trans, root);
7484         if (err && !ret)
7485                 ret = err;
7486 out:
7487         while (!list_empty(&delete_list)) {
7488                 tmp = to_extent_record(delete_list.next);
7489                 list_del_init(&tmp->list);
7490                 if (tmp == rec)
7491                         continue;
7492                 free(tmp);
7493         }
7494
7495         while (!list_empty(&rec->dups)) {
7496                 tmp = to_extent_record(rec->dups.next);
7497                 list_del_init(&tmp->list);
7498                 free(tmp);
7499         }
7500
7501         btrfs_release_path(&path);
7502
7503         if (!ret && !nr_del)
7504                 rec->num_duplicates = 0;
7505
7506         return ret ? ret : nr_del;
7507 }
7508
7509 static int find_possible_backrefs(struct btrfs_fs_info *info,
7510                                   struct btrfs_path *path,
7511                                   struct cache_tree *extent_cache,
7512                                   struct extent_record *rec)
7513 {
7514         struct btrfs_root *root;
7515         struct extent_backref *back;
7516         struct data_backref *dback;
7517         struct cache_extent *cache;
7518         struct btrfs_file_extent_item *fi;
7519         struct btrfs_key key;
7520         u64 bytenr, bytes;
7521         int ret;
7522
7523         list_for_each_entry(back, &rec->backrefs, list) {
7524                 /* Don't care about full backrefs (poor unloved backrefs) */
7525                 if (back->full_backref || !back->is_data)
7526                         continue;
7527
7528                 dback = to_data_backref(back);
7529
7530                 /* We found this one, we don't need to do a lookup */
7531                 if (dback->found_ref)
7532                         continue;
7533
7534                 key.objectid = dback->root;
7535                 key.type = BTRFS_ROOT_ITEM_KEY;
7536                 key.offset = (u64)-1;
7537
7538                 root = btrfs_read_fs_root(info, &key);
7539
7540                 /* No root, definitely a bad ref, skip */
7541                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7542                         continue;
7543                 /* Other err, exit */
7544                 if (IS_ERR(root))
7545                         return PTR_ERR(root);
7546
7547                 key.objectid = dback->owner;
7548                 key.type = BTRFS_EXTENT_DATA_KEY;
7549                 key.offset = dback->offset;
7550                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7551                 if (ret) {
7552                         btrfs_release_path(path);
7553                         if (ret < 0)
7554                                 return ret;
7555                         /* Didn't find it, we can carry on */
7556                         ret = 0;
7557                         continue;
7558                 }
7559
7560                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7561                                     struct btrfs_file_extent_item);
7562                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7563                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7564                 btrfs_release_path(path);
7565                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7566                 if (cache) {
7567                         struct extent_record *tmp;
7568                         tmp = container_of(cache, struct extent_record, cache);
7569
7570                         /*
7571                          * If we found an extent record for the bytenr for this
7572                          * particular backref then we can't add it to our
7573                          * current extent record.  We only want to add backrefs
7574                          * that don't have a corresponding extent item in the
7575                          * extent tree since they likely belong to this record
7576                          * and we need to fix it if it doesn't match bytenrs.
7577                          */
7578                         if  (tmp->found_rec)
7579                                 continue;
7580                 }
7581
7582                 dback->found_ref += 1;
7583                 dback->disk_bytenr = bytenr;
7584                 dback->bytes = bytes;
7585
7586                 /*
7587                  * Set this so the verify backref code knows not to trust the
7588                  * values in this backref.
7589                  */
7590                 back->broken = 1;
7591         }
7592
7593         return 0;
7594 }
7595
7596 /*
7597  * Record orphan data ref into corresponding root.
7598  *
7599  * Return 0 if the extent item contains data ref and recorded.
7600  * Return 1 if the extent item contains no useful data ref
7601  *   On that case, it may contains only shared_dataref or metadata backref
7602  *   or the file extent exists(this should be handled by the extent bytenr
7603  *   recovery routine)
7604  * Return <0 if something goes wrong.
7605  */
7606 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7607                                       struct extent_record *rec)
7608 {
7609         struct btrfs_key key;
7610         struct btrfs_root *dest_root;
7611         struct extent_backref *back;
7612         struct data_backref *dback;
7613         struct orphan_data_extent *orphan;
7614         struct btrfs_path path;
7615         int recorded_data_ref = 0;
7616         int ret = 0;
7617
7618         if (rec->metadata)
7619                 return 1;
7620         btrfs_init_path(&path);
7621         list_for_each_entry(back, &rec->backrefs, list) {
7622                 if (back->full_backref || !back->is_data ||
7623                     !back->found_extent_tree)
7624                         continue;
7625                 dback = to_data_backref(back);
7626                 if (dback->found_ref)
7627                         continue;
7628                 key.objectid = dback->root;
7629                 key.type = BTRFS_ROOT_ITEM_KEY;
7630                 key.offset = (u64)-1;
7631
7632                 dest_root = btrfs_read_fs_root(fs_info, &key);
7633
7634                 /* For non-exist root we just skip it */
7635                 if (IS_ERR(dest_root) || !dest_root)
7636                         continue;
7637
7638                 key.objectid = dback->owner;
7639                 key.type = BTRFS_EXTENT_DATA_KEY;
7640                 key.offset = dback->offset;
7641
7642                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7643                 btrfs_release_path(&path);
7644                 /*
7645                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7646                  * we need to record it for inode/file extent rebuild.
7647                  * For ret > 0, we record it only for file extent rebuild.
7648                  * For ret == 0, the file extent exists but only bytenr
7649                  * mismatch, let the original bytenr fix routine to handle,
7650                  * don't record it.
7651                  */
7652                 if (ret == 0)
7653                         continue;
7654                 ret = 0;
7655                 orphan = malloc(sizeof(*orphan));
7656                 if (!orphan) {
7657                         ret = -ENOMEM;
7658                         goto out;
7659                 }
7660                 INIT_LIST_HEAD(&orphan->list);
7661                 orphan->root = dback->root;
7662                 orphan->objectid = dback->owner;
7663                 orphan->offset = dback->offset;
7664                 orphan->disk_bytenr = rec->cache.start;
7665                 orphan->disk_len = rec->cache.size;
7666                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7667                 recorded_data_ref = 1;
7668         }
7669 out:
7670         btrfs_release_path(&path);
7671         if (!ret)
7672                 return !recorded_data_ref;
7673         else
7674                 return ret;
7675 }
7676
7677 /*
7678  * when an incorrect extent item is found, this will delete
7679  * all of the existing entries for it and recreate them
7680  * based on what the tree scan found.
7681  */
7682 static int fixup_extent_refs(struct btrfs_fs_info *info,
7683                              struct cache_tree *extent_cache,
7684                              struct extent_record *rec)
7685 {
7686         struct btrfs_trans_handle *trans = NULL;
7687         int ret;
7688         struct btrfs_path path;
7689         struct list_head *cur = rec->backrefs.next;
7690         struct cache_extent *cache;
7691         struct extent_backref *back;
7692         int allocated = 0;
7693         u64 flags = 0;
7694
7695         if (rec->flag_block_full_backref)
7696                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7697
7698         btrfs_init_path(&path);
7699         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7700                 /*
7701                  * Sometimes the backrefs themselves are so broken they don't
7702                  * get attached to any meaningful rec, so first go back and
7703                  * check any of our backrefs that we couldn't find and throw
7704                  * them into the list if we find the backref so that
7705                  * verify_backrefs can figure out what to do.
7706                  */
7707                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7708                 if (ret < 0)
7709                         goto out;
7710         }
7711
7712         /* step one, make sure all of the backrefs agree */
7713         ret = verify_backrefs(info, &path, rec);
7714         if (ret < 0)
7715                 goto out;
7716
7717         trans = btrfs_start_transaction(info->extent_root, 1);
7718         if (IS_ERR(trans)) {
7719                 ret = PTR_ERR(trans);
7720                 goto out;
7721         }
7722
7723         /* step two, delete all the existing records */
7724         ret = delete_extent_records(trans, info->extent_root, &path,
7725                                     rec->start, rec->max_size);
7726
7727         if (ret < 0)
7728                 goto out;
7729
7730         /* was this block corrupt?  If so, don't add references to it */
7731         cache = lookup_cache_extent(info->corrupt_blocks,
7732                                     rec->start, rec->max_size);
7733         if (cache) {
7734                 ret = 0;
7735                 goto out;
7736         }
7737
7738         /* step three, recreate all the refs we did find */
7739         while(cur != &rec->backrefs) {
7740                 back = to_extent_backref(cur);
7741                 cur = cur->next;
7742
7743                 /*
7744                  * if we didn't find any references, don't create a
7745                  * new extent record
7746                  */
7747                 if (!back->found_ref)
7748                         continue;
7749
7750                 rec->bad_full_backref = 0;
7751                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
7752                 allocated = 1;
7753
7754                 if (ret)
7755                         goto out;
7756         }
7757 out:
7758         if (trans) {
7759                 int err = btrfs_commit_transaction(trans, info->extent_root);
7760                 if (!ret)
7761                         ret = err;
7762         }
7763
7764         btrfs_release_path(&path);
7765         return ret;
7766 }
7767
7768 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7769                               struct extent_record *rec)
7770 {
7771         struct btrfs_trans_handle *trans;
7772         struct btrfs_root *root = fs_info->extent_root;
7773         struct btrfs_path path;
7774         struct btrfs_extent_item *ei;
7775         struct btrfs_key key;
7776         u64 flags;
7777         int ret = 0;
7778
7779         key.objectid = rec->start;
7780         if (rec->metadata) {
7781                 key.type = BTRFS_METADATA_ITEM_KEY;
7782                 key.offset = rec->info_level;
7783         } else {
7784                 key.type = BTRFS_EXTENT_ITEM_KEY;
7785                 key.offset = rec->max_size;
7786         }
7787
7788         trans = btrfs_start_transaction(root, 0);
7789         if (IS_ERR(trans))
7790                 return PTR_ERR(trans);
7791
7792         btrfs_init_path(&path);
7793         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7794         if (ret < 0) {
7795                 btrfs_release_path(&path);
7796                 btrfs_commit_transaction(trans, root);
7797                 return ret;
7798         } else if (ret) {
7799                 fprintf(stderr, "Didn't find extent for %llu\n",
7800                         (unsigned long long)rec->start);
7801                 btrfs_release_path(&path);
7802                 btrfs_commit_transaction(trans, root);
7803                 return -ENOENT;
7804         }
7805
7806         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7807                             struct btrfs_extent_item);
7808         flags = btrfs_extent_flags(path.nodes[0], ei);
7809         if (rec->flag_block_full_backref) {
7810                 fprintf(stderr, "setting full backref on %llu\n",
7811                         (unsigned long long)key.objectid);
7812                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7813         } else {
7814                 fprintf(stderr, "clearing full backref on %llu\n",
7815                         (unsigned long long)key.objectid);
7816                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7817         }
7818         btrfs_set_extent_flags(path.nodes[0], ei, flags);
7819         btrfs_mark_buffer_dirty(path.nodes[0]);
7820         btrfs_release_path(&path);
7821         return btrfs_commit_transaction(trans, root);
7822 }
7823
7824 /* right now we only prune from the extent allocation tree */
7825 static int prune_one_block(struct btrfs_trans_handle *trans,
7826                            struct btrfs_fs_info *info,
7827                            struct btrfs_corrupt_block *corrupt)
7828 {
7829         int ret;
7830         struct btrfs_path path;
7831         struct extent_buffer *eb;
7832         u64 found;
7833         int slot;
7834         int nritems;
7835         int level = corrupt->level + 1;
7836
7837         btrfs_init_path(&path);
7838 again:
7839         /* we want to stop at the parent to our busted block */
7840         path.lowest_level = level;
7841
7842         ret = btrfs_search_slot(trans, info->extent_root,
7843                                 &corrupt->key, &path, -1, 1);
7844
7845         if (ret < 0)
7846                 goto out;
7847
7848         eb = path.nodes[level];
7849         if (!eb) {
7850                 ret = -ENOENT;
7851                 goto out;
7852         }
7853
7854         /*
7855          * hopefully the search gave us the block we want to prune,
7856          * lets try that first
7857          */
7858         slot = path.slots[level];
7859         found =  btrfs_node_blockptr(eb, slot);
7860         if (found == corrupt->cache.start)
7861                 goto del_ptr;
7862
7863         nritems = btrfs_header_nritems(eb);
7864
7865         /* the search failed, lets scan this node and hope we find it */
7866         for (slot = 0; slot < nritems; slot++) {
7867                 found =  btrfs_node_blockptr(eb, slot);
7868                 if (found == corrupt->cache.start)
7869                         goto del_ptr;
7870         }
7871         /*
7872          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7873          * to this block
7874          */
7875         if (eb == info->extent_root->node) {
7876                 ret = -ENOENT;
7877                 goto out;
7878         } else {
7879                 level++;
7880                 btrfs_release_path(&path);
7881                 goto again;
7882         }
7883
7884 del_ptr:
7885         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7886         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7887
7888 out:
7889         btrfs_release_path(&path);
7890         return ret;
7891 }
7892
7893 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7894 {
7895         struct btrfs_trans_handle *trans = NULL;
7896         struct cache_extent *cache;
7897         struct btrfs_corrupt_block *corrupt;
7898
7899         while (1) {
7900                 cache = search_cache_extent(info->corrupt_blocks, 0);
7901                 if (!cache)
7902                         break;
7903                 if (!trans) {
7904                         trans = btrfs_start_transaction(info->extent_root, 1);
7905                         if (IS_ERR(trans))
7906                                 return PTR_ERR(trans);
7907                 }
7908                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7909                 prune_one_block(trans, info, corrupt);
7910                 remove_cache_extent(info->corrupt_blocks, cache);
7911         }
7912         if (trans)
7913                 return btrfs_commit_transaction(trans, info->extent_root);
7914         return 0;
7915 }
7916
7917 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7918 {
7919         struct btrfs_block_group_cache *cache;
7920         u64 start, end;
7921         int ret;
7922
7923         while (1) {
7924                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7925                                             &start, &end, EXTENT_DIRTY);
7926                 if (ret)
7927                         break;
7928                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7929                                    GFP_NOFS);
7930         }
7931
7932         start = 0;
7933         while (1) {
7934                 cache = btrfs_lookup_first_block_group(fs_info, start);
7935                 if (!cache)
7936                         break;
7937                 if (cache->cached)
7938                         cache->cached = 0;
7939                 start = cache->key.objectid + cache->key.offset;
7940         }
7941 }
7942
7943 static int check_extent_refs(struct btrfs_root *root,
7944                              struct cache_tree *extent_cache)
7945 {
7946         struct extent_record *rec;
7947         struct cache_extent *cache;
7948         int err = 0;
7949         int ret = 0;
7950         int fixed = 0;
7951         int had_dups = 0;
7952         int recorded = 0;
7953
7954         if (repair) {
7955                 /*
7956                  * if we're doing a repair, we have to make sure
7957                  * we don't allocate from the problem extents.
7958                  * In the worst case, this will be all the
7959                  * extents in the FS
7960                  */
7961                 cache = search_cache_extent(extent_cache, 0);
7962                 while(cache) {
7963                         rec = container_of(cache, struct extent_record, cache);
7964                         set_extent_dirty(root->fs_info->excluded_extents,
7965                                          rec->start,
7966                                          rec->start + rec->max_size - 1,
7967                                          GFP_NOFS);
7968                         cache = next_cache_extent(cache);
7969                 }
7970
7971                 /* pin down all the corrupted blocks too */
7972                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7973                 while(cache) {
7974                         set_extent_dirty(root->fs_info->excluded_extents,
7975                                          cache->start,
7976                                          cache->start + cache->size - 1,
7977                                          GFP_NOFS);
7978                         cache = next_cache_extent(cache);
7979                 }
7980                 prune_corrupt_blocks(root->fs_info);
7981                 reset_cached_block_groups(root->fs_info);
7982         }
7983
7984         reset_cached_block_groups(root->fs_info);
7985
7986         /*
7987          * We need to delete any duplicate entries we find first otherwise we
7988          * could mess up the extent tree when we have backrefs that actually
7989          * belong to a different extent item and not the weird duplicate one.
7990          */
7991         while (repair && !list_empty(&duplicate_extents)) {
7992                 rec = to_extent_record(duplicate_extents.next);
7993                 list_del_init(&rec->list);
7994
7995                 /* Sometimes we can find a backref before we find an actual
7996                  * extent, so we need to process it a little bit to see if there
7997                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7998                  * if this is a backref screwup.  If we need to delete stuff
7999                  * process_duplicates() will return 0, otherwise it will return
8000                  * 1 and we
8001                  */
8002                 if (process_duplicates(root, extent_cache, rec))
8003                         continue;
8004                 ret = delete_duplicate_records(root, rec);
8005                 if (ret < 0)
8006                         return ret;
8007                 /*
8008                  * delete_duplicate_records will return the number of entries
8009                  * deleted, so if it's greater than 0 then we know we actually
8010                  * did something and we need to remove.
8011                  */
8012                 if (ret)
8013                         had_dups = 1;
8014         }
8015
8016         if (had_dups)
8017                 return -EAGAIN;
8018
8019         while(1) {
8020                 int cur_err = 0;
8021
8022                 fixed = 0;
8023                 recorded = 0;
8024                 cache = search_cache_extent(extent_cache, 0);
8025                 if (!cache)
8026                         break;
8027                 rec = container_of(cache, struct extent_record, cache);
8028                 if (rec->num_duplicates) {
8029                         fprintf(stderr, "extent item %llu has multiple extent "
8030                                 "items\n", (unsigned long long)rec->start);
8031                         err = 1;
8032                         cur_err = 1;
8033                 }
8034
8035                 if (rec->refs != rec->extent_item_refs) {
8036                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8037                                 (unsigned long long)rec->start,
8038                                 (unsigned long long)rec->nr);
8039                         fprintf(stderr, "extent item %llu, found %llu\n",
8040                                 (unsigned long long)rec->extent_item_refs,
8041                                 (unsigned long long)rec->refs);
8042                         ret = record_orphan_data_extents(root->fs_info, rec);
8043                         if (ret < 0)
8044                                 goto repair_abort;
8045                         if (ret == 0) {
8046                                 recorded = 1;
8047                         } else {
8048                                 /*
8049                                  * we can't use the extent to repair file
8050                                  * extent, let the fallback method handle it.
8051                                  */
8052                                 if (!fixed && repair) {
8053                                         ret = fixup_extent_refs(
8054                                                         root->fs_info,
8055                                                         extent_cache, rec);
8056                                         if (ret)
8057                                                 goto repair_abort;
8058                                         fixed = 1;
8059                                 }
8060                         }
8061                         err = 1;
8062                         cur_err = 1;
8063                 }
8064                 if (all_backpointers_checked(rec, 1)) {
8065                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8066                                 (unsigned long long)rec->start,
8067                                 (unsigned long long)rec->nr);
8068
8069                         if (!fixed && !recorded && repair) {
8070                                 ret = fixup_extent_refs(root->fs_info,
8071                                                         extent_cache, rec);
8072                                 if (ret)
8073                                         goto repair_abort;
8074                                 fixed = 1;
8075                         }
8076                         cur_err = 1;
8077                         err = 1;
8078                 }
8079                 if (!rec->owner_ref_checked) {
8080                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8081                                 (unsigned long long)rec->start,
8082                                 (unsigned long long)rec->nr);
8083                         if (!fixed && !recorded && repair) {
8084                                 ret = fixup_extent_refs(root->fs_info,
8085                                                         extent_cache, rec);
8086                                 if (ret)
8087                                         goto repair_abort;
8088                                 fixed = 1;
8089                         }
8090                         err = 1;
8091                         cur_err = 1;
8092                 }
8093                 if (rec->bad_full_backref) {
8094                         fprintf(stderr, "bad full backref, on [%llu]\n",
8095                                 (unsigned long long)rec->start);
8096                         if (repair) {
8097                                 ret = fixup_extent_flags(root->fs_info, rec);
8098                                 if (ret)
8099                                         goto repair_abort;
8100                                 fixed = 1;
8101                         }
8102                         err = 1;
8103                         cur_err = 1;
8104                 }
8105                 /*
8106                  * Although it's not a extent ref's problem, we reuse this
8107                  * routine for error reporting.
8108                  * No repair function yet.
8109                  */
8110                 if (rec->crossing_stripes) {
8111                         fprintf(stderr,
8112                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8113                                 rec->start, rec->start + rec->max_size);
8114                         err = 1;
8115                         cur_err = 1;
8116                 }
8117
8118                 if (rec->wrong_chunk_type) {
8119                         fprintf(stderr,
8120                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8121                                 rec->start, rec->start + rec->max_size);
8122                         err = 1;
8123                         cur_err = 1;
8124                 }
8125
8126                 remove_cache_extent(extent_cache, cache);
8127                 free_all_extent_backrefs(rec);
8128                 if (!init_extent_tree && repair && (!cur_err || fixed))
8129                         clear_extent_dirty(root->fs_info->excluded_extents,
8130                                            rec->start,
8131                                            rec->start + rec->max_size - 1,
8132                                            GFP_NOFS);
8133                 free(rec);
8134         }
8135 repair_abort:
8136         if (repair) {
8137                 if (ret && ret != -EAGAIN) {
8138                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8139                         exit(1);
8140                 } else if (!ret) {
8141                         struct btrfs_trans_handle *trans;
8142
8143                         root = root->fs_info->extent_root;
8144                         trans = btrfs_start_transaction(root, 1);
8145                         if (IS_ERR(trans)) {
8146                                 ret = PTR_ERR(trans);
8147                                 goto repair_abort;
8148                         }
8149
8150                         btrfs_fix_block_accounting(trans, root);
8151                         ret = btrfs_commit_transaction(trans, root);
8152                         if (ret)
8153                                 goto repair_abort;
8154                 }
8155                 if (err)
8156                         fprintf(stderr, "repaired damaged extent references\n");
8157                 return ret;
8158         }
8159         return err;
8160 }
8161
8162 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8163 {
8164         u64 stripe_size;
8165
8166         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8167                 stripe_size = length;
8168                 stripe_size /= num_stripes;
8169         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8170                 stripe_size = length * 2;
8171                 stripe_size /= num_stripes;
8172         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8173                 stripe_size = length;
8174                 stripe_size /= (num_stripes - 1);
8175         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8176                 stripe_size = length;
8177                 stripe_size /= (num_stripes - 2);
8178         } else {
8179                 stripe_size = length;
8180         }
8181         return stripe_size;
8182 }
8183
8184 /*
8185  * Check the chunk with its block group/dev list ref:
8186  * Return 0 if all refs seems valid.
8187  * Return 1 if part of refs seems valid, need later check for rebuild ref
8188  * like missing block group and needs to search extent tree to rebuild them.
8189  * Return -1 if essential refs are missing and unable to rebuild.
8190  */
8191 static int check_chunk_refs(struct chunk_record *chunk_rec,
8192                             struct block_group_tree *block_group_cache,
8193                             struct device_extent_tree *dev_extent_cache,
8194                             int silent)
8195 {
8196         struct cache_extent *block_group_item;
8197         struct block_group_record *block_group_rec;
8198         struct cache_extent *dev_extent_item;
8199         struct device_extent_record *dev_extent_rec;
8200         u64 devid;
8201         u64 offset;
8202         u64 length;
8203         int metadump_v2 = 0;
8204         int i;
8205         int ret = 0;
8206
8207         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8208                                                chunk_rec->offset,
8209                                                chunk_rec->length);
8210         if (block_group_item) {
8211                 block_group_rec = container_of(block_group_item,
8212                                                struct block_group_record,
8213                                                cache);
8214                 if (chunk_rec->length != block_group_rec->offset ||
8215                     chunk_rec->offset != block_group_rec->objectid ||
8216                     (!metadump_v2 &&
8217                      chunk_rec->type_flags != block_group_rec->flags)) {
8218                         if (!silent)
8219                                 fprintf(stderr,
8220                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8221                                         chunk_rec->objectid,
8222                                         chunk_rec->type,
8223                                         chunk_rec->offset,
8224                                         chunk_rec->length,
8225                                         chunk_rec->offset,
8226                                         chunk_rec->type_flags,
8227                                         block_group_rec->objectid,
8228                                         block_group_rec->type,
8229                                         block_group_rec->offset,
8230                                         block_group_rec->offset,
8231                                         block_group_rec->objectid,
8232                                         block_group_rec->flags);
8233                         ret = -1;
8234                 } else {
8235                         list_del_init(&block_group_rec->list);
8236                         chunk_rec->bg_rec = block_group_rec;
8237                 }
8238         } else {
8239                 if (!silent)
8240                         fprintf(stderr,
8241                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8242                                 chunk_rec->objectid,
8243                                 chunk_rec->type,
8244                                 chunk_rec->offset,
8245                                 chunk_rec->length,
8246                                 chunk_rec->offset,
8247                                 chunk_rec->type_flags);
8248                 ret = 1;
8249         }
8250
8251         if (metadump_v2)
8252                 return ret;
8253
8254         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8255                                     chunk_rec->num_stripes);
8256         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8257                 devid = chunk_rec->stripes[i].devid;
8258                 offset = chunk_rec->stripes[i].offset;
8259                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8260                                                        devid, offset, length);
8261                 if (dev_extent_item) {
8262                         dev_extent_rec = container_of(dev_extent_item,
8263                                                 struct device_extent_record,
8264                                                 cache);
8265                         if (dev_extent_rec->objectid != devid ||
8266                             dev_extent_rec->offset != offset ||
8267                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8268                             dev_extent_rec->length != length) {
8269                                 if (!silent)
8270                                         fprintf(stderr,
8271                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8272                                                 chunk_rec->objectid,
8273                                                 chunk_rec->type,
8274                                                 chunk_rec->offset,
8275                                                 chunk_rec->stripes[i].devid,
8276                                                 chunk_rec->stripes[i].offset,
8277                                                 dev_extent_rec->objectid,
8278                                                 dev_extent_rec->offset,
8279                                                 dev_extent_rec->length);
8280                                 ret = -1;
8281                         } else {
8282                                 list_move(&dev_extent_rec->chunk_list,
8283                                           &chunk_rec->dextents);
8284                         }
8285                 } else {
8286                         if (!silent)
8287                                 fprintf(stderr,
8288                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8289                                         chunk_rec->objectid,
8290                                         chunk_rec->type,
8291                                         chunk_rec->offset,
8292                                         chunk_rec->stripes[i].devid,
8293                                         chunk_rec->stripes[i].offset);
8294                         ret = -1;
8295                 }
8296         }
8297         return ret;
8298 }
8299
8300 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8301 int check_chunks(struct cache_tree *chunk_cache,
8302                  struct block_group_tree *block_group_cache,
8303                  struct device_extent_tree *dev_extent_cache,
8304                  struct list_head *good, struct list_head *bad,
8305                  struct list_head *rebuild, int silent)
8306 {
8307         struct cache_extent *chunk_item;
8308         struct chunk_record *chunk_rec;
8309         struct block_group_record *bg_rec;
8310         struct device_extent_record *dext_rec;
8311         int err;
8312         int ret = 0;
8313
8314         chunk_item = first_cache_extent(chunk_cache);
8315         while (chunk_item) {
8316                 chunk_rec = container_of(chunk_item, struct chunk_record,
8317                                          cache);
8318                 err = check_chunk_refs(chunk_rec, block_group_cache,
8319                                        dev_extent_cache, silent);
8320                 if (err < 0)
8321                         ret = err;
8322                 if (err == 0 && good)
8323                         list_add_tail(&chunk_rec->list, good);
8324                 if (err > 0 && rebuild)
8325                         list_add_tail(&chunk_rec->list, rebuild);
8326                 if (err < 0 && bad)
8327                         list_add_tail(&chunk_rec->list, bad);
8328                 chunk_item = next_cache_extent(chunk_item);
8329         }
8330
8331         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8332                 if (!silent)
8333                         fprintf(stderr,
8334                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8335                                 bg_rec->objectid,
8336                                 bg_rec->offset,
8337                                 bg_rec->flags);
8338                 if (!ret)
8339                         ret = 1;
8340         }
8341
8342         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8343                             chunk_list) {
8344                 if (!silent)
8345                         fprintf(stderr,
8346                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8347                                 dext_rec->objectid,
8348                                 dext_rec->offset,
8349                                 dext_rec->length);
8350                 if (!ret)
8351                         ret = 1;
8352         }
8353         return ret;
8354 }
8355
8356
8357 static int check_device_used(struct device_record *dev_rec,
8358                              struct device_extent_tree *dext_cache)
8359 {
8360         struct cache_extent *cache;
8361         struct device_extent_record *dev_extent_rec;
8362         u64 total_byte = 0;
8363
8364         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8365         while (cache) {
8366                 dev_extent_rec = container_of(cache,
8367                                               struct device_extent_record,
8368                                               cache);
8369                 if (dev_extent_rec->objectid != dev_rec->devid)
8370                         break;
8371
8372                 list_del_init(&dev_extent_rec->device_list);
8373                 total_byte += dev_extent_rec->length;
8374                 cache = next_cache_extent(cache);
8375         }
8376
8377         if (total_byte != dev_rec->byte_used) {
8378                 fprintf(stderr,
8379                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8380                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8381                         dev_rec->type, dev_rec->offset);
8382                 return -1;
8383         } else {
8384                 return 0;
8385         }
8386 }
8387
8388 /* check btrfs_dev_item -> btrfs_dev_extent */
8389 static int check_devices(struct rb_root *dev_cache,
8390                          struct device_extent_tree *dev_extent_cache)
8391 {
8392         struct rb_node *dev_node;
8393         struct device_record *dev_rec;
8394         struct device_extent_record *dext_rec;
8395         int err;
8396         int ret = 0;
8397
8398         dev_node = rb_first(dev_cache);
8399         while (dev_node) {
8400                 dev_rec = container_of(dev_node, struct device_record, node);
8401                 err = check_device_used(dev_rec, dev_extent_cache);
8402                 if (err)
8403                         ret = err;
8404
8405                 dev_node = rb_next(dev_node);
8406         }
8407         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8408                             device_list) {
8409                 fprintf(stderr,
8410                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8411                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8412                 if (!ret)
8413                         ret = 1;
8414         }
8415         return ret;
8416 }
8417
8418 static int add_root_item_to_list(struct list_head *head,
8419                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8420                                   u8 level, u8 drop_level,
8421                                   int level_size, struct btrfs_key *drop_key)
8422 {
8423
8424         struct root_item_record *ri_rec;
8425         ri_rec = malloc(sizeof(*ri_rec));
8426         if (!ri_rec)
8427                 return -ENOMEM;
8428         ri_rec->bytenr = bytenr;
8429         ri_rec->objectid = objectid;
8430         ri_rec->level = level;
8431         ri_rec->level_size = level_size;
8432         ri_rec->drop_level = drop_level;
8433         ri_rec->last_snapshot = last_snapshot;
8434         if (drop_key)
8435                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8436         list_add_tail(&ri_rec->list, head);
8437
8438         return 0;
8439 }
8440
8441 static void free_root_item_list(struct list_head *list)
8442 {
8443         struct root_item_record *ri_rec;
8444
8445         while (!list_empty(list)) {
8446                 ri_rec = list_first_entry(list, struct root_item_record,
8447                                           list);
8448                 list_del_init(&ri_rec->list);
8449                 free(ri_rec);
8450         }
8451 }
8452
8453 static int deal_root_from_list(struct list_head *list,
8454                                struct btrfs_root *root,
8455                                struct block_info *bits,
8456                                int bits_nr,
8457                                struct cache_tree *pending,
8458                                struct cache_tree *seen,
8459                                struct cache_tree *reada,
8460                                struct cache_tree *nodes,
8461                                struct cache_tree *extent_cache,
8462                                struct cache_tree *chunk_cache,
8463                                struct rb_root *dev_cache,
8464                                struct block_group_tree *block_group_cache,
8465                                struct device_extent_tree *dev_extent_cache)
8466 {
8467         int ret = 0;
8468         u64 last;
8469
8470         while (!list_empty(list)) {
8471                 struct root_item_record *rec;
8472                 struct extent_buffer *buf;
8473                 rec = list_entry(list->next,
8474                                  struct root_item_record, list);
8475                 last = 0;
8476                 buf = read_tree_block(root->fs_info->tree_root,
8477                                       rec->bytenr, rec->level_size, 0);
8478                 if (!extent_buffer_uptodate(buf)) {
8479                         free_extent_buffer(buf);
8480                         ret = -EIO;
8481                         break;
8482                 }
8483                 ret = add_root_to_pending(buf, extent_cache, pending,
8484                                     seen, nodes, rec->objectid);
8485                 if (ret < 0)
8486                         break;
8487                 /*
8488                  * To rebuild extent tree, we need deal with snapshot
8489                  * one by one, otherwise we deal with node firstly which
8490                  * can maximize readahead.
8491                  */
8492                 while (1) {
8493                         ret = run_next_block(root, bits, bits_nr, &last,
8494                                              pending, seen, reada, nodes,
8495                                              extent_cache, chunk_cache,
8496                                              dev_cache, block_group_cache,
8497                                              dev_extent_cache, rec);
8498                         if (ret != 0)
8499                                 break;
8500                 }
8501                 free_extent_buffer(buf);
8502                 list_del(&rec->list);
8503                 free(rec);
8504                 if (ret < 0)
8505                         break;
8506         }
8507         while (ret >= 0) {
8508                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8509                                      reada, nodes, extent_cache, chunk_cache,
8510                                      dev_cache, block_group_cache,
8511                                      dev_extent_cache, NULL);
8512                 if (ret != 0) {
8513                         if (ret > 0)
8514                                 ret = 0;
8515                         break;
8516                 }
8517         }
8518         return ret;
8519 }
8520
8521 static int check_chunks_and_extents(struct btrfs_root *root)
8522 {
8523         struct rb_root dev_cache;
8524         struct cache_tree chunk_cache;
8525         struct block_group_tree block_group_cache;
8526         struct device_extent_tree dev_extent_cache;
8527         struct cache_tree extent_cache;
8528         struct cache_tree seen;
8529         struct cache_tree pending;
8530         struct cache_tree reada;
8531         struct cache_tree nodes;
8532         struct extent_io_tree excluded_extents;
8533         struct cache_tree corrupt_blocks;
8534         struct btrfs_path path;
8535         struct btrfs_key key;
8536         struct btrfs_key found_key;
8537         int ret, err = 0;
8538         struct block_info *bits;
8539         int bits_nr;
8540         struct extent_buffer *leaf;
8541         int slot;
8542         struct btrfs_root_item ri;
8543         struct list_head dropping_trees;
8544         struct list_head normal_trees;
8545         struct btrfs_root *root1;
8546         u64 objectid;
8547         u32 level_size;
8548         u8 level;
8549
8550         dev_cache = RB_ROOT;
8551         cache_tree_init(&chunk_cache);
8552         block_group_tree_init(&block_group_cache);
8553         device_extent_tree_init(&dev_extent_cache);
8554
8555         cache_tree_init(&extent_cache);
8556         cache_tree_init(&seen);
8557         cache_tree_init(&pending);
8558         cache_tree_init(&nodes);
8559         cache_tree_init(&reada);
8560         cache_tree_init(&corrupt_blocks);
8561         extent_io_tree_init(&excluded_extents);
8562         INIT_LIST_HEAD(&dropping_trees);
8563         INIT_LIST_HEAD(&normal_trees);
8564
8565         if (repair) {
8566                 root->fs_info->excluded_extents = &excluded_extents;
8567                 root->fs_info->fsck_extent_cache = &extent_cache;
8568                 root->fs_info->free_extent_hook = free_extent_hook;
8569                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8570         }
8571
8572         bits_nr = 1024;
8573         bits = malloc(bits_nr * sizeof(struct block_info));
8574         if (!bits) {
8575                 perror("malloc");
8576                 exit(1);
8577         }
8578
8579         if (ctx.progress_enabled) {
8580                 ctx.tp = TASK_EXTENTS;
8581                 task_start(ctx.info);
8582         }
8583
8584 again:
8585         root1 = root->fs_info->tree_root;
8586         level = btrfs_header_level(root1->node);
8587         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8588                                     root1->node->start, 0, level, 0,
8589                                     root1->nodesize, NULL);
8590         if (ret < 0)
8591                 goto out;
8592         root1 = root->fs_info->chunk_root;
8593         level = btrfs_header_level(root1->node);
8594         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8595                                     root1->node->start, 0, level, 0,
8596                                     root1->nodesize, NULL);
8597         if (ret < 0)
8598                 goto out;
8599         btrfs_init_path(&path);
8600         key.offset = 0;
8601         key.objectid = 0;
8602         key.type = BTRFS_ROOT_ITEM_KEY;
8603         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8604                                         &key, &path, 0, 0);
8605         if (ret < 0)
8606                 goto out;
8607         while(1) {
8608                 leaf = path.nodes[0];
8609                 slot = path.slots[0];
8610                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8611                         ret = btrfs_next_leaf(root, &path);
8612                         if (ret != 0)
8613                                 break;
8614                         leaf = path.nodes[0];
8615                         slot = path.slots[0];
8616                 }
8617                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8618                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8619                         unsigned long offset;
8620                         u64 last_snapshot;
8621
8622                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8623                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8624                         last_snapshot = btrfs_root_last_snapshot(&ri);
8625                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8626                                 level = btrfs_root_level(&ri);
8627                                 level_size = root->nodesize;
8628                                 ret = add_root_item_to_list(&normal_trees,
8629                                                 found_key.objectid,
8630                                                 btrfs_root_bytenr(&ri),
8631                                                 last_snapshot, level,
8632                                                 0, level_size, NULL);
8633                                 if (ret < 0)
8634                                         goto out;
8635                         } else {
8636                                 level = btrfs_root_level(&ri);
8637                                 level_size = root->nodesize;
8638                                 objectid = found_key.objectid;
8639                                 btrfs_disk_key_to_cpu(&found_key,
8640                                                       &ri.drop_progress);
8641                                 ret = add_root_item_to_list(&dropping_trees,
8642                                                 objectid,
8643                                                 btrfs_root_bytenr(&ri),
8644                                                 last_snapshot, level,
8645                                                 ri.drop_level,
8646                                                 level_size, &found_key);
8647                                 if (ret < 0)
8648                                         goto out;
8649                         }
8650                 }
8651                 path.slots[0]++;
8652         }
8653         btrfs_release_path(&path);
8654
8655         /*
8656          * check_block can return -EAGAIN if it fixes something, please keep
8657          * this in mind when dealing with return values from these functions, if
8658          * we get -EAGAIN we want to fall through and restart the loop.
8659          */
8660         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8661                                   &seen, &reada, &nodes, &extent_cache,
8662                                   &chunk_cache, &dev_cache, &block_group_cache,
8663                                   &dev_extent_cache);
8664         if (ret < 0) {
8665                 if (ret == -EAGAIN)
8666                         goto loop;
8667                 goto out;
8668         }
8669         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8670                                   &pending, &seen, &reada, &nodes,
8671                                   &extent_cache, &chunk_cache, &dev_cache,
8672                                   &block_group_cache, &dev_extent_cache);
8673         if (ret < 0) {
8674                 if (ret == -EAGAIN)
8675                         goto loop;
8676                 goto out;
8677         }
8678
8679         ret = check_chunks(&chunk_cache, &block_group_cache,
8680                            &dev_extent_cache, NULL, NULL, NULL, 0);
8681         if (ret) {
8682                 if (ret == -EAGAIN)
8683                         goto loop;
8684                 err = ret;
8685         }
8686
8687         ret = check_extent_refs(root, &extent_cache);
8688         if (ret < 0) {
8689                 if (ret == -EAGAIN)
8690                         goto loop;
8691                 goto out;
8692         }
8693
8694         ret = check_devices(&dev_cache, &dev_extent_cache);
8695         if (ret && err)
8696                 ret = err;
8697
8698 out:
8699         task_stop(ctx.info);
8700         if (repair) {
8701                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8702                 extent_io_tree_cleanup(&excluded_extents);
8703                 root->fs_info->fsck_extent_cache = NULL;
8704                 root->fs_info->free_extent_hook = NULL;
8705                 root->fs_info->corrupt_blocks = NULL;
8706                 root->fs_info->excluded_extents = NULL;
8707         }
8708         free(bits);
8709         free_chunk_cache_tree(&chunk_cache);
8710         free_device_cache_tree(&dev_cache);
8711         free_block_group_tree(&block_group_cache);
8712         free_device_extent_tree(&dev_extent_cache);
8713         free_extent_cache_tree(&seen);
8714         free_extent_cache_tree(&pending);
8715         free_extent_cache_tree(&reada);
8716         free_extent_cache_tree(&nodes);
8717         return ret;
8718 loop:
8719         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8720         free_extent_cache_tree(&seen);
8721         free_extent_cache_tree(&pending);
8722         free_extent_cache_tree(&reada);
8723         free_extent_cache_tree(&nodes);
8724         free_chunk_cache_tree(&chunk_cache);
8725         free_block_group_tree(&block_group_cache);
8726         free_device_cache_tree(&dev_cache);
8727         free_device_extent_tree(&dev_extent_cache);
8728         free_extent_record_cache(root->fs_info, &extent_cache);
8729         free_root_item_list(&normal_trees);
8730         free_root_item_list(&dropping_trees);
8731         extent_io_tree_cleanup(&excluded_extents);
8732         goto again;
8733 }
8734
8735 /*
8736  * Check backrefs of a tree block given by @bytenr or @eb.
8737  *
8738  * @root:       the root containing the @bytenr or @eb
8739  * @eb:         tree block extent buffer, can be NULL
8740  * @bytenr:     bytenr of the tree block to search
8741  * @level:      tree level of the tree block
8742  * @owner:      owner of the tree block
8743  *
8744  * Return >0 for any error found and output error message
8745  * Return 0 for no error found
8746  */
8747 static int check_tree_block_ref(struct btrfs_root *root,
8748                                 struct extent_buffer *eb, u64 bytenr,
8749                                 int level, u64 owner)
8750 {
8751         struct btrfs_key key;
8752         struct btrfs_root *extent_root = root->fs_info->extent_root;
8753         struct btrfs_path path;
8754         struct btrfs_extent_item *ei;
8755         struct btrfs_extent_inline_ref *iref;
8756         struct extent_buffer *leaf;
8757         unsigned long end;
8758         unsigned long ptr;
8759         int slot;
8760         int skinny_level;
8761         int type;
8762         u32 nodesize = root->nodesize;
8763         u32 item_size;
8764         u64 offset;
8765         int found_ref = 0;
8766         int err = 0;
8767         int ret;
8768
8769         btrfs_init_path(&path);
8770         key.objectid = bytenr;
8771         if (btrfs_fs_incompat(root->fs_info,
8772                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8773                 key.type = BTRFS_METADATA_ITEM_KEY;
8774         else
8775                 key.type = BTRFS_EXTENT_ITEM_KEY;
8776         key.offset = (u64)-1;
8777
8778         /* Search for the backref in extent tree */
8779         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8780         if (ret < 0) {
8781                 err |= BACKREF_MISSING;
8782                 goto out;
8783         }
8784         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8785         if (ret) {
8786                 err |= BACKREF_MISSING;
8787                 goto out;
8788         }
8789
8790         leaf = path.nodes[0];
8791         slot = path.slots[0];
8792         btrfs_item_key_to_cpu(leaf, &key, slot);
8793
8794         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8795
8796         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8797                 skinny_level = (int)key.offset;
8798                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8799         } else {
8800                 struct btrfs_tree_block_info *info;
8801
8802                 info = (struct btrfs_tree_block_info *)(ei + 1);
8803                 skinny_level = btrfs_tree_block_level(leaf, info);
8804                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8805         }
8806
8807         if (eb) {
8808                 u64 header_gen;
8809                 u64 extent_gen;
8810
8811                 if (!(btrfs_extent_flags(leaf, ei) &
8812                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8813                         error(
8814                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8815                                 key.objectid, nodesize,
8816                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8817                         err = BACKREF_MISMATCH;
8818                 }
8819                 header_gen = btrfs_header_generation(eb);
8820                 extent_gen = btrfs_extent_generation(leaf, ei);
8821                 if (header_gen != extent_gen) {
8822                         error(
8823         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8824                                 key.objectid, nodesize, header_gen,
8825                                 extent_gen);
8826                         err = BACKREF_MISMATCH;
8827                 }
8828                 if (level != skinny_level) {
8829                         error(
8830                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8831                                 key.objectid, nodesize, level, skinny_level);
8832                         err = BACKREF_MISMATCH;
8833                 }
8834                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8835                         error(
8836                         "extent[%llu %u] is referred by other roots than %llu",
8837                                 key.objectid, nodesize, root->objectid);
8838                         err = BACKREF_MISMATCH;
8839                 }
8840         }
8841
8842         /*
8843          * Iterate the extent/metadata item to find the exact backref
8844          */
8845         item_size = btrfs_item_size_nr(leaf, slot);
8846         ptr = (unsigned long)iref;
8847         end = (unsigned long)ei + item_size;
8848         while (ptr < end) {
8849                 iref = (struct btrfs_extent_inline_ref *)ptr;
8850                 type = btrfs_extent_inline_ref_type(leaf, iref);
8851                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8852
8853                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8854                         (offset == root->objectid || offset == owner)) {
8855                         found_ref = 1;
8856                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8857                         /* Check if the backref points to valid referencer */
8858                         found_ref = !check_tree_block_ref(root, NULL, offset,
8859                                                           level + 1, owner);
8860                 }
8861
8862                 if (found_ref)
8863                         break;
8864                 ptr += btrfs_extent_inline_ref_size(type);
8865         }
8866
8867         /*
8868          * Inlined extent item doesn't have what we need, check
8869          * TREE_BLOCK_REF_KEY
8870          */
8871         if (!found_ref) {
8872                 btrfs_release_path(&path);
8873                 key.objectid = bytenr;
8874                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8875                 key.offset = root->objectid;
8876
8877                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8878                 if (!ret)
8879                         found_ref = 1;
8880         }
8881         if (!found_ref)
8882                 err |= BACKREF_MISSING;
8883 out:
8884         btrfs_release_path(&path);
8885         if (eb && (err & BACKREF_MISSING))
8886                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8887                         bytenr, nodesize, owner, level);
8888         return err;
8889 }
8890
8891 /*
8892  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8893  *
8894  * Return >0 any error found and output error message
8895  * Return 0 for no error found
8896  */
8897 static int check_extent_data_item(struct btrfs_root *root,
8898                                   struct extent_buffer *eb, int slot)
8899 {
8900         struct btrfs_file_extent_item *fi;
8901         struct btrfs_path path;
8902         struct btrfs_root *extent_root = root->fs_info->extent_root;
8903         struct btrfs_key fi_key;
8904         struct btrfs_key dbref_key;
8905         struct extent_buffer *leaf;
8906         struct btrfs_extent_item *ei;
8907         struct btrfs_extent_inline_ref *iref;
8908         struct btrfs_extent_data_ref *dref;
8909         u64 owner;
8910         u64 file_extent_gen;
8911         u64 disk_bytenr;
8912         u64 disk_num_bytes;
8913         u64 extent_num_bytes;
8914         u64 extent_flags;
8915         u64 extent_gen;
8916         u32 item_size;
8917         unsigned long end;
8918         unsigned long ptr;
8919         int type;
8920         u64 ref_root;
8921         int found_dbackref = 0;
8922         int err = 0;
8923         int ret;
8924
8925         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8926         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8927         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8928
8929         /* Nothing to check for hole and inline data extents */
8930         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8931             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8932                 return 0;
8933
8934         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8935         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8936         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8937
8938         /* Check unaligned disk_num_bytes and num_bytes */
8939         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8940                 error(
8941 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8942                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8943                         root->sectorsize);
8944                 err |= BYTES_UNALIGNED;
8945         } else {
8946                 data_bytes_allocated += disk_num_bytes;
8947         }
8948         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8949                 error(
8950 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8951                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8952                         root->sectorsize);
8953                 err |= BYTES_UNALIGNED;
8954         } else {
8955                 data_bytes_referenced += extent_num_bytes;
8956         }
8957         owner = btrfs_header_owner(eb);
8958
8959         /* Check the extent item of the file extent in extent tree */
8960         btrfs_init_path(&path);
8961         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8962         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8963         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8964
8965         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8966         if (ret) {
8967                 err |= BACKREF_MISSING;
8968                 goto error;
8969         }
8970
8971         leaf = path.nodes[0];
8972         slot = path.slots[0];
8973         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8974
8975         extent_flags = btrfs_extent_flags(leaf, ei);
8976         extent_gen = btrfs_extent_generation(leaf, ei);
8977
8978         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8979                 error(
8980                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8981                     disk_bytenr, disk_num_bytes,
8982                     BTRFS_EXTENT_FLAG_DATA);
8983                 err |= BACKREF_MISMATCH;
8984         }
8985
8986         if (file_extent_gen < extent_gen) {
8987                 error(
8988 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8989                         disk_bytenr, disk_num_bytes, file_extent_gen,
8990                         extent_gen);
8991                 err |= BACKREF_MISMATCH;
8992         }
8993
8994         /* Check data backref inside that extent item */
8995         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8996         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8997         ptr = (unsigned long)iref;
8998         end = (unsigned long)ei + item_size;
8999         while (ptr < end) {
9000                 iref = (struct btrfs_extent_inline_ref *)ptr;
9001                 type = btrfs_extent_inline_ref_type(leaf, iref);
9002                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9003
9004                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9005                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
9006                         if (ref_root == owner || ref_root == root->objectid)
9007                                 found_dbackref = 1;
9008                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9009                         found_dbackref = !check_tree_block_ref(root, NULL,
9010                                 btrfs_extent_inline_ref_offset(leaf, iref),
9011                                 0, owner);
9012                 }
9013
9014                 if (found_dbackref)
9015                         break;
9016                 ptr += btrfs_extent_inline_ref_size(type);
9017         }
9018
9019         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9020         if (!found_dbackref) {
9021                 btrfs_release_path(&path);
9022
9023                 btrfs_init_path(&path);
9024                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9025                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9026                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9027                                 fi_key.objectid, fi_key.offset);
9028
9029                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9030                                         &dbref_key, &path, 0, 0);
9031                 if (!ret)
9032                         found_dbackref = 1;
9033         }
9034
9035         if (!found_dbackref)
9036                 err |= BACKREF_MISSING;
9037 error:
9038         btrfs_release_path(&path);
9039         if (err & BACKREF_MISSING) {
9040                 error("data extent[%llu %llu] backref lost",
9041                       disk_bytenr, disk_num_bytes);
9042         }
9043         return err;
9044 }
9045
9046 /*
9047  * Get real tree block level for the case like shared block
9048  * Return >= 0 as tree level
9049  * Return <0 for error
9050  */
9051 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9052 {
9053         struct extent_buffer *eb;
9054         struct btrfs_path path;
9055         struct btrfs_key key;
9056         struct btrfs_extent_item *ei;
9057         u64 flags;
9058         u64 transid;
9059         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9060         u8 backref_level;
9061         u8 header_level;
9062         int ret;
9063
9064         /* Search extent tree for extent generation and level */
9065         key.objectid = bytenr;
9066         key.type = BTRFS_METADATA_ITEM_KEY;
9067         key.offset = (u64)-1;
9068
9069         btrfs_init_path(&path);
9070         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9071         if (ret < 0)
9072                 goto release_out;
9073         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9074         if (ret < 0)
9075                 goto release_out;
9076         if (ret > 0) {
9077                 ret = -ENOENT;
9078                 goto release_out;
9079         }
9080
9081         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9082         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9083                             struct btrfs_extent_item);
9084         flags = btrfs_extent_flags(path.nodes[0], ei);
9085         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9086                 ret = -ENOENT;
9087                 goto release_out;
9088         }
9089
9090         /* Get transid for later read_tree_block() check */
9091         transid = btrfs_extent_generation(path.nodes[0], ei);
9092
9093         /* Get backref level as one source */
9094         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9095                 backref_level = key.offset;
9096         } else {
9097                 struct btrfs_tree_block_info *info;
9098
9099                 info = (struct btrfs_tree_block_info *)(ei + 1);
9100                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9101         }
9102         btrfs_release_path(&path);
9103
9104         /* Get level from tree block as an alternative source */
9105         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9106         if (!extent_buffer_uptodate(eb)) {
9107                 free_extent_buffer(eb);
9108                 return -EIO;
9109         }
9110         header_level = btrfs_header_level(eb);
9111         free_extent_buffer(eb);
9112
9113         if (header_level != backref_level)
9114                 return -EIO;
9115         return header_level;
9116
9117 release_out:
9118         btrfs_release_path(&path);
9119         return ret;
9120 }
9121
9122 /*
9123  * Check if a tree block backref is valid (points to a valid tree block)
9124  * if level == -1, level will be resolved
9125  * Return >0 for any error found and print error message
9126  */
9127 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9128                                     u64 bytenr, int level)
9129 {
9130         struct btrfs_root *root;
9131         struct btrfs_key key;
9132         struct btrfs_path path;
9133         struct extent_buffer *eb;
9134         struct extent_buffer *node;
9135         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9136         int err = 0;
9137         int ret;
9138
9139         /* Query level for level == -1 special case */
9140         if (level == -1)
9141                 level = query_tree_block_level(fs_info, bytenr);
9142         if (level < 0) {
9143                 err |= REFERENCER_MISSING;
9144                 goto out;
9145         }
9146
9147         key.objectid = root_id;
9148         key.type = BTRFS_ROOT_ITEM_KEY;
9149         key.offset = (u64)-1;
9150
9151         root = btrfs_read_fs_root(fs_info, &key);
9152         if (IS_ERR(root)) {
9153                 err |= REFERENCER_MISSING;
9154                 goto out;
9155         }
9156
9157         /* Read out the tree block to get item/node key */
9158         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9159         if (!extent_buffer_uptodate(eb)) {
9160                 err |= REFERENCER_MISSING;
9161                 free_extent_buffer(eb);
9162                 goto out;
9163         }
9164
9165         /* Empty tree, no need to check key */
9166         if (!btrfs_header_nritems(eb) && !level) {
9167                 free_extent_buffer(eb);
9168                 goto out;
9169         }
9170
9171         if (level)
9172                 btrfs_node_key_to_cpu(eb, &key, 0);
9173         else
9174                 btrfs_item_key_to_cpu(eb, &key, 0);
9175
9176         free_extent_buffer(eb);
9177
9178         btrfs_init_path(&path);
9179         path.lowest_level = level;
9180         /* Search with the first key, to ensure we can reach it */
9181         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9182         if (ret < 0) {
9183                 err |= REFERENCER_MISSING;
9184                 goto release_out;
9185         }
9186
9187         node = path.nodes[level];
9188         if (btrfs_header_bytenr(node) != bytenr) {
9189                 error(
9190         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9191                         bytenr, nodesize, bytenr,
9192                         btrfs_header_bytenr(node));
9193                 err |= REFERENCER_MISMATCH;
9194         }
9195         if (btrfs_header_level(node) != level) {
9196                 error(
9197         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9198                         bytenr, nodesize, level,
9199                         btrfs_header_level(node));
9200                 err |= REFERENCER_MISMATCH;
9201         }
9202
9203 release_out:
9204         btrfs_release_path(&path);
9205 out:
9206         if (err & REFERENCER_MISSING) {
9207                 if (level < 0)
9208                         error("extent [%llu %d] lost referencer (owner: %llu)",
9209                                 bytenr, nodesize, root_id);
9210                 else
9211                         error(
9212                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9213                                 bytenr, nodesize, root_id, level);
9214         }
9215
9216         return err;
9217 }
9218
9219 /*
9220  * Check referencer for shared block backref
9221  * If level == -1, this function will resolve the level.
9222  */
9223 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9224                                      u64 parent, u64 bytenr, int level)
9225 {
9226         struct extent_buffer *eb;
9227         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9228         u32 nr;
9229         int found_parent = 0;
9230         int i;
9231
9232         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9233         if (!extent_buffer_uptodate(eb))
9234                 goto out;
9235
9236         if (level == -1)
9237                 level = query_tree_block_level(fs_info, bytenr);
9238         if (level < 0)
9239                 goto out;
9240
9241         if (level + 1 != btrfs_header_level(eb))
9242                 goto out;
9243
9244         nr = btrfs_header_nritems(eb);
9245         for (i = 0; i < nr; i++) {
9246                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9247                         found_parent = 1;
9248                         break;
9249                 }
9250         }
9251 out:
9252         free_extent_buffer(eb);
9253         if (!found_parent) {
9254                 error(
9255         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9256                         bytenr, nodesize, parent, level);
9257                 return REFERENCER_MISSING;
9258         }
9259         return 0;
9260 }
9261
9262 /*
9263  * Check referencer for normal (inlined) data ref
9264  * If len == 0, it will be resolved by searching in extent tree
9265  */
9266 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9267                                      u64 root_id, u64 objectid, u64 offset,
9268                                      u64 bytenr, u64 len, u32 count)
9269 {
9270         struct btrfs_root *root;
9271         struct btrfs_root *extent_root = fs_info->extent_root;
9272         struct btrfs_key key;
9273         struct btrfs_path path;
9274         struct extent_buffer *leaf;
9275         struct btrfs_file_extent_item *fi;
9276         u32 found_count = 0;
9277         int slot;
9278         int ret = 0;
9279
9280         if (!len) {
9281                 key.objectid = bytenr;
9282                 key.type = BTRFS_EXTENT_ITEM_KEY;
9283                 key.offset = (u64)-1;
9284
9285                 btrfs_init_path(&path);
9286                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9287                 if (ret < 0)
9288                         goto out;
9289                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9290                 if (ret)
9291                         goto out;
9292                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9293                 if (key.objectid != bytenr ||
9294                     key.type != BTRFS_EXTENT_ITEM_KEY)
9295                         goto out;
9296                 len = key.offset;
9297                 btrfs_release_path(&path);
9298         }
9299         key.objectid = root_id;
9300         key.type = BTRFS_ROOT_ITEM_KEY;
9301         key.offset = (u64)-1;
9302         btrfs_init_path(&path);
9303
9304         root = btrfs_read_fs_root(fs_info, &key);
9305         if (IS_ERR(root))
9306                 goto out;
9307
9308         key.objectid = objectid;
9309         key.type = BTRFS_EXTENT_DATA_KEY;
9310         /*
9311          * It can be nasty as data backref offset is
9312          * file offset - file extent offset, which is smaller or
9313          * equal to original backref offset.  The only special case is
9314          * overflow.  So we need to special check and do further search.
9315          */
9316         key.offset = offset & (1ULL << 63) ? 0 : offset;
9317
9318         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9319         if (ret < 0)
9320                 goto out;
9321
9322         /*
9323          * Search afterwards to get correct one
9324          * NOTE: As we must do a comprehensive check on the data backref to
9325          * make sure the dref count also matches, we must iterate all file
9326          * extents for that inode.
9327          */
9328         while (1) {
9329                 leaf = path.nodes[0];
9330                 slot = path.slots[0];
9331
9332                 btrfs_item_key_to_cpu(leaf, &key, slot);
9333                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9334                         break;
9335                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9336                 /*
9337                  * Except normal disk bytenr and disk num bytes, we still
9338                  * need to do extra check on dbackref offset as
9339                  * dbackref offset = file_offset - file_extent_offset
9340                  */
9341                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9342                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9343                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9344                     offset)
9345                         found_count++;
9346
9347                 ret = btrfs_next_item(root, &path);
9348                 if (ret)
9349                         break;
9350         }
9351 out:
9352         btrfs_release_path(&path);
9353         if (found_count != count) {
9354                 error(
9355 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9356                         bytenr, len, root_id, objectid, offset, count, found_count);
9357                 return REFERENCER_MISSING;
9358         }
9359         return 0;
9360 }
9361
9362 /*
9363  * Check if the referencer of a shared data backref exists
9364  */
9365 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9366                                      u64 parent, u64 bytenr)
9367 {
9368         struct extent_buffer *eb;
9369         struct btrfs_key key;
9370         struct btrfs_file_extent_item *fi;
9371         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9372         u32 nr;
9373         int found_parent = 0;
9374         int i;
9375
9376         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9377         if (!extent_buffer_uptodate(eb))
9378                 goto out;
9379
9380         nr = btrfs_header_nritems(eb);
9381         for (i = 0; i < nr; i++) {
9382                 btrfs_item_key_to_cpu(eb, &key, i);
9383                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9384                         continue;
9385
9386                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9387                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9388                         continue;
9389
9390                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9391                         found_parent = 1;
9392                         break;
9393                 }
9394         }
9395
9396 out:
9397         free_extent_buffer(eb);
9398         if (!found_parent) {
9399                 error("shared extent %llu referencer lost (parent: %llu)",
9400                         bytenr, parent);
9401                 return REFERENCER_MISSING;
9402         }
9403         return 0;
9404 }
9405
9406 /*
9407  * This function will check a given extent item, including its backref and
9408  * itself (like crossing stripe boundary and type)
9409  *
9410  * Since we don't use extent_record anymore, introduce new error bit
9411  */
9412 static int check_extent_item(struct btrfs_fs_info *fs_info,
9413                              struct extent_buffer *eb, int slot)
9414 {
9415         struct btrfs_extent_item *ei;
9416         struct btrfs_extent_inline_ref *iref;
9417         struct btrfs_extent_data_ref *dref;
9418         unsigned long end;
9419         unsigned long ptr;
9420         int type;
9421         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9422         u32 item_size = btrfs_item_size_nr(eb, slot);
9423         u64 flags;
9424         u64 offset;
9425         int metadata = 0;
9426         int level;
9427         struct btrfs_key key;
9428         int ret;
9429         int err = 0;
9430
9431         btrfs_item_key_to_cpu(eb, &key, slot);
9432         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9433                 bytes_used += key.offset;
9434         else
9435                 bytes_used += nodesize;
9436
9437         if (item_size < sizeof(*ei)) {
9438                 /*
9439                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9440                  * old thing when on disk format is still un-determined.
9441                  * No need to care about it anymore
9442                  */
9443                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9444                 return -ENOTTY;
9445         }
9446
9447         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9448         flags = btrfs_extent_flags(eb, ei);
9449
9450         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9451                 metadata = 1;
9452         if (metadata && check_crossing_stripes(global_info, key.objectid,
9453                                                eb->len)) {
9454                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9455                       key.objectid, key.objectid + nodesize);
9456                 err |= CROSSING_STRIPE_BOUNDARY;
9457         }
9458
9459         ptr = (unsigned long)(ei + 1);
9460
9461         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9462                 /* Old EXTENT_ITEM metadata */
9463                 struct btrfs_tree_block_info *info;
9464
9465                 info = (struct btrfs_tree_block_info *)ptr;
9466                 level = btrfs_tree_block_level(eb, info);
9467                 ptr += sizeof(struct btrfs_tree_block_info);
9468         } else {
9469                 /* New METADATA_ITEM */
9470                 level = key.offset;
9471         }
9472         end = (unsigned long)ei + item_size;
9473
9474         if (ptr >= end) {
9475                 err |= ITEM_SIZE_MISMATCH;
9476                 goto out;
9477         }
9478
9479         /* Now check every backref in this extent item */
9480 next:
9481         iref = (struct btrfs_extent_inline_ref *)ptr;
9482         type = btrfs_extent_inline_ref_type(eb, iref);
9483         offset = btrfs_extent_inline_ref_offset(eb, iref);
9484         switch (type) {
9485         case BTRFS_TREE_BLOCK_REF_KEY:
9486                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9487                                                level);
9488                 err |= ret;
9489                 break;
9490         case BTRFS_SHARED_BLOCK_REF_KEY:
9491                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9492                                                  level);
9493                 err |= ret;
9494                 break;
9495         case BTRFS_EXTENT_DATA_REF_KEY:
9496                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9497                 ret = check_extent_data_backref(fs_info,
9498                                 btrfs_extent_data_ref_root(eb, dref),
9499                                 btrfs_extent_data_ref_objectid(eb, dref),
9500                                 btrfs_extent_data_ref_offset(eb, dref),
9501                                 key.objectid, key.offset,
9502                                 btrfs_extent_data_ref_count(eb, dref));
9503                 err |= ret;
9504                 break;
9505         case BTRFS_SHARED_DATA_REF_KEY:
9506                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9507                 err |= ret;
9508                 break;
9509         default:
9510                 error("extent[%llu %d %llu] has unknown ref type: %d",
9511                         key.objectid, key.type, key.offset, type);
9512                 err |= UNKNOWN_TYPE;
9513                 goto out;
9514         }
9515
9516         ptr += btrfs_extent_inline_ref_size(type);
9517         if (ptr < end)
9518                 goto next;
9519
9520 out:
9521         return err;
9522 }
9523
9524 /*
9525  * Check if a dev extent item is referred correctly by its chunk
9526  */
9527 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9528                                  struct extent_buffer *eb, int slot)
9529 {
9530         struct btrfs_root *chunk_root = fs_info->chunk_root;
9531         struct btrfs_dev_extent *ptr;
9532         struct btrfs_path path;
9533         struct btrfs_key chunk_key;
9534         struct btrfs_key devext_key;
9535         struct btrfs_chunk *chunk;
9536         struct extent_buffer *l;
9537         int num_stripes;
9538         u64 length;
9539         int i;
9540         int found_chunk = 0;
9541         int ret;
9542
9543         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9544         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9545         length = btrfs_dev_extent_length(eb, ptr);
9546
9547         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9548         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9549         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9550
9551         btrfs_init_path(&path);
9552         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9553         if (ret)
9554                 goto out;
9555
9556         l = path.nodes[0];
9557         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9558         if (btrfs_chunk_length(l, chunk) != length)
9559                 goto out;
9560
9561         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9562         for (i = 0; i < num_stripes; i++) {
9563                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9564                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9565
9566                 if (devid == devext_key.objectid &&
9567                     offset == devext_key.offset) {
9568                         found_chunk = 1;
9569                         break;
9570                 }
9571         }
9572 out:
9573         btrfs_release_path(&path);
9574         if (!found_chunk) {
9575                 error(
9576                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9577                         devext_key.objectid, devext_key.offset, length);
9578                 return REFERENCER_MISSING;
9579         }
9580         return 0;
9581 }
9582
9583 /*
9584  * Check if the used space is correct with the dev item
9585  */
9586 static int check_dev_item(struct btrfs_fs_info *fs_info,
9587                           struct extent_buffer *eb, int slot)
9588 {
9589         struct btrfs_root *dev_root = fs_info->dev_root;
9590         struct btrfs_dev_item *dev_item;
9591         struct btrfs_path path;
9592         struct btrfs_key key;
9593         struct btrfs_dev_extent *ptr;
9594         u64 dev_id;
9595         u64 used;
9596         u64 total = 0;
9597         int ret;
9598
9599         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9600         dev_id = btrfs_device_id(eb, dev_item);
9601         used = btrfs_device_bytes_used(eb, dev_item);
9602
9603         key.objectid = dev_id;
9604         key.type = BTRFS_DEV_EXTENT_KEY;
9605         key.offset = 0;
9606
9607         btrfs_init_path(&path);
9608         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9609         if (ret < 0) {
9610                 btrfs_item_key_to_cpu(eb, &key, slot);
9611                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9612                         key.objectid, key.type, key.offset);
9613                 btrfs_release_path(&path);
9614                 return REFERENCER_MISSING;
9615         }
9616
9617         /* Iterate dev_extents to calculate the used space of a device */
9618         while (1) {
9619                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9620
9621                 if (key.objectid > dev_id)
9622                         break;
9623                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9624                         goto next;
9625
9626                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9627                                      struct btrfs_dev_extent);
9628                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9629 next:
9630                 ret = btrfs_next_item(dev_root, &path);
9631                 if (ret)
9632                         break;
9633         }
9634         btrfs_release_path(&path);
9635
9636         if (used != total) {
9637                 btrfs_item_key_to_cpu(eb, &key, slot);
9638                 error(
9639 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9640                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9641                         BTRFS_DEV_EXTENT_KEY, dev_id);
9642                 return ACCOUNTING_MISMATCH;
9643         }
9644         return 0;
9645 }
9646
/*
 * Check a block group item with its referencer (chunk) and its used space
 * with extent/metadata item
 */
9651 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9652                                   struct extent_buffer *eb, int slot)
9653 {
9654         struct btrfs_root *extent_root = fs_info->extent_root;
9655         struct btrfs_root *chunk_root = fs_info->chunk_root;
9656         struct btrfs_block_group_item *bi;
9657         struct btrfs_block_group_item bg_item;
9658         struct btrfs_path path;
9659         struct btrfs_key bg_key;
9660         struct btrfs_key chunk_key;
9661         struct btrfs_key extent_key;
9662         struct btrfs_chunk *chunk;
9663         struct extent_buffer *leaf;
9664         struct btrfs_extent_item *ei;
9665         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9666         u64 flags;
9667         u64 bg_flags;
9668         u64 used;
9669         u64 total = 0;
9670         int ret;
9671         int err = 0;
9672
9673         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9674         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9675         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9676         used = btrfs_block_group_used(&bg_item);
9677         bg_flags = btrfs_block_group_flags(&bg_item);
9678
9679         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9680         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9681         chunk_key.offset = bg_key.objectid;
9682
9683         btrfs_init_path(&path);
9684         /* Search for the referencer chunk */
9685         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9686         if (ret) {
9687                 error(
9688                 "block group[%llu %llu] did not find the related chunk item",
9689                         bg_key.objectid, bg_key.offset);
9690                 err |= REFERENCER_MISSING;
9691         } else {
9692                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9693                                         struct btrfs_chunk);
9694                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9695                                                 bg_key.offset) {
9696                         error(
9697         "block group[%llu %llu] related chunk item length does not match",
9698                                 bg_key.objectid, bg_key.offset);
9699                         err |= REFERENCER_MISMATCH;
9700                 }
9701         }
9702         btrfs_release_path(&path);
9703
9704         /* Search from the block group bytenr */
9705         extent_key.objectid = bg_key.objectid;
9706         extent_key.type = 0;
9707         extent_key.offset = 0;
9708
9709         btrfs_init_path(&path);
9710         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9711         if (ret < 0)
9712                 goto out;
9713
9714         /* Iterate extent tree to account used space */
9715         while (1) {
9716                 leaf = path.nodes[0];
9717                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9718                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9719                         break;
9720
9721                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9722                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9723                         goto next;
9724                 if (extent_key.objectid < bg_key.objectid)
9725                         goto next;
9726
9727                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9728                         total += nodesize;
9729                 else
9730                         total += extent_key.offset;
9731
9732                 ei = btrfs_item_ptr(leaf, path.slots[0],
9733                                     struct btrfs_extent_item);
9734                 flags = btrfs_extent_flags(leaf, ei);
9735                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9736                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9737                                 error(
9738                         "bad extent[%llu, %llu) type mismatch with chunk",
9739                                         extent_key.objectid,
9740                                         extent_key.objectid + extent_key.offset);
9741                                 err |= CHUNK_TYPE_MISMATCH;
9742                         }
9743                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9744                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9745                                     BTRFS_BLOCK_GROUP_METADATA))) {
9746                                 error(
9747                         "bad extent[%llu, %llu) type mismatch with chunk",
9748                                         extent_key.objectid,
9749                                         extent_key.objectid + nodesize);
9750                                 err |= CHUNK_TYPE_MISMATCH;
9751                         }
9752                 }
9753 next:
9754                 ret = btrfs_next_item(extent_root, &path);
9755                 if (ret)
9756                         break;
9757         }
9758
9759 out:
9760         btrfs_release_path(&path);
9761
9762         if (total != used) {
9763                 error(
9764                 "block group[%llu %llu] used %llu but extent items used %llu",
9765                         bg_key.objectid, bg_key.offset, used, total);
9766                 err |= ACCOUNTING_MISMATCH;
9767         }
9768         return err;
9769 }
9770
9771 /*
9772  * Check a chunk item.
9773  * Including checking all referred dev_extents and block group
9774  */
9775 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9776                             struct extent_buffer *eb, int slot)
9777 {
9778         struct btrfs_root *extent_root = fs_info->extent_root;
9779         struct btrfs_root *dev_root = fs_info->dev_root;
9780         struct btrfs_path path;
9781         struct btrfs_key chunk_key;
9782         struct btrfs_key bg_key;
9783         struct btrfs_key devext_key;
9784         struct btrfs_chunk *chunk;
9785         struct extent_buffer *leaf;
9786         struct btrfs_block_group_item *bi;
9787         struct btrfs_block_group_item bg_item;
9788         struct btrfs_dev_extent *ptr;
9789         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9790         u64 length;
9791         u64 chunk_end;
9792         u64 type;
9793         u64 profile;
9794         int num_stripes;
9795         u64 offset;
9796         u64 objectid;
9797         int i;
9798         int ret;
9799         int err = 0;
9800
9801         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9802         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9803         length = btrfs_chunk_length(eb, chunk);
9804         chunk_end = chunk_key.offset + length;
9805         if (!IS_ALIGNED(length, sectorsize)) {
9806                 error("chunk[%llu %llu) not aligned to %u",
9807                         chunk_key.offset, chunk_end, sectorsize);
9808                 err |= BYTES_UNALIGNED;
9809                 goto out;
9810         }
9811
9812         type = btrfs_chunk_type(eb, chunk);
9813         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9814         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9815                 error("chunk[%llu %llu) has no chunk type",
9816                         chunk_key.offset, chunk_end);
9817                 err |= UNKNOWN_TYPE;
9818         }
9819         if (profile && (profile & (profile - 1))) {
9820                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9821                         chunk_key.offset, chunk_end, profile);
9822                 err |= UNKNOWN_TYPE;
9823         }
9824
9825         bg_key.objectid = chunk_key.offset;
9826         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9827         bg_key.offset = length;
9828
9829         btrfs_init_path(&path);
9830         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9831         if (ret) {
9832                 error(
9833                 "chunk[%llu %llu) did not find the related block group item",
9834                         chunk_key.offset, chunk_end);
9835                 err |= REFERENCER_MISSING;
9836         } else{
9837                 leaf = path.nodes[0];
9838                 bi = btrfs_item_ptr(leaf, path.slots[0],
9839                                     struct btrfs_block_group_item);
9840                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9841                                    sizeof(bg_item));
9842                 if (btrfs_block_group_flags(&bg_item) != type) {
9843                         error(
9844 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9845                                 chunk_key.offset, chunk_end, type,
9846                                 btrfs_block_group_flags(&bg_item));
9847                         err |= REFERENCER_MISSING;
9848                 }
9849         }
9850
9851         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9852         for (i = 0; i < num_stripes; i++) {
9853                 btrfs_release_path(&path);
9854                 btrfs_init_path(&path);
9855                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9856                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9857                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9858
9859                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9860                                         0, 0);
9861                 if (ret)
9862                         goto not_match_dev;
9863
9864                 leaf = path.nodes[0];
9865                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9866                                      struct btrfs_dev_extent);
9867                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9868                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9869                 if (objectid != chunk_key.objectid ||
9870                     offset != chunk_key.offset ||
9871                     btrfs_dev_extent_length(leaf, ptr) != length)
9872                         goto not_match_dev;
9873                 continue;
9874 not_match_dev:
9875                 err |= BACKREF_MISSING;
9876                 error(
9877                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9878                         chunk_key.objectid, chunk_end, i);
9879                 continue;
9880         }
9881         btrfs_release_path(&path);
9882 out:
9883         return err;
9884 }
9885
9886 /*
9887  * Main entry function to check known items and update related accounting info
9888  */
9889 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9890 {
9891         struct btrfs_fs_info *fs_info = root->fs_info;
9892         struct btrfs_key key;
9893         int slot = 0;
9894         int type;
9895         struct btrfs_extent_data_ref *dref;
9896         int ret;
9897         int err = 0;
9898
9899 next:
9900         btrfs_item_key_to_cpu(eb, &key, slot);
9901         type = key.type;
9902
9903         switch (type) {
9904         case BTRFS_EXTENT_DATA_KEY:
9905                 ret = check_extent_data_item(root, eb, slot);
9906                 err |= ret;
9907                 break;
9908         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9909                 ret = check_block_group_item(fs_info, eb, slot);
9910                 err |= ret;
9911                 break;
9912         case BTRFS_DEV_ITEM_KEY:
9913                 ret = check_dev_item(fs_info, eb, slot);
9914                 err |= ret;
9915                 break;
9916         case BTRFS_CHUNK_ITEM_KEY:
9917                 ret = check_chunk_item(fs_info, eb, slot);
9918                 err |= ret;
9919                 break;
9920         case BTRFS_DEV_EXTENT_KEY:
9921                 ret = check_dev_extent_item(fs_info, eb, slot);
9922                 err |= ret;
9923                 break;
9924         case BTRFS_EXTENT_ITEM_KEY:
9925         case BTRFS_METADATA_ITEM_KEY:
9926                 ret = check_extent_item(fs_info, eb, slot);
9927                 err |= ret;
9928                 break;
9929         case BTRFS_EXTENT_CSUM_KEY:
9930                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9931                 break;
9932         case BTRFS_TREE_BLOCK_REF_KEY:
9933                 ret = check_tree_block_backref(fs_info, key.offset,
9934                                                key.objectid, -1);
9935                 err |= ret;
9936                 break;
9937         case BTRFS_EXTENT_DATA_REF_KEY:
9938                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9939                 ret = check_extent_data_backref(fs_info,
9940                                 btrfs_extent_data_ref_root(eb, dref),
9941                                 btrfs_extent_data_ref_objectid(eb, dref),
9942                                 btrfs_extent_data_ref_offset(eb, dref),
9943                                 key.objectid, 0,
9944                                 btrfs_extent_data_ref_count(eb, dref));
9945                 err |= ret;
9946                 break;
9947         case BTRFS_SHARED_BLOCK_REF_KEY:
9948                 ret = check_shared_block_backref(fs_info, key.offset,
9949                                                  key.objectid, -1);
9950                 err |= ret;
9951                 break;
9952         case BTRFS_SHARED_DATA_REF_KEY:
9953                 ret = check_shared_data_backref(fs_info, key.offset,
9954                                                 key.objectid);
9955                 err |= ret;
9956                 break;
9957         default:
9958                 break;
9959         }
9960
9961         if (++slot < btrfs_header_nritems(eb))
9962                 goto next;
9963
9964         return err;
9965 }
9966
9967 /*
9968  * Helper function for later fs/subvol tree check.  To determine if a tree
9969  * block should be checked.
9970  * This function will ensure only the direct referencer with lowest rootid to
9971  * check a fs/subvolume tree block.
9972  *
9973  * Backref check at extent tree would detect errors like missing subvolume
9974  * tree, so we can do aggressive check to reduce duplicated checks.
9975  */
9976 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9977 {
9978         struct btrfs_root *extent_root = root->fs_info->extent_root;
9979         struct btrfs_key key;
9980         struct btrfs_path path;
9981         struct extent_buffer *leaf;
9982         int slot;
9983         struct btrfs_extent_item *ei;
9984         unsigned long ptr;
9985         unsigned long end;
9986         int type;
9987         u32 item_size;
9988         u64 offset;
9989         struct btrfs_extent_inline_ref *iref;
9990         int ret;
9991
9992         btrfs_init_path(&path);
9993         key.objectid = btrfs_header_bytenr(eb);
9994         key.type = BTRFS_METADATA_ITEM_KEY;
9995         key.offset = (u64)-1;
9996
9997         /*
9998          * Any failure in backref resolving means we can't determine
9999          * whom the tree block belongs to.
10000          * So in that case, we need to check that tree block
10001          */
10002         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10003         if (ret < 0)
10004                 goto need_check;
10005
10006         ret = btrfs_previous_extent_item(extent_root, &path,
10007                                          btrfs_header_bytenr(eb));
10008         if (ret)
10009                 goto need_check;
10010
10011         leaf = path.nodes[0];
10012         slot = path.slots[0];
10013         btrfs_item_key_to_cpu(leaf, &key, slot);
10014         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10015
10016         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10017                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10018         } else {
10019                 struct btrfs_tree_block_info *info;
10020
10021                 info = (struct btrfs_tree_block_info *)(ei + 1);
10022                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10023         }
10024
10025         item_size = btrfs_item_size_nr(leaf, slot);
10026         ptr = (unsigned long)iref;
10027         end = (unsigned long)ei + item_size;
10028         while (ptr < end) {
10029                 iref = (struct btrfs_extent_inline_ref *)ptr;
10030                 type = btrfs_extent_inline_ref_type(leaf, iref);
10031                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10032
10033                 /*
10034                  * We only check the tree block if current root is
10035                  * the lowest referencer of it.
10036                  */
10037                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10038                     offset < root->objectid) {
10039                         btrfs_release_path(&path);
10040                         return 0;
10041                 }
10042
10043                 ptr += btrfs_extent_inline_ref_size(type);
10044         }
10045         /*
10046          * Normally we should also check keyed tree block ref, but that may be
10047          * very time consuming.  Inlined ref should already make us skip a lot
10048          * of refs now.  So skip search keyed tree block ref.
10049          */
10050
10051 need_check:
10052         btrfs_release_path(&path);
10053         return 1;
10054 }
10055
10056 /*
10057  * Traversal function for tree block. We will do:
10058  * 1) Skip shared fs/subvolume tree blocks
10059  * 2) Update related bytes accounting
10060  * 3) Pre-order traversal
10061  */
10062 static int traverse_tree_block(struct btrfs_root *root,
10063                                 struct extent_buffer *node)
10064 {
10065         struct extent_buffer *eb;
10066         struct btrfs_key key;
10067         struct btrfs_key drop_key;
10068         int level;
10069         u64 nr;
10070         int i;
10071         int err = 0;
10072         int ret;
10073
10074         /*
10075          * Skip shared fs/subvolume tree block, in that case they will
10076          * be checked by referencer with lowest rootid
10077          */
10078         if (is_fstree(root->objectid) && !should_check(root, node))
10079                 return 0;
10080
10081         /* Update bytes accounting */
10082         total_btree_bytes += node->len;
10083         if (fs_root_objectid(btrfs_header_owner(node)))
10084                 total_fs_tree_bytes += node->len;
10085         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10086                 total_extent_tree_bytes += node->len;
10087         if (!found_old_backref &&
10088             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10089             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10090             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10091                 found_old_backref = 1;
10092
10093         /* pre-order tranversal, check itself first */
10094         level = btrfs_header_level(node);
10095         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10096                                    btrfs_header_level(node),
10097                                    btrfs_header_owner(node));
10098         err |= ret;
10099         if (err)
10100                 error(
10101         "check %s failed root %llu bytenr %llu level %d, force continue check",
10102                         level ? "node":"leaf", root->objectid,
10103                         btrfs_header_bytenr(node), btrfs_header_level(node));
10104
10105         if (!level) {
10106                 btree_space_waste += btrfs_leaf_free_space(root, node);
10107                 ret = check_leaf_items(root, node);
10108                 err |= ret;
10109                 return err;
10110         }
10111
10112         nr = btrfs_header_nritems(node);
10113         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10114         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10115                 sizeof(struct btrfs_key_ptr);
10116
10117         /* Then check all its children */
10118         for (i = 0; i < nr; i++) {
10119                 u64 blocknr = btrfs_node_blockptr(node, i);
10120
10121                 btrfs_node_key_to_cpu(node, &key, i);
10122                 if (level == root->root_item.drop_level &&
10123                     is_dropped_key(&key, &drop_key))
10124                         continue;
10125
10126                 /*
10127                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10128                  * to call the function itself.
10129                  */
10130                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10131                 if (extent_buffer_uptodate(eb)) {
10132                         ret = traverse_tree_block(root, eb);
10133                         err |= ret;
10134                 }
10135                 free_extent_buffer(eb);
10136         }
10137
10138         return err;
10139 }
10140
10141 /*
10142  * Low memory usage version check_chunks_and_extents.
10143  */
10144 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10145 {
10146         struct btrfs_path path;
10147         struct btrfs_key key;
10148         struct btrfs_root *root1;
10149         struct btrfs_root *cur_root;
10150         int err = 0;
10151         int ret;
10152
10153         root1 = root->fs_info->chunk_root;
10154         ret = traverse_tree_block(root1, root1->node);
10155         err |= ret;
10156
10157         root1 = root->fs_info->tree_root;
10158         ret = traverse_tree_block(root1, root1->node);
10159         err |= ret;
10160
10161         btrfs_init_path(&path);
10162         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10163         key.offset = 0;
10164         key.type = BTRFS_ROOT_ITEM_KEY;
10165
10166         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10167         if (ret) {
10168                 error("cannot find extent treet in tree_root");
10169                 goto out;
10170         }
10171
10172         while (1) {
10173                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10174                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10175                         goto next;
10176                 key.offset = (u64)-1;
10177
10178                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10179                 if (IS_ERR(cur_root) || !cur_root) {
10180                         error("failed to read tree: %lld", key.objectid);
10181                         goto next;
10182                 }
10183
10184                 ret = traverse_tree_block(cur_root, cur_root->node);
10185                 err |= ret;
10186
10187 next:
10188                 ret = btrfs_next_item(root1, &path);
10189                 if (ret)
10190                         goto out;
10191         }
10192
10193 out:
10194         btrfs_release_path(&path);
10195         return err;
10196 }
10197
10198 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10199                            struct btrfs_root *root, int overwrite)
10200 {
10201         struct extent_buffer *c;
10202         struct extent_buffer *old = root->node;
10203         int level;
10204         int ret;
10205         struct btrfs_disk_key disk_key = {0,0,0};
10206
10207         level = 0;
10208
10209         if (overwrite) {
10210                 c = old;
10211                 extent_buffer_get(c);
10212                 goto init;
10213         }
10214         c = btrfs_alloc_free_block(trans, root,
10215                                    root->nodesize,
10216                                    root->root_key.objectid,
10217                                    &disk_key, level, 0, 0);
10218         if (IS_ERR(c)) {
10219                 c = old;
10220                 extent_buffer_get(c);
10221                 overwrite = 1;
10222         }
10223 init:
10224         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10225         btrfs_set_header_level(c, level);
10226         btrfs_set_header_bytenr(c, c->start);
10227         btrfs_set_header_generation(c, trans->transid);
10228         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10229         btrfs_set_header_owner(c, root->root_key.objectid);
10230
10231         write_extent_buffer(c, root->fs_info->fsid,
10232                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10233
10234         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10235                             btrfs_header_chunk_tree_uuid(c),
10236                             BTRFS_UUID_SIZE);
10237
10238         btrfs_mark_buffer_dirty(c);
10239         /*
10240          * this case can happen in the following case:
10241          *
10242          * 1.overwrite previous root.
10243          *
10244          * 2.reinit reloc data root, this is because we skip pin
10245          * down reloc data tree before which means we can allocate
10246          * same block bytenr here.
10247          */
10248         if (old->start == c->start) {
10249                 btrfs_set_root_generation(&root->root_item,
10250                                           trans->transid);
10251                 root->root_item.level = btrfs_header_level(root->node);
10252                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10253                                         &root->root_key, &root->root_item);
10254                 if (ret) {
10255                         free_extent_buffer(c);
10256                         return ret;
10257                 }
10258         }
10259         free_extent_buffer(old);
10260         root->node = c;
10261         add_root_to_dirty_list(root);
10262         return 0;
10263 }
10264
10265 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10266                                 struct extent_buffer *eb, int tree_root)
10267 {
10268         struct extent_buffer *tmp;
10269         struct btrfs_root_item *ri;
10270         struct btrfs_key key;
10271         u64 bytenr;
10272         u32 nodesize;
10273         int level = btrfs_header_level(eb);
10274         int nritems;
10275         int ret;
10276         int i;
10277
10278         /*
10279          * If we have pinned this block before, don't pin it again.
10280          * This can not only avoid forever loop with broken filesystem
10281          * but also give us some speedups.
10282          */
10283         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10284                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10285                 return 0;
10286
10287         btrfs_pin_extent(fs_info, eb->start, eb->len);
10288
10289         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10290         nritems = btrfs_header_nritems(eb);
10291         for (i = 0; i < nritems; i++) {
10292                 if (level == 0) {
10293                         btrfs_item_key_to_cpu(eb, &key, i);
10294                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10295                                 continue;
10296                         /* Skip the extent root and reloc roots */
10297                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10298                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10299                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10300                                 continue;
10301                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10302                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10303
10304                         /*
10305                          * If at any point we start needing the real root we
10306                          * will have to build a stump root for the root we are
10307                          * in, but for now this doesn't actually use the root so
10308                          * just pass in extent_root.
10309                          */
10310                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10311                                               nodesize, 0);
10312                         if (!extent_buffer_uptodate(tmp)) {
10313                                 fprintf(stderr, "Error reading root block\n");
10314                                 return -EIO;
10315                         }
10316                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10317                         free_extent_buffer(tmp);
10318                         if (ret)
10319                                 return ret;
10320                 } else {
10321                         bytenr = btrfs_node_blockptr(eb, i);
10322
10323                         /* If we aren't the tree root don't read the block */
10324                         if (level == 1 && !tree_root) {
10325                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10326                                 continue;
10327                         }
10328
10329                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10330                                               nodesize, 0);
10331                         if (!extent_buffer_uptodate(tmp)) {
10332                                 fprintf(stderr, "Error reading tree block\n");
10333                                 return -EIO;
10334                         }
10335                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10336                         free_extent_buffer(tmp);
10337                         if (ret)
10338                                 return ret;
10339                 }
10340         }
10341
10342         return 0;
10343 }
10344
10345 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10346 {
10347         int ret;
10348
10349         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10350         if (ret)
10351                 return ret;
10352
10353         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10354 }
10355
10356 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10357 {
10358         struct btrfs_block_group_cache *cache;
10359         struct btrfs_path path;
10360         struct extent_buffer *leaf;
10361         struct btrfs_chunk *chunk;
10362         struct btrfs_key key;
10363         int ret;
10364         u64 start;
10365
10366         btrfs_init_path(&path);
10367         key.objectid = 0;
10368         key.type = BTRFS_CHUNK_ITEM_KEY;
10369         key.offset = 0;
10370         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10371         if (ret < 0) {
10372                 btrfs_release_path(&path);
10373                 return ret;
10374         }
10375
10376         /*
10377          * We do this in case the block groups were screwed up and had alloc
10378          * bits that aren't actually set on the chunks.  This happens with
10379          * restored images every time and could happen in real life I guess.
10380          */
10381         fs_info->avail_data_alloc_bits = 0;
10382         fs_info->avail_metadata_alloc_bits = 0;
10383         fs_info->avail_system_alloc_bits = 0;
10384
10385         /* First we need to create the in-memory block groups */
10386         while (1) {
10387                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10388                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10389                         if (ret < 0) {
10390                                 btrfs_release_path(&path);
10391                                 return ret;
10392                         }
10393                         if (ret) {
10394                                 ret = 0;
10395                                 break;
10396                         }
10397                 }
10398                 leaf = path.nodes[0];
10399                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10400                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10401                         path.slots[0]++;
10402                         continue;
10403                 }
10404
10405                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10406                 btrfs_add_block_group(fs_info, 0,
10407                                       btrfs_chunk_type(leaf, chunk),
10408                                       key.objectid, key.offset,
10409                                       btrfs_chunk_length(leaf, chunk));
10410                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10411                                  key.offset + btrfs_chunk_length(leaf, chunk),
10412                                  GFP_NOFS);
10413                 path.slots[0]++;
10414         }
10415         start = 0;
10416         while (1) {
10417                 cache = btrfs_lookup_first_block_group(fs_info, start);
10418                 if (!cache)
10419                         break;
10420                 cache->cached = 1;
10421                 start = cache->key.objectid + cache->key.offset;
10422         }
10423
10424         btrfs_release_path(&path);
10425         return 0;
10426 }
10427
10428 static int reset_balance(struct btrfs_trans_handle *trans,
10429                          struct btrfs_fs_info *fs_info)
10430 {
10431         struct btrfs_root *root = fs_info->tree_root;
10432         struct btrfs_path path;
10433         struct extent_buffer *leaf;
10434         struct btrfs_key key;
10435         int del_slot, del_nr = 0;
10436         int ret;
10437         int found = 0;
10438
10439         btrfs_init_path(&path);
10440         key.objectid = BTRFS_BALANCE_OBJECTID;
10441         key.type = BTRFS_BALANCE_ITEM_KEY;
10442         key.offset = 0;
10443         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10444         if (ret) {
10445                 if (ret > 0)
10446                         ret = 0;
10447                 if (!ret)
10448                         goto reinit_data_reloc;
10449                 else
10450                         goto out;
10451         }
10452
10453         ret = btrfs_del_item(trans, root, &path);
10454         if (ret)
10455                 goto out;
10456         btrfs_release_path(&path);
10457
10458         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10459         key.type = BTRFS_ROOT_ITEM_KEY;
10460         key.offset = 0;
10461         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10462         if (ret < 0)
10463                 goto out;
10464         while (1) {
10465                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10466                         if (!found)
10467                                 break;
10468
10469                         if (del_nr) {
10470                                 ret = btrfs_del_items(trans, root, &path,
10471                                                       del_slot, del_nr);
10472                                 del_nr = 0;
10473                                 if (ret)
10474                                         goto out;
10475                         }
10476                         key.offset++;
10477                         btrfs_release_path(&path);
10478
10479                         found = 0;
10480                         ret = btrfs_search_slot(trans, root, &key, &path,
10481                                                 -1, 1);
10482                         if (ret < 0)
10483                                 goto out;
10484                         continue;
10485                 }
10486                 found = 1;
10487                 leaf = path.nodes[0];
10488                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10489                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10490                         break;
10491                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10492                         path.slots[0]++;
10493                         continue;
10494                 }
10495                 if (!del_nr) {
10496                         del_slot = path.slots[0];
10497                         del_nr = 1;
10498                 } else {
10499                         del_nr++;
10500                 }
10501                 path.slots[0]++;
10502         }
10503
10504         if (del_nr) {
10505                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
10506                 if (ret)
10507                         goto out;
10508         }
10509         btrfs_release_path(&path);
10510
10511 reinit_data_reloc:
10512         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10513         key.type = BTRFS_ROOT_ITEM_KEY;
10514         key.offset = (u64)-1;
10515         root = btrfs_read_fs_root(fs_info, &key);
10516         if (IS_ERR(root)) {
10517                 fprintf(stderr, "Error reading data reloc tree\n");
10518                 ret = PTR_ERR(root);
10519                 goto out;
10520         }
10521         record_root_in_trans(trans, root);
10522         ret = btrfs_fsck_reinit_root(trans, root, 0);
10523         if (ret)
10524                 goto out;
10525         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10526 out:
10527         btrfs_release_path(&path);
10528         return ret;
10529 }
10530
10531 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10532                               struct btrfs_fs_info *fs_info)
10533 {
10534         u64 start = 0;
10535         int ret;
10536
10537         /*
10538          * The only reason we don't do this is because right now we're just
10539          * walking the trees we find and pinning down their bytes, we don't look
10540          * at any of the leaves.  In order to do mixed groups we'd have to check
10541          * the leaves of any fs roots and pin down the bytes for any file
10542          * extents we find.  Not hard but why do it if we don't have to?
10543          */
10544         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10545                 fprintf(stderr, "We don't support re-initing the extent tree "
10546                         "for mixed block groups yet, please notify a btrfs "
10547                         "developer you want to do this so they can add this "
10548                         "functionality.\n");
10549                 return -EINVAL;
10550         }
10551
10552         /*
10553          * first we need to walk all of the trees except the extent tree and pin
10554          * down the bytes that are in use so we don't overwrite any existing
10555          * metadata.
10556          */
10557         ret = pin_metadata_blocks(fs_info);
10558         if (ret) {
10559                 fprintf(stderr, "error pinning down used bytes\n");
10560                 return ret;
10561         }
10562
10563         /*
10564          * Need to drop all the block groups since we're going to recreate all
10565          * of them again.
10566          */
10567         btrfs_free_block_groups(fs_info);
10568         ret = reset_block_groups(fs_info);
10569         if (ret) {
10570                 fprintf(stderr, "error resetting the block groups\n");
10571                 return ret;
10572         }
10573
10574         /* Ok we can allocate now, reinit the extent root */
10575         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10576         if (ret) {
10577                 fprintf(stderr, "extent root initialization failed\n");
10578                 /*
10579                  * When the transaction code is updated we should end the
10580                  * transaction, but for now progs only knows about commit so
10581                  * just return an error.
10582                  */
10583                 return ret;
10584         }
10585
10586         /*
10587          * Now we have all the in-memory block groups setup so we can make
10588          * allocations properly, and the metadata we care about is safe since we
10589          * pinned all of it above.
10590          */
10591         while (1) {
10592                 struct btrfs_block_group_cache *cache;
10593
10594                 cache = btrfs_lookup_first_block_group(fs_info, start);
10595                 if (!cache)
10596                         break;
10597                 start = cache->key.objectid + cache->key.offset;
10598                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10599                                         &cache->key, &cache->item,
10600                                         sizeof(cache->item));
10601                 if (ret) {
10602                         fprintf(stderr, "Error adding block group\n");
10603                         return ret;
10604                 }
10605                 btrfs_extent_post_op(trans, fs_info->extent_root);
10606         }
10607
10608         ret = reset_balance(trans, fs_info);
10609         if (ret)
10610                 fprintf(stderr, "error resetting the pending balance\n");
10611
10612         return ret;
10613 }
10614
10615 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10616 {
10617         struct btrfs_path path;
10618         struct btrfs_trans_handle *trans;
10619         struct btrfs_key key;
10620         int ret;
10621
10622         printf("Recowing metadata block %llu\n", eb->start);
10623         key.objectid = btrfs_header_owner(eb);
10624         key.type = BTRFS_ROOT_ITEM_KEY;
10625         key.offset = (u64)-1;
10626
10627         root = btrfs_read_fs_root(root->fs_info, &key);
10628         if (IS_ERR(root)) {
10629                 fprintf(stderr, "Couldn't find owner root %llu\n",
10630                         key.objectid);
10631                 return PTR_ERR(root);
10632         }
10633
10634         trans = btrfs_start_transaction(root, 1);
10635         if (IS_ERR(trans))
10636                 return PTR_ERR(trans);
10637
10638         btrfs_init_path(&path);
10639         path.lowest_level = btrfs_header_level(eb);
10640         if (path.lowest_level)
10641                 btrfs_node_key_to_cpu(eb, &key, 0);
10642         else
10643                 btrfs_item_key_to_cpu(eb, &key, 0);
10644
10645         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10646         btrfs_commit_transaction(trans, root);
10647         btrfs_release_path(&path);
10648         return ret;
10649 }
10650
10651 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10652 {
10653         struct btrfs_path path;
10654         struct btrfs_trans_handle *trans;
10655         struct btrfs_key key;
10656         int ret;
10657
10658         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10659                bad->key.type, bad->key.offset);
10660         key.objectid = bad->root_id;
10661         key.type = BTRFS_ROOT_ITEM_KEY;
10662         key.offset = (u64)-1;
10663
10664         root = btrfs_read_fs_root(root->fs_info, &key);
10665         if (IS_ERR(root)) {
10666                 fprintf(stderr, "Couldn't find owner root %llu\n",
10667                         key.objectid);
10668                 return PTR_ERR(root);
10669         }
10670
10671         trans = btrfs_start_transaction(root, 1);
10672         if (IS_ERR(trans))
10673                 return PTR_ERR(trans);
10674
10675         btrfs_init_path(&path);
10676         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
10677         if (ret) {
10678                 if (ret > 0)
10679                         ret = 0;
10680                 goto out;
10681         }
10682         ret = btrfs_del_item(trans, root, &path);
10683 out:
10684         btrfs_commit_transaction(trans, root);
10685         btrfs_release_path(&path);
10686         return ret;
10687 }
10688
10689 static int zero_log_tree(struct btrfs_root *root)
10690 {
10691         struct btrfs_trans_handle *trans;
10692         int ret;
10693
10694         trans = btrfs_start_transaction(root, 1);
10695         if (IS_ERR(trans)) {
10696                 ret = PTR_ERR(trans);
10697                 return ret;
10698         }
10699         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10700         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10701         ret = btrfs_commit_transaction(trans, root);
10702         return ret;
10703 }
10704
10705 static int populate_csum(struct btrfs_trans_handle *trans,
10706                          struct btrfs_root *csum_root, char *buf, u64 start,
10707                          u64 len)
10708 {
10709         u64 offset = 0;
10710         u64 sectorsize;
10711         int ret = 0;
10712
10713         while (offset < len) {
10714                 sectorsize = csum_root->sectorsize;
10715                 ret = read_extent_data(csum_root, buf, start + offset,
10716                                        &sectorsize, 0);
10717                 if (ret)
10718                         break;
10719                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10720                                             start + offset, buf, sectorsize);
10721                 if (ret)
10722                         break;
10723                 offset += sectorsize;
10724         }
10725         return ret;
10726 }
10727
10728 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10729                                       struct btrfs_root *csum_root,
10730                                       struct btrfs_root *cur_root)
10731 {
10732         struct btrfs_path path;
10733         struct btrfs_key key;
10734         struct extent_buffer *node;
10735         struct btrfs_file_extent_item *fi;
10736         char *buf = NULL;
10737         u64 start = 0;
10738         u64 len = 0;
10739         int slot = 0;
10740         int ret = 0;
10741
10742         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10743         if (!buf)
10744                 return -ENOMEM;
10745
10746         btrfs_init_path(&path);
10747         key.objectid = 0;
10748         key.offset = 0;
10749         key.type = 0;
10750         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
10751         if (ret < 0)
10752                 goto out;
10753         /* Iterate all regular file extents and fill its csum */
10754         while (1) {
10755                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10756
10757                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10758                         goto next;
10759                 node = path.nodes[0];
10760                 slot = path.slots[0];
10761                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10762                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10763                         goto next;
10764                 start = btrfs_file_extent_disk_bytenr(node, fi);
10765                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10766
10767                 ret = populate_csum(trans, csum_root, buf, start, len);
10768                 if (ret == -EEXIST)
10769                         ret = 0;
10770                 if (ret < 0)
10771                         goto out;
10772 next:
10773                 /*
10774                  * TODO: if next leaf is corrupted, jump to nearest next valid
10775                  * leaf.
10776                  */
10777                 ret = btrfs_next_item(cur_root, &path);
10778                 if (ret < 0)
10779                         goto out;
10780                 if (ret > 0) {
10781                         ret = 0;
10782                         goto out;
10783                 }
10784         }
10785
10786 out:
10787         btrfs_release_path(&path);
10788         free(buf);
10789         return ret;
10790 }
10791
10792 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10793                                   struct btrfs_root *csum_root)
10794 {
10795         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10796         struct btrfs_path path;
10797         struct btrfs_root *tree_root = fs_info->tree_root;
10798         struct btrfs_root *cur_root;
10799         struct extent_buffer *node;
10800         struct btrfs_key key;
10801         int slot = 0;
10802         int ret = 0;
10803
10804         btrfs_init_path(&path);
10805         key.objectid = BTRFS_FS_TREE_OBJECTID;
10806         key.offset = 0;
10807         key.type = BTRFS_ROOT_ITEM_KEY;
10808         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
10809         if (ret < 0)
10810                 goto out;
10811         if (ret > 0) {
10812                 ret = -ENOENT;
10813                 goto out;
10814         }
10815
10816         while (1) {
10817                 node = path.nodes[0];
10818                 slot = path.slots[0];
10819                 btrfs_item_key_to_cpu(node, &key, slot);
10820                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10821                         goto out;
10822                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10823                         goto next;
10824                 if (!is_fstree(key.objectid))
10825                         goto next;
10826                 key.offset = (u64)-1;
10827
10828                 cur_root = btrfs_read_fs_root(fs_info, &key);
10829                 if (IS_ERR(cur_root) || !cur_root) {
10830                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10831                                 key.objectid);
10832                         goto out;
10833                 }
10834                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10835                                 cur_root);
10836                 if (ret < 0)
10837                         goto out;
10838 next:
10839                 ret = btrfs_next_item(tree_root, &path);
10840                 if (ret > 0) {
10841                         ret = 0;
10842                         goto out;
10843                 }
10844                 if (ret < 0)
10845                         goto out;
10846         }
10847
10848 out:
10849         btrfs_release_path(&path);
10850         return ret;
10851 }
10852
10853 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10854                                       struct btrfs_root *csum_root)
10855 {
10856         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10857         struct btrfs_path path;
10858         struct btrfs_extent_item *ei;
10859         struct extent_buffer *leaf;
10860         char *buf;
10861         struct btrfs_key key;
10862         int ret;
10863
10864         btrfs_init_path(&path);
10865         key.objectid = 0;
10866         key.type = BTRFS_EXTENT_ITEM_KEY;
10867         key.offset = 0;
10868         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10869         if (ret < 0) {
10870                 btrfs_release_path(&path);
10871                 return ret;
10872         }
10873
10874         buf = malloc(csum_root->sectorsize);
10875         if (!buf) {
10876                 btrfs_release_path(&path);
10877                 return -ENOMEM;
10878         }
10879
10880         while (1) {
10881                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10882                         ret = btrfs_next_leaf(extent_root, &path);
10883                         if (ret < 0)
10884                                 break;
10885                         if (ret) {
10886                                 ret = 0;
10887                                 break;
10888                         }
10889                 }
10890                 leaf = path.nodes[0];
10891
10892                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10893                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10894                         path.slots[0]++;
10895                         continue;
10896                 }
10897
10898                 ei = btrfs_item_ptr(leaf, path.slots[0],
10899                                     struct btrfs_extent_item);
10900                 if (!(btrfs_extent_flags(leaf, ei) &
10901                       BTRFS_EXTENT_FLAG_DATA)) {
10902                         path.slots[0]++;
10903                         continue;
10904                 }
10905
10906                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10907                                     key.offset);
10908                 if (ret)
10909                         break;
10910                 path.slots[0]++;
10911         }
10912
10913         btrfs_release_path(&path);
10914         free(buf);
10915         return ret;
10916 }
10917
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * Re-initialising the extent tree wipes all extent information, so in that
 * case the extent tree cannot be used as the source.  When @search_fs_tree
 * is set, the fs/subvolume trees are walked instead of the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
10934
10935 static void free_roots_info_cache(void)
10936 {
10937         if (!roots_info_cache)
10938                 return;
10939
10940         while (!cache_tree_empty(roots_info_cache)) {
10941                 struct cache_extent *entry;
10942                 struct root_item_info *rii;
10943
10944                 entry = first_cache_extent(roots_info_cache);
10945                 if (!entry)
10946                         break;
10947                 remove_cache_extent(roots_info_cache, entry);
10948                 rii = container_of(entry, struct root_item_info, cache_extent);
10949                 free(rii);
10950         }
10951
10952         free(roots_info_cache);
10953         roots_info_cache = NULL;
10954 }
10955
10956 static int build_roots_info_cache(struct btrfs_fs_info *info)
10957 {
10958         int ret = 0;
10959         struct btrfs_key key;
10960         struct extent_buffer *leaf;
10961         struct btrfs_path path;
10962
10963         if (!roots_info_cache) {
10964                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10965                 if (!roots_info_cache)
10966                         return -ENOMEM;
10967                 cache_tree_init(roots_info_cache);
10968         }
10969
10970         btrfs_init_path(&path);
10971         key.objectid = 0;
10972         key.type = BTRFS_EXTENT_ITEM_KEY;
10973         key.offset = 0;
10974         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
10975         if (ret < 0)
10976                 goto out;
10977         leaf = path.nodes[0];
10978
10979         while (1) {
10980                 struct btrfs_key found_key;
10981                 struct btrfs_extent_item *ei;
10982                 struct btrfs_extent_inline_ref *iref;
10983                 int slot = path.slots[0];
10984                 int type;
10985                 u64 flags;
10986                 u64 root_id;
10987                 u8 level;
10988                 struct cache_extent *entry;
10989                 struct root_item_info *rii;
10990
10991                 if (slot >= btrfs_header_nritems(leaf)) {
10992                         ret = btrfs_next_leaf(info->extent_root, &path);
10993                         if (ret < 0) {
10994                                 break;
10995                         } else if (ret) {
10996                                 ret = 0;
10997                                 break;
10998                         }
10999                         leaf = path.nodes[0];
11000                         slot = path.slots[0];
11001                 }
11002
11003                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11004
11005                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11006                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11007                         goto next;
11008
11009                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11010                 flags = btrfs_extent_flags(leaf, ei);
11011
11012                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11013                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11014                         goto next;
11015
11016                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11017                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11018                         level = found_key.offset;
11019                 } else {
11020                         struct btrfs_tree_block_info *binfo;
11021
11022                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11023                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11024                         level = btrfs_tree_block_level(leaf, binfo);
11025                 }
11026
11027                 /*
11028                  * For a root extent, it must be of the following type and the
11029                  * first (and only one) iref in the item.
11030                  */
11031                 type = btrfs_extent_inline_ref_type(leaf, iref);
11032                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11033                         goto next;
11034
11035                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11036                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11037                 if (!entry) {
11038                         rii = malloc(sizeof(struct root_item_info));
11039                         if (!rii) {
11040                                 ret = -ENOMEM;
11041                                 goto out;
11042                         }
11043                         rii->cache_extent.start = root_id;
11044                         rii->cache_extent.size = 1;
11045                         rii->level = (u8)-1;
11046                         entry = &rii->cache_extent;
11047                         ret = insert_cache_extent(roots_info_cache, entry);
11048                         ASSERT(ret == 0);
11049                 } else {
11050                         rii = container_of(entry, struct root_item_info,
11051                                            cache_extent);
11052                 }
11053
11054                 ASSERT(rii->cache_extent.start == root_id);
11055                 ASSERT(rii->cache_extent.size == 1);
11056
11057                 if (level > rii->level || rii->level == (u8)-1) {
11058                         rii->level = level;
11059                         rii->bytenr = found_key.objectid;
11060                         rii->gen = btrfs_extent_generation(leaf, ei);
11061                         rii->node_count = 1;
11062                 } else if (level == rii->level) {
11063                         rii->node_count++;
11064                 }
11065 next:
11066                 path.slots[0]++;
11067         }
11068
11069 out:
11070         btrfs_release_path(&path);
11071
11072         return ret;
11073 }
11074
11075 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11076                                   struct btrfs_path *path,
11077                                   const struct btrfs_key *root_key,
11078                                   const int read_only_mode)
11079 {
11080         const u64 root_id = root_key->objectid;
11081         struct cache_extent *entry;
11082         struct root_item_info *rii;
11083         struct btrfs_root_item ri;
11084         unsigned long offset;
11085
11086         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11087         if (!entry) {
11088                 fprintf(stderr,
11089                         "Error: could not find extent items for root %llu\n",
11090                         root_key->objectid);
11091                 return -ENOENT;
11092         }
11093
11094         rii = container_of(entry, struct root_item_info, cache_extent);
11095         ASSERT(rii->cache_extent.start == root_id);
11096         ASSERT(rii->cache_extent.size == 1);
11097
11098         if (rii->node_count != 1) {
11099                 fprintf(stderr,
11100                         "Error: could not find btree root extent for root %llu\n",
11101                         root_id);
11102                 return -ENOENT;
11103         }
11104
11105         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11106         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11107
11108         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11109             btrfs_root_level(&ri) != rii->level ||
11110             btrfs_root_generation(&ri) != rii->gen) {
11111
11112                 /*
11113                  * If we're in repair mode but our caller told us to not update
11114                  * the root item, i.e. just check if it needs to be updated, don't
11115                  * print this message, since the caller will call us again shortly
11116                  * for the same root item without read only mode (the caller will
11117                  * open a transaction first).
11118                  */
11119                 if (!(read_only_mode && repair))
11120                         fprintf(stderr,
11121                                 "%sroot item for root %llu,"
11122                                 " current bytenr %llu, current gen %llu, current level %u,"
11123                                 " new bytenr %llu, new gen %llu, new level %u\n",
11124                                 (read_only_mode ? "" : "fixing "),
11125                                 root_id,
11126                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11127                                 btrfs_root_level(&ri),
11128                                 rii->bytenr, rii->gen, rii->level);
11129
11130                 if (btrfs_root_generation(&ri) > rii->gen) {
11131                         fprintf(stderr,
11132                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11133                                 root_id, btrfs_root_generation(&ri), rii->gen);
11134                         return -EINVAL;
11135                 }
11136
11137                 if (!read_only_mode) {
11138                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11139                         btrfs_set_root_level(&ri, rii->level);
11140                         btrfs_set_root_generation(&ri, rii->gen);
11141                         write_extent_buffer(path->nodes[0], &ri,
11142                                             offset, sizeof(ri));
11143                 }
11144
11145                 return 1;
11146         }
11147
11148         return 0;
11149 }
11150
11151 /*
11152  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11153  * caused read-only snapshots to be corrupted if they were created at a moment
11154  * when the source subvolume/snapshot had orphan items. The issue was that the
11155  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11156  * node instead of the post orphan cleanup root node.
11157  * So this function, and its callees, just detects and fixes those cases. Even
11158  * though the regression was for read-only snapshots, this function applies to
11159  * any snapshot/subvolume root.
11160  * This must be run before any other repair code - not doing it so, makes other
11161  * repair code delete or modify backrefs in the extent tree for example, which
11162  * will result in an inconsistent fs after repairing the root items.
11163  */
11164 static int repair_root_items(struct btrfs_fs_info *info)
11165 {
11166         struct btrfs_path path;
11167         struct btrfs_key key;
11168         struct extent_buffer *leaf;
11169         struct btrfs_trans_handle *trans = NULL;
11170         int ret = 0;
11171         int bad_roots = 0;
11172         int need_trans = 0;
11173
11174         btrfs_init_path(&path);
11175
11176         ret = build_roots_info_cache(info);
11177         if (ret)
11178                 goto out;
11179
11180         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11181         key.type = BTRFS_ROOT_ITEM_KEY;
11182         key.offset = 0;
11183
11184 again:
11185         /*
11186          * Avoid opening and committing transactions if a leaf doesn't have
11187          * any root items that need to be fixed, so that we avoid rotating
11188          * backup roots unnecessarily.
11189          */
11190         if (need_trans) {
11191                 trans = btrfs_start_transaction(info->tree_root, 1);
11192                 if (IS_ERR(trans)) {
11193                         ret = PTR_ERR(trans);
11194                         goto out;
11195                 }
11196         }
11197
11198         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11199                                 0, trans ? 1 : 0);
11200         if (ret < 0)
11201                 goto out;
11202         leaf = path.nodes[0];
11203
11204         while (1) {
11205                 struct btrfs_key found_key;
11206
11207                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11208                         int no_more_keys = find_next_key(&path, &key);
11209
11210                         btrfs_release_path(&path);
11211                         if (trans) {
11212                                 ret = btrfs_commit_transaction(trans,
11213                                                                info->tree_root);
11214                                 trans = NULL;
11215                                 if (ret < 0)
11216                                         goto out;
11217                         }
11218                         need_trans = 0;
11219                         if (no_more_keys)
11220                                 break;
11221                         goto again;
11222                 }
11223
11224                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11225
11226                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11227                         goto next;
11228                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11229                         goto next;
11230
11231                 ret = maybe_repair_root_item(info, &path, &found_key,
11232                                              trans ? 0 : 1);
11233                 if (ret < 0)
11234                         goto out;
11235                 if (ret) {
11236                         if (!trans && repair) {
11237                                 need_trans = 1;
11238                                 key = found_key;
11239                                 btrfs_release_path(&path);
11240                                 goto again;
11241                         }
11242                         bad_roots++;
11243                 }
11244 next:
11245                 path.slots[0]++;
11246         }
11247         ret = 0;
11248 out:
11249         free_roots_info_cache();
11250         btrfs_release_path(&path);
11251         if (trans)
11252                 btrfs_commit_transaction(trans, info->tree_root);
11253         if (ret < 0)
11254                 return ret;
11255
11256         return bad_roots;
11257 }
11258
11259 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11260 {
11261         struct btrfs_trans_handle *trans;
11262         struct btrfs_block_group_cache *bg_cache;
11263         u64 current = 0;
11264         int ret = 0;
11265
11266         /* Clear all free space cache inodes and its extent data */
11267         while (1) {
11268                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11269                 if (!bg_cache)
11270                         break;
11271                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11272                 if (ret < 0)
11273                         return ret;
11274                 current = bg_cache->key.objectid + bg_cache->key.offset;
11275         }
11276
11277         /* Don't forget to set cache_generation to -1 */
11278         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11279         if (IS_ERR(trans)) {
11280                 error("failed to update super block cache generation");
11281                 return PTR_ERR(trans);
11282         }
11283         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11284         btrfs_commit_transaction(trans, fs_info->tree_root);
11285
11286         return ret;
11287 }
11288
/*
 * Help text of "btrfs check", one array entry per output line and
 * terminated by NULL.  Layout: synopsis, one-line summary, long
 * description, an empty separator entry, then the option list.
 */
const char * const cmd_check_usage[] = {
	/* synopsis */
	"btrfs check [options] <device>",

	/* one-line summary */
	"Check structural integrity of a filesystem (unmounted).",

	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",

	/* separator between description and options */
	"",

	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	/* end marker */
	NULL
};
11319
11320 int cmd_check(int argc, char **argv)
11321 {
11322         struct cache_tree root_cache;
11323         struct btrfs_root *root;
11324         struct btrfs_fs_info *info;
11325         u64 bytenr = 0;
11326         u64 subvolid = 0;
11327         u64 tree_root_bytenr = 0;
11328         u64 chunk_root_bytenr = 0;
11329         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11330         int ret;
11331         u64 num;
11332         int init_csum_tree = 0;
11333         int readonly = 0;
11334         int clear_space_cache = 0;
11335         int qgroup_report = 0;
11336         int qgroups_repaired = 0;
11337         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11338
11339         while(1) {
11340                 int c;
11341                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11342                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11343                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11344                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11345                 static const struct option long_options[] = {
11346                         { "super", required_argument, NULL, 's' },
11347                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11348                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11349                         { "init-csum-tree", no_argument, NULL,
11350                                 GETOPT_VAL_INIT_CSUM },
11351                         { "init-extent-tree", no_argument, NULL,
11352                                 GETOPT_VAL_INIT_EXTENT },
11353                         { "check-data-csum", no_argument, NULL,
11354                                 GETOPT_VAL_CHECK_CSUM },
11355                         { "backup", no_argument, NULL, 'b' },
11356                         { "subvol-extents", required_argument, NULL, 'E' },
11357                         { "qgroup-report", no_argument, NULL, 'Q' },
11358                         { "tree-root", required_argument, NULL, 'r' },
11359                         { "chunk-root", required_argument, NULL,
11360                                 GETOPT_VAL_CHUNK_TREE },
11361                         { "progress", no_argument, NULL, 'p' },
11362                         { "mode", required_argument, NULL,
11363                                 GETOPT_VAL_MODE },
11364                         { "clear-space-cache", required_argument, NULL,
11365                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11366                         { NULL, 0, NULL, 0}
11367                 };
11368
11369                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11370                 if (c < 0)
11371                         break;
11372                 switch(c) {
11373                         case 'a': /* ignored */ break;
11374                         case 'b':
11375                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11376                                 break;
11377                         case 's':
11378                                 num = arg_strtou64(optarg);
11379                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11380                                         error(
11381                                         "super mirror should be less than %d",
11382                                                 BTRFS_SUPER_MIRROR_MAX);
11383                                         exit(1);
11384                                 }
11385                                 bytenr = btrfs_sb_offset(((int)num));
11386                                 printf("using SB copy %llu, bytenr %llu\n", num,
11387                                        (unsigned long long)bytenr);
11388                                 break;
11389                         case 'Q':
11390                                 qgroup_report = 1;
11391                                 break;
11392                         case 'E':
11393                                 subvolid = arg_strtou64(optarg);
11394                                 break;
11395                         case 'r':
11396                                 tree_root_bytenr = arg_strtou64(optarg);
11397                                 break;
11398                         case GETOPT_VAL_CHUNK_TREE:
11399                                 chunk_root_bytenr = arg_strtou64(optarg);
11400                                 break;
11401                         case 'p':
11402                                 ctx.progress_enabled = true;
11403                                 break;
11404                         case '?':
11405                         case 'h':
11406                                 usage(cmd_check_usage);
11407                         case GETOPT_VAL_REPAIR:
11408                                 printf("enabling repair mode\n");
11409                                 repair = 1;
11410                                 ctree_flags |= OPEN_CTREE_WRITES;
11411                                 break;
11412                         case GETOPT_VAL_READONLY:
11413                                 readonly = 1;
11414                                 break;
11415                         case GETOPT_VAL_INIT_CSUM:
11416                                 printf("Creating a new CRC tree\n");
11417                                 init_csum_tree = 1;
11418                                 repair = 1;
11419                                 ctree_flags |= OPEN_CTREE_WRITES;
11420                                 break;
11421                         case GETOPT_VAL_INIT_EXTENT:
11422                                 init_extent_tree = 1;
11423                                 ctree_flags |= (OPEN_CTREE_WRITES |
11424                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11425                                 repair = 1;
11426                                 break;
11427                         case GETOPT_VAL_CHECK_CSUM:
11428                                 check_data_csum = 1;
11429                                 break;
11430                         case GETOPT_VAL_MODE:
11431                                 check_mode = parse_check_mode(optarg);
11432                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11433                                         error("unknown mode: %s", optarg);
11434                                         exit(1);
11435                                 }
11436                                 break;
11437                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11438                                 if (strcmp(optarg, "v1") == 0) {
11439                                         clear_space_cache = 1;
11440                                 } else if (strcmp(optarg, "v2") == 0) {
11441                                         clear_space_cache = 2;
11442                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11443                                 } else {
11444                                         error(
11445                 "invalid argument to --clear-space-cache, must be v1 or v2");
11446                                         exit(1);
11447                                 }
11448                                 ctree_flags |= OPEN_CTREE_WRITES;
11449                                 break;
11450                 }
11451         }
11452
11453         if (check_argc_exact(argc - optind, 1))
11454                 usage(cmd_check_usage);
11455
11456         if (ctx.progress_enabled) {
11457                 ctx.tp = TASK_NOTHING;
11458                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11459         }
11460
11461         /* This check is the only reason for --readonly to exist */
11462         if (readonly && repair) {
11463                 error("repair options are not compatible with --readonly");
11464                 exit(1);
11465         }
11466
11467         /*
11468          * Not supported yet
11469          */
11470         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11471                 error("low memory mode doesn't support repair yet");
11472                 exit(1);
11473         }
11474
11475         radix_tree_init();
11476         cache_tree_init(&root_cache);
11477
11478         if((ret = check_mounted(argv[optind])) < 0) {
11479                 error("could not check mount status: %s", strerror(-ret));
11480                 goto err_out;
11481         } else if(ret) {
11482                 error("%s is currently mounted, aborting", argv[optind]);
11483                 ret = -EBUSY;
11484                 goto err_out;
11485         }
11486
11487         /* only allow partial opening under repair mode */
11488         if (repair)
11489                 ctree_flags |= OPEN_CTREE_PARTIAL;
11490
11491         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11492                                   chunk_root_bytenr, ctree_flags);
11493         if (!info) {
11494                 error("cannot open file system");
11495                 ret = -EIO;
11496                 goto err_out;
11497         }
11498
11499         global_info = info;
11500         root = info->fs_root;
11501         if (clear_space_cache == 1) {
11502                 if (btrfs_fs_compat_ro(info,
11503                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11504                         error(
11505                 "free space cache v2 detected, use --clear-space-cache v2");
11506                         ret = 1;
11507                         goto close_out;
11508                 }
11509                 printf("Clearing free space cache\n");
11510                 ret = clear_free_space_cache(info);
11511                 if (ret) {
11512                         error("failed to clear free space cache");
11513                         ret = 1;
11514                 } else {
11515                         printf("Free space cache cleared\n");
11516                 }
11517                 goto close_out;
11518         } else if (clear_space_cache == 2) {
11519                 if (!btrfs_fs_compat_ro(info,
11520                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11521                         printf("no free space cache v2 to clear\n");
11522                         ret = 0;
11523                         goto close_out;
11524                 }
11525                 printf("Clear free space cache v2\n");
11526                 ret = btrfs_clear_free_space_tree(info);
11527                 if (ret) {
11528                         error("failed to clear free space cache v2: %d", ret);
11529                         ret = 1;
11530                 } else {
11531                         printf("free space cache v2 cleared\n");
11532                 }
11533                 goto close_out;
11534         }
11535
11536         /*
11537          * repair mode will force us to commit transaction which
11538          * will make us fail to load log tree when mounting.
11539          */
11540         if (repair && btrfs_super_log_root(info->super_copy)) {
11541                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11542                 if (!ret) {
11543                         ret = 1;
11544                         goto close_out;
11545                 }
11546                 ret = zero_log_tree(root);
11547                 if (ret) {
11548                         error("failed to zero log tree: %d", ret);
11549                         goto close_out;
11550                 }
11551         }
11552
11553         uuid_unparse(info->super_copy->fsid, uuidbuf);
11554         if (qgroup_report) {
11555                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11556                        uuidbuf);
11557                 ret = qgroup_verify_all(info);
11558                 if (ret == 0)
11559                         report_qgroups(1);
11560                 goto close_out;
11561         }
11562         if (subvolid) {
11563                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11564                        subvolid, argv[optind], uuidbuf);
11565                 ret = print_extent_state(info, subvolid);
11566                 goto close_out;
11567         }
11568         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11569
11570         if (!extent_buffer_uptodate(info->tree_root->node) ||
11571             !extent_buffer_uptodate(info->dev_root->node) ||
11572             !extent_buffer_uptodate(info->chunk_root->node)) {
11573                 error("critical roots corrupted, unable to check the filesystem");
11574                 ret = -EIO;
11575                 goto close_out;
11576         }
11577
11578         if (init_extent_tree || init_csum_tree) {
11579                 struct btrfs_trans_handle *trans;
11580
11581                 trans = btrfs_start_transaction(info->extent_root, 0);
11582                 if (IS_ERR(trans)) {
11583                         error("error starting transaction");
11584                         ret = PTR_ERR(trans);
11585                         goto close_out;
11586                 }
11587
11588                 if (init_extent_tree) {
11589                         printf("Creating a new extent tree\n");
11590                         ret = reinit_extent_tree(trans, info);
11591                         if (ret)
11592                                 goto close_out;
11593                 }
11594
11595                 if (init_csum_tree) {
11596                         printf("Reinitialize checksum tree\n");
11597                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11598                         if (ret) {
11599                                 error("checksum tree initialization failed: %d",
11600                                                 ret);
11601                                 ret = -EIO;
11602                                 goto close_out;
11603                         }
11604
11605                         ret = fill_csum_tree(trans, info->csum_root,
11606                                              init_extent_tree);
11607                         if (ret) {
11608                                 error("checksum tree refilling failed: %d", ret);
11609                                 return -EIO;
11610                         }
11611                 }
11612                 /*
11613                  * Ok now we commit and run the normal fsck, which will add
11614                  * extent entries for all of the items it finds.
11615                  */
11616                 ret = btrfs_commit_transaction(trans, info->extent_root);
11617                 if (ret)
11618                         goto close_out;
11619         }
11620         if (!extent_buffer_uptodate(info->extent_root->node)) {
11621                 error("critical: extent_root, unable to check the filesystem");
11622                 ret = -EIO;
11623                 goto close_out;
11624         }
11625         if (!extent_buffer_uptodate(info->csum_root->node)) {
11626                 error("critical: csum_root, unable to check the filesystem");
11627                 ret = -EIO;
11628                 goto close_out;
11629         }
11630
11631         if (!ctx.progress_enabled)
11632                 fprintf(stderr, "checking extents\n");
11633         if (check_mode == CHECK_MODE_LOWMEM)
11634                 ret = check_chunks_and_extents_v2(root);
11635         else
11636                 ret = check_chunks_and_extents(root);
11637         if (ret)
11638                 error(
11639                 "errors found in extent allocation tree or chunk allocation");
11640
11641         ret = repair_root_items(info);
11642         if (ret < 0)
11643                 goto close_out;
11644         if (repair) {
11645                 fprintf(stderr, "Fixed %d roots.\n", ret);
11646                 ret = 0;
11647         } else if (ret > 0) {
11648                 fprintf(stderr,
11649                        "Found %d roots with an outdated root item.\n",
11650                        ret);
11651                 fprintf(stderr,
11652                         "Please run a filesystem check with the option --repair to fix them.\n");
11653                 ret = 1;
11654                 goto close_out;
11655         }
11656
11657         if (!ctx.progress_enabled) {
11658                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11659                         fprintf(stderr, "checking free space tree\n");
11660                 else
11661                         fprintf(stderr, "checking free space cache\n");
11662         }
11663         ret = check_space_cache(root);
11664         if (ret)
11665                 goto out;
11666
11667         /*
11668          * We used to have to have these hole extents in between our real
11669          * extents so if we don't have this flag set we need to make sure there
11670          * are no gaps in the file extents for inodes, otherwise we can just
11671          * ignore it when this happens.
11672          */
11673         no_holes = btrfs_fs_incompat(root->fs_info,
11674                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11675         if (!ctx.progress_enabled)
11676                 fprintf(stderr, "checking fs roots\n");
11677         ret = check_fs_roots(root, &root_cache);
11678         if (ret)
11679                 goto out;
11680
11681         fprintf(stderr, "checking csums\n");
11682         ret = check_csums(root);
11683         if (ret)
11684                 goto out;
11685
11686         fprintf(stderr, "checking root refs\n");
11687         ret = check_root_refs(root, &root_cache);
11688         if (ret)
11689                 goto out;
11690
11691         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11692                 struct extent_buffer *eb;
11693
11694                 eb = list_first_entry(&root->fs_info->recow_ebs,
11695                                       struct extent_buffer, recow);
11696                 list_del_init(&eb->recow);
11697                 ret = recow_extent_buffer(root, eb);
11698                 if (ret)
11699                         break;
11700         }
11701
11702         while (!list_empty(&delete_items)) {
11703                 struct bad_item *bad;
11704
11705                 bad = list_first_entry(&delete_items, struct bad_item, list);
11706                 list_del_init(&bad->list);
11707                 if (repair)
11708                         ret = delete_bad_item(root, bad);
11709                 free(bad);
11710         }
11711
11712         if (info->quota_enabled) {
11713                 int err;
11714                 fprintf(stderr, "checking quota groups\n");
11715                 err = qgroup_verify_all(info);
11716                 if (err)
11717                         goto out;
11718                 report_qgroups(0);
11719                 err = repair_qgroups(info, &qgroups_repaired);
11720                 if (err)
11721                         goto out;
11722         }
11723
11724         if (!list_empty(&root->fs_info->recow_ebs)) {
11725                 error("transid errors in file system");
11726                 ret = 1;
11727         }
11728 out:
11729         /* Don't override original ret */
11730         if (!ret && qgroups_repaired)
11731                 ret = qgroups_repaired;
11732
11733         if (found_old_backref) { /*
11734                  * there was a disk format change when mixed
11735                  * backref was in testing tree. The old format
11736                  * existed about one week.
11737                  */
11738                 printf("\n * Found old mixed backref format. "
11739                        "The old format is not supported! *"
11740                        "\n * Please mount the FS in readonly mode, "
11741                        "backup data and re-format the FS. *\n\n");
11742                 ret = 1;
11743         }
11744         printf("found %llu bytes used err is %d\n",
11745                (unsigned long long)bytes_used, ret);
11746         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11747         printf("total tree bytes: %llu\n",
11748                (unsigned long long)total_btree_bytes);
11749         printf("total fs tree bytes: %llu\n",
11750                (unsigned long long)total_fs_tree_bytes);
11751         printf("total extent tree bytes: %llu\n",
11752                (unsigned long long)total_extent_tree_bytes);
11753         printf("btree space waste bytes: %llu\n",
11754                (unsigned long long)btree_space_waste);
11755         printf("file data blocks allocated: %llu\n referenced %llu\n",
11756                 (unsigned long long)data_bytes_allocated,
11757                 (unsigned long long)data_bytes_referenced);
11758
11759         free_qgroup_counts();
11760         free_root_recs_tree(&root_cache);
11761 close_out:
11762         close_ctree(root);
11763 err_out:
11764         if (ctx.progress_enabled)
11765                 task_deinit(ctx.info);
11766
11767         return ret;
11768 }