btrfs-progs: check: use on-stack path buffer in try_to_fix_bad_block
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase of the check that the progress indicator is reporting on.
 * The first three values index the status strings in print_status_check().
 */
enum task_position {
	TASK_EXTENTS	= 0,
	TASK_FREE_SPACE	= 1,
	TASK_FS_ROOTS	= 2,
	/* Must stay the last element; it has no status string of its own. */
	TASK_NOTHING	= 3,
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned for a mode name that is not recognised. */
	CHECK_MODE_UNKNOWN,
	/* Alias, not a separate mode: the original mode is the default. */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
86 struct extent_backref {
87         struct list_head list;
88         unsigned int is_data:1;
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
100 struct data_backref {
101         struct extent_backref node;
102         union {
103                 u64 parent;
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;
109         u64 bytes;
110         u64 ram_bytes;
111         u32 num_refs;
112         u32 found_ref;
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 /*
121  * Much like data_backref, just removed the undetermined members
122  * and change it to use list_head.
123  * During extent scan, it is stored in root->orphan_data_extent.
124  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
125  */
126 struct orphan_data_extent {
127         struct list_head list;
128         u64 root;
129         u64 objectid;
130         u64 offset;
131         u64 disk_bytenr;
132         u64 disk_len;
133 };
134
135 struct tree_backref {
136         struct extent_backref node;
137         union {
138                 u64 parent;
139                 u64 root;
140         };
141 };
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
150
151 struct extent_record {
152         struct list_head backrefs;
153         struct list_head dups;
154         struct list_head list;
155         struct cache_extent cache;
156         struct btrfs_disk_key parent_key;
157         u64 start;
158         u64 max_size;
159         u64 nr;
160         u64 refs;
161         u64 extent_item_refs;
162         u64 generation;
163         u64 parent_generation;
164         u64 info_objectid;
165         u32 num_duplicates;
166         u8 info_level;
167         unsigned int flag_block_full_backref:2;
168         unsigned int found_rec:1;
169         unsigned int content_checked:1;
170         unsigned int owner_ref_checked:1;
171         unsigned int is_root:1;
172         unsigned int metadata:1;
173         unsigned int bad_full_backref:1;
174         unsigned int crossing_stripes:1;
175         unsigned int wrong_chunk_type:1;
176 };
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
183 struct inode_backref {
184         struct list_head list;
185         unsigned int found_dir_item:1;
186         unsigned int found_dir_index:1;
187         unsigned int found_inode_ref:1;
188         u8 filetype;
189         u8 ref_type;
190         int errors;
191         u64 dir;
192         u64 index;
193         u16 namelen;
194         char name[0];
195 };
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
202 struct root_item_record {
203         struct list_head list;
204         u64 objectid;
205         u64 bytenr;
206         u64 last_snapshot;
207         u8 level;
208         u8 drop_level;
209         int level_size;
210         struct btrfs_key drop_key;
211 };
212
/*
 * Error bits stored in the "errors" member of inode and root backrefs and
 * decoded by print_ref_error().  The hexadecimal value is given next to
 * each bit to make it easy to match against a printed mask.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
226
227 struct file_extent_hole {
228         struct rb_node node;
229         u64 start;
230         u64 len;
231 };
232
233 struct inode_record {
234         struct list_head backrefs;
235         unsigned int checked:1;
236         unsigned int merging:1;
237         unsigned int found_inode_item:1;
238         unsigned int found_dir_item:1;
239         unsigned int found_file_extent:1;
240         unsigned int found_csum_item:1;
241         unsigned int some_csum_missing:1;
242         unsigned int nodatasum:1;
243         int errors;
244
245         u64 ino;
246         u32 nlink;
247         u32 imode;
248         u64 isize;
249         u64 nbytes;
250
251         u32 found_link;
252         u64 found_size;
253         u64 extent_start;
254         u64 extent_end;
255         struct rb_root holes;
256         struct list_head orphan_extents;
257
258         u32 refs;
259 };
260
/*
 * Error bits stored in inode_record::errors and decoded by
 * print_inode_error().  The hexadecimal value is given next to each bit to
 * make it easy to match against the printed mask.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
276
277 struct root_backref {
278         struct list_head list;
279         unsigned int found_dir_item:1;
280         unsigned int found_dir_index:1;
281         unsigned int found_back_ref:1;
282         unsigned int found_forward_ref:1;
283         unsigned int reachable:1;
284         int errors;
285         u64 ref_root;
286         u64 dir;
287         u64 index;
288         u16 namelen;
289         char name[0];
290 };
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
297 struct root_record {
298         struct list_head backrefs;
299         struct cache_extent cache;
300         unsigned int found_root_item:1;
301         u64 objectid;
302         u32 found_ref;
303 };
304
305 struct ptr_node {
306         struct cache_extent cache;
307         void *data;
308 };
309
310 struct shared_node {
311         struct cache_extent cache;
312         struct cache_tree root_cache;
313         struct cache_tree inode_cache;
314         struct inode_record *current;
315         u32 refs;
316 };
317
318 struct block_info {
319         u64 start;
320         u32 size;
321 };
322
323 struct walk_control {
324         struct cache_tree shared;
325         struct shared_node *nodes[BTRFS_MAX_LEVEL];
326         int active_node;
327         int root_level;
328 };
329
330 struct bad_item {
331         struct btrfs_key key;
332         u64 root_id;
333         struct list_head list;
334 };
335
336 struct extent_entry {
337         u64 bytenr;
338         u64 bytes;
339         int count;
340         int broken;
341         struct list_head list;
342 };
343
344 struct root_item_info {
345         /* level of the root */
346         u8 level;
347         /* number of nodes at this level, must be 1 for a root */
348         int node_count;
349         u64 bytenr;
350         u64 gen;
351         struct cache_extent cache_extent;
352 };
353
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values yet; they are only
 * used internally for error classification.
 *
 * Every flag must own a distinct bit, otherwise two different errors become
 * indistinguishable once they are OR-ed into one mask.
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH
 * and therefore lives on the first free bit now.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         struct rb_node *rb;
663         size_t size;
664         int ret;
665
666         rec = malloc(sizeof(*rec));
667         if (!rec)
668                 return ERR_PTR(-ENOMEM);
669         memcpy(rec, orig_rec, sizeof(*rec));
670         rec->refs = 1;
671         INIT_LIST_HEAD(&rec->backrefs);
672         INIT_LIST_HEAD(&rec->orphan_extents);
673         rec->holes = RB_ROOT;
674
675         list_for_each_entry(orig, &orig_rec->backrefs, list) {
676                 size = sizeof(*orig) + orig->namelen + 1;
677                 backref = malloc(size);
678                 if (!backref) {
679                         ret = -ENOMEM;
680                         goto cleanup;
681                 }
682                 memcpy(backref, orig, size);
683                 list_add_tail(&backref->list, &rec->backrefs);
684         }
685         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686                 dst_orphan = malloc(sizeof(*dst_orphan));
687                 if (!dst_orphan) {
688                         ret = -ENOMEM;
689                         goto cleanup;
690                 }
691                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693         }
694         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
695         if (ret < 0)
696                 goto cleanup_rb;
697
698         return rec;
699
700 cleanup_rb:
701         rb = rb_first(&rec->holes);
702         while (rb) {
703                 struct file_extent_hole *hole;
704
705                 hole = rb_entry(rb, struct file_extent_hole, node);
706                 rb = rb_next(rb);
707                 free(hole);
708         }
709
710 cleanup:
711         if (!list_empty(&rec->backrefs))
712                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713                         list_del(&orig->list);
714                         free(orig);
715                 }
716
717         if (!list_empty(&rec->orphan_extents))
718                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719                         list_del(&orig->list);
720                         free(orig);
721                 }
722
723         free(rec);
724
725         return ERR_PTR(ret);
726 }
727
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
729                                       u64 objectid)
730 {
731         struct orphan_data_extent *orphan;
732
733         if (list_empty(orphan_extents))
734                 return;
735         printf("The following data extent is lost in tree %llu:\n",
736                objectid);
737         list_for_each_entry(orphan, orphan_extents, list) {
738                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
740                        orphan->disk_len);
741         }
742 }
743
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
745 {
746         u64 root_objectid = root->root_key.objectid;
747         int errors = rec->errors;
748
749         if (!errors)
750                 return;
751         /* reloc root errors, we print its corresponding fs root objectid*/
752         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753                 root_objectid = root->root_key.offset;
754                 fprintf(stderr, "reloc");
755         }
756         fprintf(stderr, "root %llu inode %llu errors %x",
757                 (unsigned long long) root_objectid,
758                 (unsigned long long) rec->ino, rec->errors);
759
760         if (errors & I_ERR_NO_INODE_ITEM)
761                 fprintf(stderr, ", no inode item");
762         if (errors & I_ERR_NO_ORPHAN_ITEM)
763                 fprintf(stderr, ", no orphan item");
764         if (errors & I_ERR_DUP_INODE_ITEM)
765                 fprintf(stderr, ", dup inode item");
766         if (errors & I_ERR_DUP_DIR_INDEX)
767                 fprintf(stderr, ", dup dir index");
768         if (errors & I_ERR_ODD_DIR_ITEM)
769                 fprintf(stderr, ", odd dir item");
770         if (errors & I_ERR_ODD_FILE_EXTENT)
771                 fprintf(stderr, ", odd file extent");
772         if (errors & I_ERR_BAD_FILE_EXTENT)
773                 fprintf(stderr, ", bad file extent");
774         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775                 fprintf(stderr, ", file extent overlap");
776         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777                 fprintf(stderr, ", file extent discount");
778         if (errors & I_ERR_DIR_ISIZE_WRONG)
779                 fprintf(stderr, ", dir isize wrong");
780         if (errors & I_ERR_FILE_NBYTES_WRONG)
781                 fprintf(stderr, ", nbytes wrong");
782         if (errors & I_ERR_ODD_CSUM_ITEM)
783                 fprintf(stderr, ", odd csum item");
784         if (errors & I_ERR_SOME_CSUM_MISSING)
785                 fprintf(stderr, ", some csum missing");
786         if (errors & I_ERR_LINK_COUNT_WRONG)
787                 fprintf(stderr, ", link count wrong");
788         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789                 fprintf(stderr, ", orphan file extent");
790         fprintf(stderr, "\n");
791         /* Print the orphan extents if needed */
792         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
794
795         /* Print the holes if needed */
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797                 struct file_extent_hole *hole;
798                 struct rb_node *node;
799                 int found = 0;
800
801                 node = rb_first(&rec->holes);
802                 fprintf(stderr, "Found file extent holes:\n");
803                 while (node) {
804                         found = 1;
805                         hole = rb_entry(node, struct file_extent_hole, node);
806                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
807                                 hole->start, hole->len);
808                         node = rb_next(node);
809                 }
810                 if (!found)
811                         fprintf(stderr, "\tstart: 0, len: %llu\n",
812                                 round_up(rec->isize, root->sectorsize));
813         }
814 }
815
816 static void print_ref_error(int errors)
817 {
818         if (errors & REF_ERR_NO_DIR_ITEM)
819                 fprintf(stderr, ", no dir item");
820         if (errors & REF_ERR_NO_DIR_INDEX)
821                 fprintf(stderr, ", no dir index");
822         if (errors & REF_ERR_NO_INODE_REF)
823                 fprintf(stderr, ", no inode ref");
824         if (errors & REF_ERR_DUP_DIR_ITEM)
825                 fprintf(stderr, ", dup dir item");
826         if (errors & REF_ERR_DUP_DIR_INDEX)
827                 fprintf(stderr, ", dup dir index");
828         if (errors & REF_ERR_DUP_INODE_REF)
829                 fprintf(stderr, ", dup inode ref");
830         if (errors & REF_ERR_INDEX_UNMATCH)
831                 fprintf(stderr, ", index mismatch");
832         if (errors & REF_ERR_FILETYPE_UNMATCH)
833                 fprintf(stderr, ", filetype mismatch");
834         if (errors & REF_ERR_NAME_TOO_LONG)
835                 fprintf(stderr, ", name too long");
836         if (errors & REF_ERR_NO_ROOT_REF)
837                 fprintf(stderr, ", no root ref");
838         if (errors & REF_ERR_NO_ROOT_BACKREF)
839                 fprintf(stderr, ", no root backref");
840         if (errors & REF_ERR_DUP_ROOT_REF)
841                 fprintf(stderr, ", dup root ref");
842         if (errors & REF_ERR_DUP_ROOT_BACKREF)
843                 fprintf(stderr, ", dup root backref");
844         fprintf(stderr, "\n");
845 }
846
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
848                                           u64 ino, int mod)
849 {
850         struct ptr_node *node;
851         struct cache_extent *cache;
852         struct inode_record *rec = NULL;
853         int ret;
854
855         cache = lookup_cache_extent(inode_cache, ino, 1);
856         if (cache) {
857                 node = container_of(cache, struct ptr_node, cache);
858                 rec = node->data;
859                 if (mod && rec->refs > 1) {
860                         node->data = clone_inode_rec(rec);
861                         if (IS_ERR(node->data))
862                                 return node->data;
863                         rec->refs--;
864                         rec = node->data;
865                 }
866         } else if (mod) {
867                 rec = calloc(1, sizeof(*rec));
868                 if (!rec)
869                         return ERR_PTR(-ENOMEM);
870                 rec->ino = ino;
871                 rec->extent_start = (u64)-1;
872                 rec->refs = 1;
873                 INIT_LIST_HEAD(&rec->backrefs);
874                 INIT_LIST_HEAD(&rec->orphan_extents);
875                 rec->holes = RB_ROOT;
876
877                 node = malloc(sizeof(*node));
878                 if (!node) {
879                         free(rec);
880                         return ERR_PTR(-ENOMEM);
881                 }
882                 node->cache.start = ino;
883                 node->cache.size = 1;
884                 node->data = rec;
885
886                 if (ino == BTRFS_FREE_INO_OBJECTID)
887                         rec->found_link = 1;
888
889                 ret = insert_cache_extent(inode_cache, &node->cache);
890                 if (ret)
891                         return ERR_PTR(-EEXIST);
892         }
893         return rec;
894 }
895
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
897 {
898         struct orphan_data_extent *orphan;
899
900         while (!list_empty(orphan_extents)) {
901                 orphan = list_entry(orphan_extents->next,
902                                     struct orphan_data_extent, list);
903                 list_del(&orphan->list);
904                 free(orphan);
905         }
906 }
907
908 static void free_inode_rec(struct inode_record *rec)
909 {
910         struct inode_backref *backref;
911
912         if (--rec->refs > 0)
913                 return;
914
915         while (!list_empty(&rec->backrefs)) {
916                 backref = to_inode_backref(rec->backrefs.next);
917                 list_del(&backref->list);
918                 free(backref);
919         }
920         free_orphan_data_extents(&rec->orphan_extents);
921         free_file_extent_holes(&rec->holes);
922         free(rec);
923 }
924
925 static int can_free_inode_rec(struct inode_record *rec)
926 {
927         if (!rec->errors && rec->checked && rec->found_inode_item &&
928             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
929                 return 1;
930         return 0;
931 }
932
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934                                  struct inode_record *rec)
935 {
936         struct cache_extent *cache;
937         struct inode_backref *tmp, *backref;
938         struct ptr_node *node;
939         u8 filetype;
940
941         if (!rec->found_inode_item)
942                 return;
943
944         filetype = imode_to_type(rec->imode);
945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946                 if (backref->found_dir_item && backref->found_dir_index) {
947                         if (backref->filetype != filetype)
948                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949                         if (!backref->errors && backref->found_inode_ref &&
950                             rec->nlink == rec->found_link) {
951                                 list_del(&backref->list);
952                                 free(backref);
953                         }
954                 }
955         }
956
957         if (!rec->checked || rec->merging)
958                 return;
959
960         if (S_ISDIR(rec->imode)) {
961                 if (rec->found_size != rec->isize)
962                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963                 if (rec->found_file_extent)
964                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
965         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966                 if (rec->found_dir_item)
967                         rec->errors |= I_ERR_ODD_DIR_ITEM;
968                 if (rec->found_size != rec->nbytes)
969                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970                 if (rec->nlink > 0 && !no_holes &&
971                     (rec->extent_end < rec->isize ||
972                      first_extent_gap(&rec->holes) < rec->isize))
973                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
974         }
975
976         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977                 if (rec->found_csum_item && rec->nodatasum)
978                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
979                 if (rec->some_csum_missing && !rec->nodatasum)
980                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
981         }
982
983         BUG_ON(rec->refs != 1);
984         if (can_free_inode_rec(rec)) {
985                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986                 node = container_of(cache, struct ptr_node, cache);
987                 BUG_ON(node->data != rec);
988                 remove_cache_extent(inode_cache, &node->cache);
989                 free(node);
990                 free_inode_rec(rec);
991         }
992 }
993
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
995 {
996         struct btrfs_path path;
997         struct btrfs_key key;
998         int ret;
999
1000         key.objectid = BTRFS_ORPHAN_OBJECTID;
1001         key.type = BTRFS_ORPHAN_ITEM_KEY;
1002         key.offset = ino;
1003
1004         btrfs_init_path(&path);
1005         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006         btrfs_release_path(&path);
1007         if (ret > 0)
1008                 ret = -ENOENT;
1009         return ret;
1010 }
1011
1012 static int process_inode_item(struct extent_buffer *eb,
1013                               int slot, struct btrfs_key *key,
1014                               struct shared_node *active_node)
1015 {
1016         struct inode_record *rec;
1017         struct btrfs_inode_item *item;
1018
1019         rec = active_node->current;
1020         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021         if (rec->found_inode_item) {
1022                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1023                 return 1;
1024         }
1025         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026         rec->nlink = btrfs_inode_nlink(eb, item);
1027         rec->isize = btrfs_inode_size(eb, item);
1028         rec->nbytes = btrfs_inode_nbytes(eb, item);
1029         rec->imode = btrfs_inode_mode(eb, item);
1030         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1031                 rec->nodatasum = 1;
1032         rec->found_inode_item = 1;
1033         if (rec->nlink == 0)
1034                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035         maybe_free_inode_rec(&active_node->inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1040                                                 const char *name,
1041                                                 int namelen, u64 dir)
1042 {
1043         struct inode_backref *backref;
1044
1045         list_for_each_entry(backref, &rec->backrefs, list) {
1046                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1047                         break;
1048                 if (backref->dir != dir || backref->namelen != namelen)
1049                         continue;
1050                 if (memcmp(name, backref->name, namelen))
1051                         continue;
1052                 return backref;
1053         }
1054
1055         backref = malloc(sizeof(*backref) + namelen + 1);
1056         if (!backref)
1057                 return NULL;
1058         memset(backref, 0, sizeof(*backref));
1059         backref->dir = dir;
1060         backref->namelen = namelen;
1061         memcpy(backref->name, name, namelen);
1062         backref->name[namelen] = '\0';
1063         list_add_tail(&backref->list, &rec->backrefs);
1064         return backref;
1065 }
1066
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068                              u64 ino, u64 dir, u64 index,
1069                              const char *name, int namelen,
1070                              u8 filetype, u8 itemtype, int errors)
1071 {
1072         struct inode_record *rec;
1073         struct inode_backref *backref;
1074
1075         rec = get_inode_rec(inode_cache, ino, 1);
1076         BUG_ON(IS_ERR(rec));
1077         backref = get_inode_backref(rec, name, namelen, dir);
1078         BUG_ON(!backref);
1079         if (errors)
1080                 backref->errors |= errors;
1081         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082                 if (backref->found_dir_index)
1083                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084                 if (backref->found_inode_ref && backref->index != index)
1085                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1086                 if (backref->found_dir_item && backref->filetype != filetype)
1087                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1088
1089                 backref->index = index;
1090                 backref->filetype = filetype;
1091                 backref->found_dir_index = 1;
1092         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1093                 rec->found_link++;
1094                 if (backref->found_dir_item)
1095                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096                 if (backref->found_dir_index && backref->filetype != filetype)
1097                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1098
1099                 backref->filetype = filetype;
1100                 backref->found_dir_item = 1;
1101         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103                 if (backref->found_inode_ref)
1104                         backref->errors |= REF_ERR_DUP_INODE_REF;
1105                 if (backref->found_dir_index && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 else
1108                         backref->index = index;
1109
1110                 backref->ref_type = itemtype;
1111                 backref->found_inode_ref = 1;
1112         } else {
1113                 BUG_ON(1);
1114         }
1115
1116         maybe_free_inode_rec(inode_cache, rec);
1117         return 0;
1118 }
1119
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121                             struct cache_tree *dst_cache)
1122 {
1123         struct inode_backref *backref;
1124         u32 dir_count = 0;
1125         int ret = 0;
1126
1127         dst->merging = 1;
1128         list_for_each_entry(backref, &src->backrefs, list) {
1129                 if (backref->found_dir_index) {
1130                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1131                                         backref->index, backref->name,
1132                                         backref->namelen, backref->filetype,
1133                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1134                 }
1135                 if (backref->found_dir_item) {
1136                         dir_count++;
1137                         add_inode_backref(dst_cache, dst->ino,
1138                                         backref->dir, 0, backref->name,
1139                                         backref->namelen, backref->filetype,
1140                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1141                 }
1142                 if (backref->found_inode_ref) {
1143                         add_inode_backref(dst_cache, dst->ino,
1144                                         backref->dir, backref->index,
1145                                         backref->name, backref->namelen, 0,
1146                                         backref->ref_type, backref->errors);
1147                 }
1148         }
1149
1150         if (src->found_dir_item)
1151                 dst->found_dir_item = 1;
1152         if (src->found_file_extent)
1153                 dst->found_file_extent = 1;
1154         if (src->found_csum_item)
1155                 dst->found_csum_item = 1;
1156         if (src->some_csum_missing)
1157                 dst->some_csum_missing = 1;
1158         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1160                 if (ret < 0)
1161                         return ret;
1162         }
1163
1164         BUG_ON(src->found_link < dir_count);
1165         dst->found_link += src->found_link - dir_count;
1166         dst->found_size += src->found_size;
1167         if (src->extent_start != (u64)-1) {
1168                 if (dst->extent_start == (u64)-1) {
1169                         dst->extent_start = src->extent_start;
1170                         dst->extent_end = src->extent_end;
1171                 } else {
1172                         if (dst->extent_end > src->extent_start)
1173                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174                         else if (dst->extent_end < src->extent_start) {
1175                                 ret = add_file_extent_hole(&dst->holes,
1176                                         dst->extent_end,
1177                                         src->extent_start - dst->extent_end);
1178                         }
1179                         if (dst->extent_end < src->extent_end)
1180                                 dst->extent_end = src->extent_end;
1181                 }
1182         }
1183
1184         dst->errors |= src->errors;
1185         if (src->found_inode_item) {
1186                 if (!dst->found_inode_item) {
1187                         dst->nlink = src->nlink;
1188                         dst->isize = src->isize;
1189                         dst->nbytes = src->nbytes;
1190                         dst->imode = src->imode;
1191                         dst->nodatasum = src->nodatasum;
1192                         dst->found_inode_item = 1;
1193                 } else {
1194                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1195                 }
1196         }
1197         dst->merging = 0;
1198
1199         return 0;
1200 }
1201
1202 static int splice_shared_node(struct shared_node *src_node,
1203                               struct shared_node *dst_node)
1204 {
1205         struct cache_extent *cache;
1206         struct ptr_node *node, *ins;
1207         struct cache_tree *src, *dst;
1208         struct inode_record *rec, *conflict;
1209         u64 current_ino = 0;
1210         int splice = 0;
1211         int ret;
1212
1213         if (--src_node->refs == 0)
1214                 splice = 1;
1215         if (src_node->current)
1216                 current_ino = src_node->current->ino;
1217
1218         src = &src_node->root_cache;
1219         dst = &dst_node->root_cache;
1220 again:
1221         cache = search_cache_extent(src, 0);
1222         while (cache) {
1223                 node = container_of(cache, struct ptr_node, cache);
1224                 rec = node->data;
1225                 cache = next_cache_extent(cache);
1226
1227                 if (splice) {
1228                         remove_cache_extent(src, &node->cache);
1229                         ins = node;
1230                 } else {
1231                         ins = malloc(sizeof(*ins));
1232                         BUG_ON(!ins);
1233                         ins->cache.start = node->cache.start;
1234                         ins->cache.size = node->cache.size;
1235                         ins->data = rec;
1236                         rec->refs++;
1237                 }
1238                 ret = insert_cache_extent(dst, &ins->cache);
1239                 if (ret == -EEXIST) {
1240                         conflict = get_inode_rec(dst, rec->ino, 1);
1241                         BUG_ON(IS_ERR(conflict));
1242                         merge_inode_recs(rec, conflict, dst);
1243                         if (rec->checked) {
1244                                 conflict->checked = 1;
1245                                 if (dst_node->current == conflict)
1246                                         dst_node->current = NULL;
1247                         }
1248                         maybe_free_inode_rec(dst, conflict);
1249                         free_inode_rec(rec);
1250                         free(ins);
1251                 } else {
1252                         BUG_ON(ret);
1253                 }
1254         }
1255
1256         if (src == &src_node->root_cache) {
1257                 src = &src_node->inode_cache;
1258                 dst = &dst_node->inode_cache;
1259                 goto again;
1260         }
1261
1262         if (current_ino > 0 && (!dst_node->current ||
1263             current_ino > dst_node->current->ino)) {
1264                 if (dst_node->current) {
1265                         dst_node->current->checked = 1;
1266                         maybe_free_inode_rec(dst, dst_node->current);
1267                 }
1268                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269                 BUG_ON(IS_ERR(dst_node->current));
1270         }
1271         return 0;
1272 }
1273
1274 static void free_inode_ptr(struct cache_extent *cache)
1275 {
1276         struct ptr_node *node;
1277         struct inode_record *rec;
1278
1279         node = container_of(cache, struct ptr_node, cache);
1280         rec = node->data;
1281         free_inode_rec(rec);
1282         free(node);
1283 }
1284
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1286
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1288                                             u64 bytenr)
1289 {
1290         struct cache_extent *cache;
1291         struct shared_node *node;
1292
1293         cache = lookup_cache_extent(shared, bytenr, 1);
1294         if (cache) {
1295                 node = container_of(cache, struct shared_node, cache);
1296                 return node;
1297         }
1298         return NULL;
1299 }
1300
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1302 {
1303         int ret;
1304         struct shared_node *node;
1305
1306         node = calloc(1, sizeof(*node));
1307         if (!node)
1308                 return -ENOMEM;
1309         node->cache.start = bytenr;
1310         node->cache.size = 1;
1311         cache_tree_init(&node->root_cache);
1312         cache_tree_init(&node->inode_cache);
1313         node->refs = refs;
1314
1315         ret = insert_cache_extent(shared, &node->cache);
1316
1317         return ret;
1318 }
1319
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321                              struct walk_control *wc, int level)
1322 {
1323         struct shared_node *node;
1324         struct shared_node *dest;
1325         int ret;
1326
1327         if (level == wc->active_node)
1328                 return 0;
1329
1330         BUG_ON(wc->active_node <= level);
1331         node = find_shared_node(&wc->shared, bytenr);
1332         if (!node) {
1333                 ret = add_shared_node(&wc->shared, bytenr, refs);
1334                 BUG_ON(ret);
1335                 node = find_shared_node(&wc->shared, bytenr);
1336                 wc->nodes[level] = node;
1337                 wc->active_node = level;
1338                 return 0;
1339         }
1340
1341         if (wc->root_level == wc->active_node &&
1342             btrfs_root_refs(&root->root_item) == 0) {
1343                 if (--node->refs == 0) {
1344                         free_inode_recs_tree(&node->root_cache);
1345                         free_inode_recs_tree(&node->inode_cache);
1346                         remove_cache_extent(&wc->shared, &node->cache);
1347                         free(node);
1348                 }
1349                 return 1;
1350         }
1351
1352         dest = wc->nodes[wc->active_node];
1353         splice_shared_node(node, dest);
1354         if (node->refs == 0) {
1355                 remove_cache_extent(&wc->shared, &node->cache);
1356                 free(node);
1357         }
1358         return 1;
1359 }
1360
1361 static int leave_shared_node(struct btrfs_root *root,
1362                              struct walk_control *wc, int level)
1363 {
1364         struct shared_node *node;
1365         struct shared_node *dest;
1366         int i;
1367
1368         if (level == wc->root_level)
1369                 return 0;
1370
1371         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1372                 if (wc->nodes[i])
1373                         break;
1374         }
1375         BUG_ON(i >= BTRFS_MAX_LEVEL);
1376
1377         node = wc->nodes[wc->active_node];
1378         wc->nodes[wc->active_node] = NULL;
1379         wc->active_node = i;
1380
1381         dest = wc->nodes[wc->active_node];
1382         if (wc->active_node < wc->root_level ||
1383             btrfs_root_refs(&root->root_item) > 0) {
1384                 BUG_ON(node->refs <= 1);
1385                 splice_shared_node(node, dest);
1386         } else {
1387                 BUG_ON(node->refs < 2);
1388                 node->refs--;
1389         }
1390         return 0;
1391 }
1392
1393 /*
1394  * Returns:
1395  * < 0 - on error
1396  * 1   - if the root with id child_root_id is a child of root parent_root_id
1397  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1398  *       has other root(s) as parent(s)
1399  * 2   - if the root child_root_id doesn't have any parent roots
1400  */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1402                          u64 child_root_id)
1403 {
1404         struct btrfs_path path;
1405         struct btrfs_key key;
1406         struct extent_buffer *leaf;
1407         int has_parent = 0;
1408         int ret;
1409
1410         btrfs_init_path(&path);
1411
1412         key.objectid = parent_root_id;
1413         key.type = BTRFS_ROOT_REF_KEY;
1414         key.offset = child_root_id;
1415         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1416                                 0, 0);
1417         if (ret < 0)
1418                 return ret;
1419         btrfs_release_path(&path);
1420         if (!ret)
1421                 return 1;
1422
1423         key.objectid = child_root_id;
1424         key.type = BTRFS_ROOT_BACKREF_KEY;
1425         key.offset = 0;
1426         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1427                                 0, 0);
1428         if (ret < 0)
1429                 goto out;
1430
1431         while (1) {
1432                 leaf = path.nodes[0];
1433                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1435                         if (ret)
1436                                 break;
1437                         leaf = path.nodes[0];
1438                 }
1439
1440                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441                 if (key.objectid != child_root_id ||
1442                     key.type != BTRFS_ROOT_BACKREF_KEY)
1443                         break;
1444
1445                 has_parent = 1;
1446
1447                 if (key.offset == parent_root_id) {
1448                         btrfs_release_path(&path);
1449                         return 1;
1450                 }
1451
1452                 path.slots[0]++;
1453         }
1454 out:
1455         btrfs_release_path(&path);
1456         if (ret < 0)
1457                 return ret;
1458         return has_parent ? 0 : 2;
1459 }
1460
1461 static int process_dir_item(struct btrfs_root *root,
1462                             struct extent_buffer *eb,
1463                             int slot, struct btrfs_key *key,
1464                             struct shared_node *active_node)
1465 {
1466         u32 total;
1467         u32 cur = 0;
1468         u32 len;
1469         u32 name_len;
1470         u32 data_len;
1471         int error;
1472         int nritems = 0;
1473         u8 filetype;
1474         struct btrfs_dir_item *di;
1475         struct inode_record *rec;
1476         struct cache_tree *root_cache;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_key location;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         root_cache = &active_node->root_cache;
1482         inode_cache = &active_node->inode_cache;
1483         rec = active_node->current;
1484         rec->found_dir_item = 1;
1485
1486         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487         total = btrfs_item_size_nr(eb, slot);
1488         while (cur < total) {
1489                 nritems++;
1490                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491                 name_len = btrfs_dir_name_len(eb, di);
1492                 data_len = btrfs_dir_data_len(eb, di);
1493                 filetype = btrfs_dir_type(eb, di);
1494
1495                 rec->found_size += name_len;
1496                 if (name_len <= BTRFS_NAME_LEN) {
1497                         len = name_len;
1498                         error = 0;
1499                 } else {
1500                         len = BTRFS_NAME_LEN;
1501                         error = REF_ERR_NAME_TOO_LONG;
1502                 }
1503                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1504
1505                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506                         add_inode_backref(inode_cache, location.objectid,
1507                                           key->objectid, key->offset, namebuf,
1508                                           len, filetype, key->type, error);
1509                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510                         add_inode_backref(root_cache, location.objectid,
1511                                           key->objectid, key->offset,
1512                                           namebuf, len, filetype,
1513                                           key->type, error);
1514                 } else {
1515                         fprintf(stderr, "invalid location in dir item %u\n",
1516                                 location.type);
1517                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518                                           key->objectid, key->offset, namebuf,
1519                                           len, filetype, key->type, error);
1520                 }
1521
1522                 len = sizeof(*di) + name_len + data_len;
1523                 di = (struct btrfs_dir_item *)((char *)di + len);
1524                 cur += len;
1525         }
1526         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1528
1529         return 0;
1530 }
1531
1532 static int process_inode_ref(struct extent_buffer *eb,
1533                              int slot, struct btrfs_key *key,
1534                              struct shared_node *active_node)
1535 {
1536         u32 total;
1537         u32 cur = 0;
1538         u32 len;
1539         u32 name_len;
1540         u64 index;
1541         int error;
1542         struct cache_tree *inode_cache;
1543         struct btrfs_inode_ref *ref;
1544         char namebuf[BTRFS_NAME_LEN];
1545
1546         inode_cache = &active_node->inode_cache;
1547
1548         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549         total = btrfs_item_size_nr(eb, slot);
1550         while (cur < total) {
1551                 name_len = btrfs_inode_ref_name_len(eb, ref);
1552                 index = btrfs_inode_ref_index(eb, ref);
1553                 if (name_len <= BTRFS_NAME_LEN) {
1554                         len = name_len;
1555                         error = 0;
1556                 } else {
1557                         len = BTRFS_NAME_LEN;
1558                         error = REF_ERR_NAME_TOO_LONG;
1559                 }
1560                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561                 add_inode_backref(inode_cache, key->objectid, key->offset,
1562                                   index, namebuf, len, 0, key->type, error);
1563
1564                 len = sizeof(*ref) + name_len;
1565                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1566                 cur += len;
1567         }
1568         return 0;
1569 }
1570
1571 static int process_inode_extref(struct extent_buffer *eb,
1572                                 int slot, struct btrfs_key *key,
1573                                 struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u64 index;
1580         u64 parent;
1581         int error;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_inode_extref *extref;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         inode_cache = &active_node->inode_cache;
1587
1588         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589         total = btrfs_item_size_nr(eb, slot);
1590         while (cur < total) {
1591                 name_len = btrfs_inode_extref_name_len(eb, extref);
1592                 index = btrfs_inode_extref_index(eb, extref);
1593                 parent = btrfs_inode_extref_parent(eb, extref);
1594                 if (name_len <= BTRFS_NAME_LEN) {
1595                         len = name_len;
1596                         error = 0;
1597                 } else {
1598                         len = BTRFS_NAME_LEN;
1599                         error = REF_ERR_NAME_TOO_LONG;
1600                 }
1601                 read_extent_buffer(eb, namebuf,
1602                                    (unsigned long)(extref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, parent,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*extref) + name_len;
1607                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611
1612 }
1613
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615                             u64 len, u64 *found)
1616 {
1617         struct btrfs_key key;
1618         struct btrfs_path path;
1619         struct extent_buffer *leaf;
1620         int ret;
1621         size_t size;
1622         *found = 0;
1623         u64 csum_end;
1624         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1625
1626         btrfs_init_path(&path);
1627
1628         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1629         key.offset = start;
1630         key.type = BTRFS_EXTENT_CSUM_KEY;
1631
1632         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1633                                 &key, &path, 0, 0);
1634         if (ret < 0)
1635                 goto out;
1636         if (ret > 0 && path.slots[0] > 0) {
1637                 leaf = path.nodes[0];
1638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640                     key.type == BTRFS_EXTENT_CSUM_KEY)
1641                         path.slots[0]--;
1642         }
1643
1644         while (len > 0) {
1645                 leaf = path.nodes[0];
1646                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1648                         if (ret > 0)
1649                                 break;
1650                         else if (ret < 0)
1651                                 goto out;
1652                         leaf = path.nodes[0];
1653                 }
1654
1655                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657                     key.type != BTRFS_EXTENT_CSUM_KEY)
1658                         break;
1659
1660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661                 if (key.offset >= start + len)
1662                         break;
1663
1664                 if (key.offset > start)
1665                         start = key.offset;
1666
1667                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669                 if (csum_end > start) {
1670                         size = min(csum_end - start, len);
1671                         len -= size;
1672                         start += size;
1673                         *found += size;
1674                 }
1675
1676                 path.slots[0]++;
1677         }
1678 out:
1679         btrfs_release_path(&path);
1680         if (ret < 0)
1681                 return ret;
1682         return 0;
1683 }
1684
1685 static int process_file_extent(struct btrfs_root *root,
1686                                 struct extent_buffer *eb,
1687                                 int slot, struct btrfs_key *key,
1688                                 struct shared_node *active_node)
1689 {
1690         struct inode_record *rec;
1691         struct btrfs_file_extent_item *fi;
1692         u64 num_bytes = 0;
1693         u64 disk_bytenr = 0;
1694         u64 extent_offset = 0;
1695         u64 mask = root->sectorsize - 1;
1696         int extent_type;
1697         int ret;
1698
1699         rec = active_node->current;
1700         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701         rec->found_file_extent = 1;
1702
1703         if (rec->extent_start == (u64)-1) {
1704                 rec->extent_start = key->offset;
1705                 rec->extent_end = key->offset;
1706         }
1707
1708         if (rec->extent_end > key->offset)
1709                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710         else if (rec->extent_end < key->offset) {
1711                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712                                            key->offset - rec->extent_end);
1713                 if (ret < 0)
1714                         return ret;
1715         }
1716
1717         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718         extent_type = btrfs_file_extent_type(eb, fi);
1719
1720         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1722                 if (num_bytes == 0)
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 rec->found_size += num_bytes;
1725                 num_bytes = (num_bytes + mask) & ~mask;
1726         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730                 extent_offset = btrfs_file_extent_offset(eb, fi);
1731                 if (num_bytes == 0 || (num_bytes & mask))
1732                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733                 if (num_bytes + extent_offset >
1734                     btrfs_file_extent_ram_bytes(eb, fi))
1735                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1736                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737                     (btrfs_file_extent_compression(eb, fi) ||
1738                      btrfs_file_extent_encryption(eb, fi) ||
1739                      btrfs_file_extent_other_encoding(eb, fi)))
1740                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1741                 if (disk_bytenr > 0)
1742                         rec->found_size += num_bytes;
1743         } else {
1744                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1745         }
1746         rec->extent_end = key->offset + num_bytes;
1747
1748         /*
1749          * The data reloc tree will copy full extents into its inode and then
1750          * copy the corresponding csums.  Because the extent it copied could be
1751          * a preallocated extent that hasn't been written to yet there may be no
1752          * csums to copy, ergo we won't have csums for our file extent.  This is
1753          * ok so just don't bother checking csums if the inode belongs to the
1754          * data reloc tree.
1755          */
1756         if (disk_bytenr > 0 &&
1757             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1758                 u64 found;
1759                 if (btrfs_file_extent_compression(eb, fi))
1760                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1761                 else
1762                         disk_bytenr += extent_offset;
1763
1764                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1765                 if (ret < 0)
1766                         return ret;
1767                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1768                         if (found > 0)
1769                                 rec->found_csum_item = 1;
1770                         if (found < num_bytes)
1771                                 rec->some_csum_missing = 1;
1772                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1773                         if (found > 0)
1774                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1775                 }
1776         }
1777         return 0;
1778 }
1779
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781                             struct walk_control *wc)
1782 {
1783         struct btrfs_key key;
1784         u32 nritems;
1785         int i;
1786         int ret = 0;
1787         struct cache_tree *inode_cache;
1788         struct shared_node *active_node;
1789
1790         if (wc->root_level == wc->active_node &&
1791             btrfs_root_refs(&root->root_item) == 0)
1792                 return 0;
1793
1794         active_node = wc->nodes[wc->active_node];
1795         inode_cache = &active_node->inode_cache;
1796         nritems = btrfs_header_nritems(eb);
1797         for (i = 0; i < nritems; i++) {
1798                 btrfs_item_key_to_cpu(eb, &key, i);
1799
1800                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1801                         continue;
1802                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1803                         continue;
1804
1805                 if (active_node->current == NULL ||
1806                     active_node->current->ino < key.objectid) {
1807                         if (active_node->current) {
1808                                 active_node->current->checked = 1;
1809                                 maybe_free_inode_rec(inode_cache,
1810                                                      active_node->current);
1811                         }
1812                         active_node->current = get_inode_rec(inode_cache,
1813                                                              key.objectid, 1);
1814                         BUG_ON(IS_ERR(active_node->current));
1815                 }
1816                 switch (key.type) {
1817                 case BTRFS_DIR_ITEM_KEY:
1818                 case BTRFS_DIR_INDEX_KEY:
1819                         ret = process_dir_item(root, eb, i, &key, active_node);
1820                         break;
1821                 case BTRFS_INODE_REF_KEY:
1822                         ret = process_inode_ref(eb, i, &key, active_node);
1823                         break;
1824                 case BTRFS_INODE_EXTREF_KEY:
1825                         ret = process_inode_extref(eb, i, &key, active_node);
1826                         break;
1827                 case BTRFS_INODE_ITEM_KEY:
1828                         ret = process_inode_item(eb, i, &key, active_node);
1829                         break;
1830                 case BTRFS_EXTENT_DATA_KEY:
1831                         ret = process_file_extent(root, eb, i, &key,
1832                                                   active_node);
1833                         break;
1834                 default:
1835                         break;
1836                 };
1837         }
1838         return ret;
1839 }
1840
1841 static void reada_walk_down(struct btrfs_root *root,
1842                             struct extent_buffer *node, int slot)
1843 {
1844         u64 bytenr;
1845         u64 ptr_gen;
1846         u32 nritems;
1847         u32 blocksize;
1848         int i;
1849         int level;
1850
1851         level = btrfs_header_level(node);
1852         if (level != 1)
1853                 return;
1854
1855         nritems = btrfs_header_nritems(node);
1856         blocksize = root->nodesize;
1857         for (i = slot; i < nritems; i++) {
1858                 bytenr = btrfs_node_blockptr(node, i);
1859                 ptr_gen = btrfs_node_ptr_generation(node, i);
1860                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1861         }
1862 }
1863
1864 /*
1865  * Check the child node/leaf by the following condition:
1866  * 1. the first item key of the node/leaf should be the same with the one
1867  *    in parent.
1868  * 2. block in parent node should match the child node/leaf.
1869  * 3. generation of parent node and child's header should be consistent.
1870  *
1871  * Or the child node/leaf pointed by the key in parent is not valid.
1872  *
1873  * We hope to check leaf owner too, but since subvol may share leaves,
1874  * which makes leaf owner check not so strong, key check should be
1875  * sufficient enough for that case.
1876  */
1877 static int check_child_node(struct btrfs_root *root,
1878                             struct extent_buffer *parent, int slot,
1879                             struct extent_buffer *child)
1880 {
1881         struct btrfs_key parent_key;
1882         struct btrfs_key child_key;
1883         int ret = 0;
1884
1885         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886         if (btrfs_header_level(child) == 0)
1887                 btrfs_item_key_to_cpu(child, &child_key, 0);
1888         else
1889                 btrfs_node_key_to_cpu(child, &child_key, 0);
1890
1891         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1892                 ret = -EINVAL;
1893                 fprintf(stderr,
1894                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895                         parent_key.objectid, parent_key.type, parent_key.offset,
1896                         child_key.objectid, child_key.type, child_key.offset);
1897         }
1898         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1899                 ret = -EINVAL;
1900                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901                         btrfs_node_blockptr(parent, slot),
1902                         btrfs_header_bytenr(child));
1903         }
1904         if (btrfs_node_ptr_generation(parent, slot) !=
1905             btrfs_header_generation(child)) {
1906                 ret = -EINVAL;
1907                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908                         btrfs_header_generation(child),
1909                         btrfs_node_ptr_generation(parent, slot));
1910         }
1911         return ret;
1912 }
1913
/*
 * Per-level cache of the most recent extent reference lookup.
 *
 * For each tree level, bytenr[] holds the start of the tree block the lookup
 * was done for and refs[] the reference count obtained for it.
 * walk_down_tree() reuses the cached count when the bytenr matches instead
 * of calling btrfs_lookup_extent_info() again.
 */
1914 struct node_refs {
1915         u64 bytenr[BTRFS_MAX_LEVEL];
1916         u64 refs[BTRFS_MAX_LEVEL];
1917 };
1918
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920                           struct walk_control *wc, int *level,
1921                           struct node_refs *nrefs)
1922 {
1923         enum btrfs_tree_block_status status;
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         struct extent_buffer *next;
1927         struct extent_buffer *cur;
1928         u32 blocksize;
1929         int ret, err = 0;
1930         u64 refs;
1931
1932         WARN_ON(*level < 0);
1933         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1934
1935         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936                 refs = nrefs->refs[*level];
1937                 ret = 0;
1938         } else {
1939                 ret = btrfs_lookup_extent_info(NULL, root,
1940                                        path->nodes[*level]->start,
1941                                        *level, 1, &refs, NULL);
1942                 if (ret < 0) {
1943                         err = ret;
1944                         goto out;
1945                 }
1946                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947                 nrefs->refs[*level] = refs;
1948         }
1949
1950         if (refs > 1) {
1951                 ret = enter_shared_node(root, path->nodes[*level]->start,
1952                                         refs, wc, *level);
1953                 if (ret > 0) {
1954                         err = ret;
1955                         goto out;
1956                 }
1957         }
1958
1959         while (*level >= 0) {
1960                 WARN_ON(*level < 0);
1961                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962                 cur = path->nodes[*level];
1963
1964                 if (btrfs_header_level(cur) != *level)
1965                         WARN_ON(1);
1966
1967                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1968                         break;
1969                 if (*level == 0) {
1970                         ret = process_one_leaf(root, cur, wc);
1971                         if (ret < 0)
1972                                 err = ret;
1973                         break;
1974                 }
1975                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977                 blocksize = root->nodesize;
1978
1979                 if (bytenr == nrefs->bytenr[*level - 1]) {
1980                         refs = nrefs->refs[*level - 1];
1981                 } else {
1982                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983                                         *level - 1, 1, &refs, NULL);
1984                         if (ret < 0) {
1985                                 refs = 0;
1986                         } else {
1987                                 nrefs->bytenr[*level - 1] = bytenr;
1988                                 nrefs->refs[*level - 1] = refs;
1989                         }
1990                 }
1991
1992                 if (refs > 1) {
1993                         ret = enter_shared_node(root, bytenr, refs,
1994                                                 wc, *level - 1);
1995                         if (ret > 0) {
1996                                 path->slots[*level]++;
1997                                 continue;
1998                         }
1999                 }
2000
2001                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003                         free_extent_buffer(next);
2004                         reada_walk_down(root, cur, path->slots[*level]);
2005                         next = read_tree_block(root, bytenr, blocksize,
2006                                                ptr_gen);
2007                         if (!extent_buffer_uptodate(next)) {
2008                                 struct btrfs_key node_key;
2009
2010                                 btrfs_node_key_to_cpu(path->nodes[*level],
2011                                                       &node_key,
2012                                                       path->slots[*level]);
2013                                 btrfs_add_corrupt_extent_record(root->fs_info,
2014                                                 &node_key,
2015                                                 path->nodes[*level]->start,
2016                                                 root->nodesize, *level);
2017                                 err = -EIO;
2018                                 goto out;
2019                         }
2020                 }
2021
2022                 ret = check_child_node(root, cur, path->slots[*level], next);
2023                 if (ret) {
2024                         err = ret;
2025                         goto out;
2026                 }
2027
2028                 if (btrfs_is_leaf(next))
2029                         status = btrfs_check_leaf(root, NULL, next);
2030                 else
2031                         status = btrfs_check_node(root, NULL, next);
2032                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033                         free_extent_buffer(next);
2034                         err = -EIO;
2035                         goto out;
2036                 }
2037
2038                 *level = *level - 1;
2039                 free_extent_buffer(path->nodes[*level]);
2040                 path->nodes[*level] = next;
2041                 path->slots[*level] = 0;
2042         }
2043 out:
2044         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2045         return err;
2046 }
2047
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049                         struct walk_control *wc, int *level)
2050 {
2051         int i;
2052         struct extent_buffer *leaf;
2053
2054         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055                 leaf = path->nodes[i];
2056                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2057                         path->slots[i]++;
2058                         *level = i;
2059                         return 0;
2060                 } else {
2061                         free_extent_buffer(path->nodes[*level]);
2062                         path->nodes[*level] = NULL;
2063                         BUG_ON(*level > wc->active_node);
2064                         if (*level == wc->active_node)
2065                                 leave_shared_node(root, wc, *level);
2066                         *level = i + 1;
2067                 }
2068         }
2069         return 1;
2070 }
2071
2072 static int check_root_dir(struct inode_record *rec)
2073 {
2074         struct inode_backref *backref;
2075         int ret = -1;
2076
2077         if (!rec->found_inode_item || rec->errors)
2078                 goto out;
2079         if (rec->nlink != 1 || rec->found_link != 0)
2080                 goto out;
2081         if (list_empty(&rec->backrefs))
2082                 goto out;
2083         backref = to_inode_backref(rec->backrefs.next);
2084         if (!backref->found_inode_ref)
2085                 goto out;
2086         if (backref->index != 0 || backref->namelen != 2 ||
2087             memcmp(backref->name, "..", 2))
2088                 goto out;
2089         if (backref->found_dir_index || backref->found_dir_item)
2090                 goto out;
2091         ret = 0;
2092 out:
2093         return ret;
2094 }
2095
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097                               struct btrfs_root *root, struct btrfs_path *path,
2098                               struct inode_record *rec)
2099 {
2100         struct btrfs_inode_item *ei;
2101         struct btrfs_key key;
2102         int ret;
2103
2104         key.objectid = rec->ino;
2105         key.type = BTRFS_INODE_ITEM_KEY;
2106         key.offset = (u64)-1;
2107
2108         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2109         if (ret < 0)
2110                 goto out;
2111         if (ret) {
2112                 if (!path->slots[0]) {
2113                         ret = -ENOENT;
2114                         goto out;
2115                 }
2116                 path->slots[0]--;
2117                 ret = 0;
2118         }
2119         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120         if (key.objectid != rec->ino) {
2121                 ret = -ENOENT;
2122                 goto out;
2123         }
2124
2125         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126                             struct btrfs_inode_item);
2127         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128         btrfs_mark_buffer_dirty(path->nodes[0]);
2129         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131                root->root_key.objectid);
2132 out:
2133         btrfs_release_path(path);
2134         return ret;
2135 }
2136
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138                                     struct btrfs_root *root,
2139                                     struct btrfs_path *path,
2140                                     struct inode_record *rec)
2141 {
2142         int ret;
2143
2144         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145         btrfs_release_path(path);
2146         if (!ret)
2147                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2148         return ret;
2149 }
2150
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152                                struct btrfs_root *root,
2153                                struct btrfs_path *path,
2154                                struct inode_record *rec)
2155 {
2156         struct btrfs_inode_item *ei;
2157         struct btrfs_key key;
2158         int ret = 0;
2159
2160         key.objectid = rec->ino;
2161         key.type = BTRFS_INODE_ITEM_KEY;
2162         key.offset = 0;
2163
2164         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2165         if (ret) {
2166                 if (ret > 0)
2167                         ret = -ENOENT;
2168                 goto out;
2169         }
2170
2171         /* Since ret == 0, no need to check anything */
2172         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173                             struct btrfs_inode_item);
2174         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175         btrfs_mark_buffer_dirty(path->nodes[0]);
2176         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177         printf("reset nbytes for ino %llu root %llu\n",
2178                rec->ino, root->root_key.objectid);
2179 out:
2180         btrfs_release_path(path);
2181         return ret;
2182 }
2183
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185                                  struct cache_tree *inode_cache,
2186                                  struct inode_record *rec,
2187                                  struct inode_backref *backref)
2188 {
2189         struct btrfs_path path;
2190         struct btrfs_trans_handle *trans;
2191         struct btrfs_dir_item *dir_item;
2192         struct extent_buffer *leaf;
2193         struct btrfs_key key;
2194         struct btrfs_disk_key disk_key;
2195         struct inode_record *dir_rec;
2196         unsigned long name_ptr;
2197         u32 data_size = sizeof(*dir_item) + backref->namelen;
2198         int ret;
2199
2200         trans = btrfs_start_transaction(root, 1);
2201         if (IS_ERR(trans))
2202                 return PTR_ERR(trans);
2203
2204         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205                 (unsigned long long)rec->ino);
2206
2207         btrfs_init_path(&path);
2208         key.objectid = backref->dir;
2209         key.type = BTRFS_DIR_INDEX_KEY;
2210         key.offset = backref->index;
2211         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2212         BUG_ON(ret);
2213
2214         leaf = path.nodes[0];
2215         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2216
2217         disk_key.objectid = cpu_to_le64(rec->ino);
2218         disk_key.type = BTRFS_INODE_ITEM_KEY;
2219         disk_key.offset = 0;
2220
2221         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223         btrfs_set_dir_data_len(leaf, dir_item, 0);
2224         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225         name_ptr = (unsigned long)(dir_item + 1);
2226         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227         btrfs_mark_buffer_dirty(leaf);
2228         btrfs_release_path(&path);
2229         btrfs_commit_transaction(trans, root);
2230
2231         backref->found_dir_index = 1;
2232         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233         BUG_ON(IS_ERR(dir_rec));
2234         if (!dir_rec)
2235                 return 0;
2236         dir_rec->found_size += backref->namelen;
2237         if (dir_rec->found_size == dir_rec->isize &&
2238             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240         if (dir_rec->found_size != dir_rec->isize)
2241                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2242
2243         return 0;
2244 }
2245
2246 static int delete_dir_index(struct btrfs_root *root,
2247                             struct cache_tree *inode_cache,
2248                             struct inode_record *rec,
2249                             struct inode_backref *backref)
2250 {
2251         struct btrfs_trans_handle *trans;
2252         struct btrfs_dir_item *di;
2253         struct btrfs_path path;
2254         int ret = 0;
2255
2256         trans = btrfs_start_transaction(root, 1);
2257         if (IS_ERR(trans))
2258                 return PTR_ERR(trans);
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         btrfs_init_path(&path);
2266         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267                                     backref->name, backref->namelen,
2268                                     backref->index, -1);
2269         if (IS_ERR(di)) {
2270                 ret = PTR_ERR(di);
2271                 btrfs_release_path(&path);
2272                 btrfs_commit_transaction(trans, root);
2273                 if (ret == -ENOENT)
2274                         return 0;
2275                 return ret;
2276         }
2277
2278         if (!di)
2279                 ret = btrfs_del_item(trans, root, &path);
2280         else
2281                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2282         BUG_ON(ret);
2283         btrfs_release_path(&path);
2284         btrfs_commit_transaction(trans, root);
2285         return ret;
2286 }
2287
2288 static int create_inode_item(struct btrfs_root *root,
2289                              struct inode_record *rec,
2290                              struct inode_backref *backref, int root_dir)
2291 {
2292         struct btrfs_trans_handle *trans;
2293         struct btrfs_inode_item inode_item;
2294         time_t now = time(NULL);
2295         int ret;
2296
2297         trans = btrfs_start_transaction(root, 1);
2298         if (IS_ERR(trans)) {
2299                 ret = PTR_ERR(trans);
2300                 return ret;
2301         }
2302
2303         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304                 "be incomplete, please check permissions and content after "
2305                 "the fsck completes.\n", (unsigned long long)root->objectid,
2306                 (unsigned long long)rec->ino);
2307
2308         memset(&inode_item, 0, sizeof(inode_item));
2309         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2310         if (root_dir)
2311                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2312         else
2313                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2315         if (rec->found_dir_item) {
2316                 if (rec->found_file_extent)
2317                         fprintf(stderr, "root %llu inode %llu has both a dir "
2318                                 "item and extents, unsure if it is a dir or a "
2319                                 "regular file so setting it as a directory\n",
2320                                 (unsigned long long)root->objectid,
2321                                 (unsigned long long)rec->ino);
2322                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2324         } else if (!rec->found_dir_item) {
2325                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2327         }
2328         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2336
2337         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2338         BUG_ON(ret);
2339         btrfs_commit_transaction(trans, root);
2340         return 0;
2341 }
2342
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344                                  struct inode_record *rec,
2345                                  struct cache_tree *inode_cache,
2346                                  int delete)
2347 {
2348         struct inode_backref *tmp, *backref;
2349         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2350         int ret = 0;
2351         int repaired = 0;
2352
2353         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354                 if (!delete && rec->ino == root_dirid) {
2355                         if (!rec->found_inode_item) {
2356                                 ret = create_inode_item(root, rec, backref, 1);
2357                                 if (ret)
2358                                         break;
2359                                 repaired++;
2360                         }
2361                 }
2362
2363                 /* Index 0 for root dir's are special, don't mess with it */
2364                 if (rec->ino == root_dirid && backref->index == 0)
2365                         continue;
2366
2367                 if (delete &&
2368                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2369                      (backref->found_dir_index && backref->found_inode_ref &&
2370                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371                         ret = delete_dir_index(root, inode_cache, rec, backref);
2372                         if (ret)
2373                                 break;
2374                         repaired++;
2375                         list_del(&backref->list);
2376                         free(backref);
2377                 }
2378
2379                 if (!delete && !backref->found_dir_index &&
2380                     backref->found_dir_item && backref->found_inode_ref) {
2381                         ret = add_missing_dir_index(root, inode_cache, rec,
2382                                                     backref);
2383                         if (ret)
2384                                 break;
2385                         repaired++;
2386                         if (backref->found_dir_item &&
2387                             backref->found_dir_index &&
2388                             backref->found_dir_index) {
2389                                 if (!backref->errors &&
2390                                     backref->found_inode_ref) {
2391                                         list_del(&backref->list);
2392                                         free(backref);
2393                                 }
2394                         }
2395                 }
2396
2397                 if (!delete && (!backref->found_dir_index &&
2398                                 !backref->found_dir_item &&
2399                                 backref->found_inode_ref)) {
2400                         struct btrfs_trans_handle *trans;
2401                         struct btrfs_key location;
2402
2403                         ret = check_dir_conflict(root, backref->name,
2404                                                  backref->namelen,
2405                                                  backref->dir,
2406                                                  backref->index);
2407                         if (ret) {
2408                                 /*
2409                                  * let nlink fixing routine to handle it,
2410                                  * which can do it better.
2411                                  */
2412                                 ret = 0;
2413                                 break;
2414                         }
2415                         location.objectid = rec->ino;
2416                         location.type = BTRFS_INODE_ITEM_KEY;
2417                         location.offset = 0;
2418
2419                         trans = btrfs_start_transaction(root, 1);
2420                         if (IS_ERR(trans)) {
2421                                 ret = PTR_ERR(trans);
2422                                 break;
2423                         }
2424                         fprintf(stderr, "adding missing dir index/item pair "
2425                                 "for inode %llu\n",
2426                                 (unsigned long long)rec->ino);
2427                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2428                                                     backref->namelen,
2429                                                     backref->dir, &location,
2430                                                     imode_to_type(rec->imode),
2431                                                     backref->index);
2432                         BUG_ON(ret);
2433                         btrfs_commit_transaction(trans, root);
2434                         repaired++;
2435                 }
2436
2437                 if (!delete && (backref->found_inode_ref &&
2438                                 backref->found_dir_index &&
2439                                 backref->found_dir_item &&
2440                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441                                 !rec->found_inode_item)) {
2442                         ret = create_inode_item(root, rec, backref, 0);
2443                         if (ret)
2444                                 break;
2445                         repaired++;
2446                 }
2447
2448         }
2449         return ret ? ret : repaired;
2450 }
2451
2452 /*
2453  * To determine the file type for nlink/inode_item repair
2454  *
2455  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456  * Return -ENOENT if file type is not found.
2457  */
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2459 {
2460         struct inode_backref *backref;
2461
2462         /* For inode item recovered case */
2463         if (rec->found_inode_item) {
2464                 *type = imode_to_type(rec->imode);
2465                 return 0;
2466         }
2467
2468         list_for_each_entry(backref, &rec->backrefs, list) {
2469                 if (backref->found_dir_index || backref->found_dir_item) {
2470                         *type = backref->filetype;
2471                         return 0;
2472                 }
2473         }
2474         return -ENOENT;
2475 }
2476
2477 /*
2478  * To determine the file name for nlink repair
2479  *
2480  * Return 0 if file name is found, set name and namelen.
2481  * Return -ENOENT if file name is not found.
2482  */
2483 static int find_file_name(struct inode_record *rec,
2484                           char *name, int *namelen)
2485 {
2486         struct inode_backref *backref;
2487
2488         list_for_each_entry(backref, &rec->backrefs, list) {
2489                 if (backref->found_dir_index || backref->found_dir_item ||
2490                     backref->found_inode_ref) {
2491                         memcpy(name, backref->name, backref->namelen);
2492                         *namelen = backref->namelen;
2493                         return 0;
2494                 }
2495         }
2496         return -ENOENT;
2497 }
2498
2499 /* Reset the nlink of the inode to the correct one */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501                        struct btrfs_root *root,
2502                        struct btrfs_path *path,
2503                        struct inode_record *rec)
2504 {
2505         struct inode_backref *backref;
2506         struct inode_backref *tmp;
2507         struct btrfs_key key;
2508         struct btrfs_inode_item *inode_item;
2509         int ret = 0;
2510
2511         /* We don't believe this either, reset it and iterate backref */
2512         rec->found_link = 0;
2513
2514         /* Remove all backref including the valid ones */
2515         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517                                    backref->index, backref->name,
2518                                    backref->namelen, 0);
2519                 if (ret < 0)
2520                         goto out;
2521
2522                 /* remove invalid backref, so it won't be added back */
2523                 if (!(backref->found_dir_index &&
2524                       backref->found_dir_item &&
2525                       backref->found_inode_ref)) {
2526                         list_del(&backref->list);
2527                         free(backref);
2528                 } else {
2529                         rec->found_link++;
2530                 }
2531         }
2532
2533         /* Set nlink to 0 */
2534         key.objectid = rec->ino;
2535         key.type = BTRFS_INODE_ITEM_KEY;
2536         key.offset = 0;
2537         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2538         if (ret < 0)
2539                 goto out;
2540         if (ret > 0) {
2541                 ret = -ENOENT;
2542                 goto out;
2543         }
2544         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545                                     struct btrfs_inode_item);
2546         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547         btrfs_mark_buffer_dirty(path->nodes[0]);
2548         btrfs_release_path(path);
2549
2550         /*
2551          * Add back valid inode_ref/dir_item/dir_index,
2552          * add_link() will handle the nlink inc, so new nlink must be correct
2553          */
2554         list_for_each_entry(backref, &rec->backrefs, list) {
2555                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556                                      backref->name, backref->namelen,
2557                                      backref->filetype, &backref->index, 1);
2558                 if (ret < 0)
2559                         goto out;
2560         }
2561 out:
2562         btrfs_release_path(path);
2563         return ret;
2564 }
2565
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567                                struct btrfs_root *root,
2568                                struct btrfs_path *path,
2569                                struct inode_record *rec)
2570 {
2571         char *dir_name = "lost+found";
2572         char namebuf[BTRFS_NAME_LEN] = {0};
2573         u64 lost_found_ino;
2574         u32 mode = 0700;
2575         u8 type = 0;
2576         int namelen = 0;
2577         int name_recovered = 0;
2578         int type_recovered = 0;
2579         int ret = 0;
2580
2581         /*
2582          * Get file name and type first before these invalid inode ref
2583          * are deleted by remove_all_invalid_backref()
2584          */
2585         name_recovered = !find_file_name(rec, namebuf, &namelen);
2586         type_recovered = !find_file_type(rec, &type);
2587
2588         if (!name_recovered) {
2589                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590                        rec->ino, rec->ino);
2591                 namelen = count_digits(rec->ino);
2592                 sprintf(namebuf, "%llu", rec->ino);
2593                 name_recovered = 1;
2594         }
2595         if (!type_recovered) {
2596                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2597                        rec->ino);
2598                 type = BTRFS_FT_REG_FILE;
2599                 type_recovered = 1;
2600         }
2601
2602         ret = reset_nlink(trans, root, path, rec);
2603         if (ret < 0) {
2604                 fprintf(stderr,
2605                         "Failed to reset nlink for inode %llu: %s\n",
2606                         rec->ino, strerror(-ret));
2607                 goto out;
2608         }
2609
2610         if (rec->found_link == 0) {
2611                 lost_found_ino = root->highest_inode;
2612                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2613                         ret = -EOVERFLOW;
2614                         goto out;
2615                 }
2616                 lost_found_ino++;
2617                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2619                                   mode);
2620                 if (ret < 0) {
2621                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622                                 dir_name, strerror(-ret));
2623                         goto out;
2624                 }
2625                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626                                      namebuf, namelen, type, NULL, 1);
2627                 /*
2628                  * Add ".INO" suffix several times to handle case where
2629                  * "FILENAME.INO" is already taken by another file.
2630                  */
2631                 while (ret == -EEXIST) {
2632                         /*
2633                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2634                          */
2635                         if (namelen + count_digits(rec->ino) + 1 >
2636                             BTRFS_NAME_LEN) {
2637                                 ret = -EFBIG;
2638                                 goto out;
2639                         }
2640                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2641                                  ".%llu", rec->ino);
2642                         namelen += count_digits(rec->ino) + 1;
2643                         ret = btrfs_add_link(trans, root, rec->ino,
2644                                              lost_found_ino, namebuf,
2645                                              namelen, type, NULL, 1);
2646                 }
2647                 if (ret < 0) {
2648                         fprintf(stderr,
2649                                 "Failed to link the inode %llu to %s dir: %s\n",
2650                                 rec->ino, dir_name, strerror(-ret));
2651                         goto out;
2652                 }
2653                 /*
2654                  * Just increase the found_link, don't actually add the
2655                  * backref. This will make things easier and this inode
2656                  * record will be freed after the repair is done.
2657                  * So fsck will not report problem about this inode.
2658                  */
2659                 rec->found_link++;
2660                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661                        namelen, namebuf, dir_name);
2662         }
2663         printf("Fixed the nlink of inode %llu\n", rec->ino);
2664 out:
2665         /*
2666          * Clear the flag anyway, or we will loop forever for the same inode
2667          * as it will not be removed from the bad inode list and the dead loop
2668          * happens.
2669          */
2670         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671         btrfs_release_path(path);
2672         return ret;
2673 }
2674
2675 /*
2676  * Check if there is any normal(reg or prealloc) file extent for given
2677  * ino.
2678  * This is used to determine the file type when neither its dir_index/item or
2679  * inode_item exists.
2680  *
2681  * This will *NOT* report error, if any error happens, just consider it does
2682  * not have any normal file extent.
2683  */
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2685 {
2686         struct btrfs_path path;
2687         struct btrfs_key key;
2688         struct btrfs_key found_key;
2689         struct btrfs_file_extent_item *fi;
2690         u8 type;
2691         int ret = 0;
2692
2693         btrfs_init_path(&path);
2694         key.objectid = ino;
2695         key.type = BTRFS_EXTENT_DATA_KEY;
2696         key.offset = 0;
2697
2698         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2699         if (ret < 0) {
2700                 ret = 0;
2701                 goto out;
2702         }
2703         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2704                 ret = btrfs_next_leaf(root, &path);
2705                 if (ret) {
2706                         ret = 0;
2707                         goto out;
2708                 }
2709         }
2710         while (1) {
2711                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2712                                       path.slots[0]);
2713                 if (found_key.objectid != ino ||
2714                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2715                         break;
2716                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2717                                     struct btrfs_file_extent_item);
2718                 type = btrfs_file_extent_type(path.nodes[0], fi);
2719                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2720                         ret = 1;
2721                         goto out;
2722                 }
2723         }
2724 out:
2725         btrfs_release_path(&path);
2726         return ret;
2727 }
2728
2729 static u32 btrfs_type_to_imode(u8 type)
2730 {
2731         static u32 imode_by_btrfs_type[] = {
2732                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2733                 [BTRFS_FT_DIR]          = S_IFDIR,
2734                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2735                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2736                 [BTRFS_FT_FIFO]         = S_IFIFO,
2737                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2738                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2739         };
2740
2741         return imode_by_btrfs_type[(type)];
2742 }
2743
2744 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2745                                 struct btrfs_root *root,
2746                                 struct btrfs_path *path,
2747                                 struct inode_record *rec)
2748 {
2749         u8 filetype;
2750         u32 mode = 0700;
2751         int type_recovered = 0;
2752         int ret = 0;
2753
2754         printf("Trying to rebuild inode:%llu\n", rec->ino);
2755
2756         type_recovered = !find_file_type(rec, &filetype);
2757
2758         /*
2759          * Try to determine inode type if type not found.
2760          *
2761          * For found regular file extent, it must be FILE.
2762          * For found dir_item/index, it must be DIR.
2763          *
2764          * For undetermined one, use FILE as fallback.
2765          *
2766          * TODO:
2767          * 1. If found backref(inode_index/item is already handled) to it,
2768          *    it must be DIR.
2769          *    Need new inode-inode ref structure to allow search for that.
2770          */
2771         if (!type_recovered) {
2772                 if (rec->found_file_extent &&
2773                     find_normal_file_extent(root, rec->ino)) {
2774                         type_recovered = 1;
2775                         filetype = BTRFS_FT_REG_FILE;
2776                 } else if (rec->found_dir_item) {
2777                         type_recovered = 1;
2778                         filetype = BTRFS_FT_DIR;
2779                 } else if (!list_empty(&rec->orphan_extents)) {
2780                         type_recovered = 1;
2781                         filetype = BTRFS_FT_REG_FILE;
2782                 } else{
2783                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2784                                rec->ino);
2785                         type_recovered = 1;
2786                         filetype = BTRFS_FT_REG_FILE;
2787                 }
2788         }
2789
2790         ret = btrfs_new_inode(trans, root, rec->ino,
2791                               mode | btrfs_type_to_imode(filetype));
2792         if (ret < 0)
2793                 goto out;
2794
2795         /*
2796          * Here inode rebuild is done, we only rebuild the inode item,
2797          * don't repair the nlink(like move to lost+found).
2798          * That is the job of nlink repair.
2799          *
2800          * We just fill the record and return
2801          */
2802         rec->found_dir_item = 1;
2803         rec->imode = mode | btrfs_type_to_imode(filetype);
2804         rec->nlink = 0;
2805         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2806         /* Ensure the inode_nlinks repair function will be called */
2807         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2808 out:
2809         return ret;
2810 }
2811
2812 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2813                                       struct btrfs_root *root,
2814                                       struct btrfs_path *path,
2815                                       struct inode_record *rec)
2816 {
2817         struct orphan_data_extent *orphan;
2818         struct orphan_data_extent *tmp;
2819         int ret = 0;
2820
2821         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2822                 /*
2823                  * Check for conflicting file extents
2824                  *
2825                  * Here we don't know whether the extents is compressed or not,
2826                  * so we can only assume it not compressed nor data offset,
2827                  * and use its disk_len as extent length.
2828                  */
2829                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2830                                        orphan->offset, orphan->disk_len, 0);
2831                 btrfs_release_path(path);
2832                 if (ret < 0)
2833                         goto out;
2834                 if (!ret) {
2835                         fprintf(stderr,
2836                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2837                                 orphan->disk_bytenr, orphan->disk_len);
2838                         ret = btrfs_free_extent(trans,
2839                                         root->fs_info->extent_root,
2840                                         orphan->disk_bytenr, orphan->disk_len,
2841                                         0, root->objectid, orphan->objectid,
2842                                         orphan->offset);
2843                         if (ret < 0)
2844                                 goto out;
2845                 }
2846                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2847                                 orphan->offset, orphan->disk_bytenr,
2848                                 orphan->disk_len, orphan->disk_len);
2849                 if (ret < 0)
2850                         goto out;
2851
2852                 /* Update file size info */
2853                 rec->found_size += orphan->disk_len;
2854                 if (rec->found_size == rec->nbytes)
2855                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2856
2857                 /* Update the file extent hole info too */
2858                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2859                                            orphan->disk_len);
2860                 if (ret < 0)
2861                         goto out;
2862                 if (RB_EMPTY_ROOT(&rec->holes))
2863                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2864
2865                 list_del(&orphan->list);
2866                 free(orphan);
2867         }
2868         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2869 out:
2870         return ret;
2871 }
2872
2873 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2874                                         struct btrfs_root *root,
2875                                         struct btrfs_path *path,
2876                                         struct inode_record *rec)
2877 {
2878         struct rb_node *node;
2879         struct file_extent_hole *hole;
2880         int found = 0;
2881         int ret = 0;
2882
2883         node = rb_first(&rec->holes);
2884
2885         while (node) {
2886                 found = 1;
2887                 hole = rb_entry(node, struct file_extent_hole, node);
2888                 ret = btrfs_punch_hole(trans, root, rec->ino,
2889                                        hole->start, hole->len);
2890                 if (ret < 0)
2891                         goto out;
2892                 ret = del_file_extent_hole(&rec->holes, hole->start,
2893                                            hole->len);
2894                 if (ret < 0)
2895                         goto out;
2896                 if (RB_EMPTY_ROOT(&rec->holes))
2897                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2898                 node = rb_first(&rec->holes);
2899         }
2900         /* special case for a file losing all its file extent */
2901         if (!found) {
2902                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2903                                        round_up(rec->isize, root->sectorsize));
2904                 if (ret < 0)
2905                         goto out;
2906         }
2907         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2908                rec->ino, root->objectid);
2909 out:
2910         return ret;
2911 }
2912
2913 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2914 {
2915         struct btrfs_trans_handle *trans;
2916         struct btrfs_path path;
2917         int ret = 0;
2918
2919         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2920                              I_ERR_NO_ORPHAN_ITEM |
2921                              I_ERR_LINK_COUNT_WRONG |
2922                              I_ERR_NO_INODE_ITEM |
2923                              I_ERR_FILE_EXTENT_ORPHAN |
2924                              I_ERR_FILE_EXTENT_DISCOUNT|
2925                              I_ERR_FILE_NBYTES_WRONG)))
2926                 return rec->errors;
2927
2928         /*
2929          * For nlink repair, it may create a dir and add link, so
2930          * 2 for parent(256)'s dir_index and dir_item
2931          * 2 for lost+found dir's inode_item and inode_ref
2932          * 1 for the new inode_ref of the file
2933          * 2 for lost+found dir's dir_index and dir_item for the file
2934          */
2935         trans = btrfs_start_transaction(root, 7);
2936         if (IS_ERR(trans))
2937                 return PTR_ERR(trans);
2938
2939         btrfs_init_path(&path);
2940         if (rec->errors & I_ERR_NO_INODE_ITEM)
2941                 ret = repair_inode_no_item(trans, root, &path, rec);
2942         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2943                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2944         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2945                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2946         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2947                 ret = repair_inode_isize(trans, root, &path, rec);
2948         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2949                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2950         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2951                 ret = repair_inode_nlinks(trans, root, &path, rec);
2952         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2953                 ret = repair_inode_nbytes(trans, root, &path, rec);
2954         btrfs_commit_transaction(trans, root);
2955         btrfs_release_path(&path);
2956         return ret;
2957 }
2958
2959 static int check_inode_recs(struct btrfs_root *root,
2960                             struct cache_tree *inode_cache)
2961 {
2962         struct cache_extent *cache;
2963         struct ptr_node *node;
2964         struct inode_record *rec;
2965         struct inode_backref *backref;
2966         int stage = 0;
2967         int ret = 0;
2968         int err = 0;
2969         u64 error = 0;
2970         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2971
2972         if (btrfs_root_refs(&root->root_item) == 0) {
2973                 if (!cache_tree_empty(inode_cache))
2974                         fprintf(stderr, "warning line %d\n", __LINE__);
2975                 return 0;
2976         }
2977
2978         /*
2979          * We need to record the highest inode number for later 'lost+found'
2980          * dir creation.
2981          * We must select an ino not used/referred by any existing inode, or
2982          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2983          * this may cause 'lost+found' dir has wrong nlinks.
2984          */
2985         cache = last_cache_extent(inode_cache);
2986         if (cache) {
2987                 node = container_of(cache, struct ptr_node, cache);
2988                 rec = node->data;
2989                 if (rec->ino > root->highest_inode)
2990                         root->highest_inode = rec->ino;
2991         }
2992
2993         /*
2994          * We need to repair backrefs first because we could change some of the
2995          * errors in the inode recs.
2996          *
2997          * We also need to go through and delete invalid backrefs first and then
2998          * add the correct ones second.  We do this because we may get EEXIST
2999          * when adding back the correct index because we hadn't yet deleted the
3000          * invalid index.
3001          *
3002          * For example, if we were missing a dir index then the directories
3003          * isize would be wrong, so if we fixed the isize to what we thought it
3004          * would be and then fixed the backref we'd still have a invalid fs, so
3005          * we need to add back the dir index and then check to see if the isize
3006          * is still wrong.
3007          */
3008         while (stage < 3) {
3009                 stage++;
3010                 if (stage == 3 && !err)
3011                         break;
3012
3013                 cache = search_cache_extent(inode_cache, 0);
3014                 while (repair && cache) {
3015                         node = container_of(cache, struct ptr_node, cache);
3016                         rec = node->data;
3017                         cache = next_cache_extent(cache);
3018
3019                         /* Need to free everything up and rescan */
3020                         if (stage == 3) {
3021                                 remove_cache_extent(inode_cache, &node->cache);
3022                                 free(node);
3023                                 free_inode_rec(rec);
3024                                 continue;
3025                         }
3026
3027                         if (list_empty(&rec->backrefs))
3028                                 continue;
3029
3030                         ret = repair_inode_backrefs(root, rec, inode_cache,
3031                                                     stage == 1);
3032                         if (ret < 0) {
3033                                 err = ret;
3034                                 stage = 2;
3035                                 break;
3036                         } if (ret > 0) {
3037                                 err = -EAGAIN;
3038                         }
3039                 }
3040         }
3041         if (err)
3042                 return err;
3043
3044         rec = get_inode_rec(inode_cache, root_dirid, 0);
3045         BUG_ON(IS_ERR(rec));
3046         if (rec) {
3047                 ret = check_root_dir(rec);
3048                 if (ret) {
3049                         fprintf(stderr, "root %llu root dir %llu error\n",
3050                                 (unsigned long long)root->root_key.objectid,
3051                                 (unsigned long long)root_dirid);
3052                         print_inode_error(root, rec);
3053                         error++;
3054                 }
3055         } else {
3056                 if (repair) {
3057                         struct btrfs_trans_handle *trans;
3058
3059                         trans = btrfs_start_transaction(root, 1);
3060                         if (IS_ERR(trans)) {
3061                                 err = PTR_ERR(trans);
3062                                 return err;
3063                         }
3064
3065                         fprintf(stderr,
3066                                 "root %llu missing its root dir, recreating\n",
3067                                 (unsigned long long)root->objectid);
3068
3069                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3070                         BUG_ON(ret);
3071
3072                         btrfs_commit_transaction(trans, root);
3073                         return -EAGAIN;
3074                 }
3075
3076                 fprintf(stderr, "root %llu root dir %llu not found\n",
3077                         (unsigned long long)root->root_key.objectid,
3078                         (unsigned long long)root_dirid);
3079         }
3080
3081         while (1) {
3082                 cache = search_cache_extent(inode_cache, 0);
3083                 if (!cache)
3084                         break;
3085                 node = container_of(cache, struct ptr_node, cache);
3086                 rec = node->data;
3087                 remove_cache_extent(inode_cache, &node->cache);
3088                 free(node);
3089                 if (rec->ino == root_dirid ||
3090                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3091                         free_inode_rec(rec);
3092                         continue;
3093                 }
3094
3095                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3096                         ret = check_orphan_item(root, rec->ino);
3097                         if (ret == 0)
3098                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3099                         if (can_free_inode_rec(rec)) {
3100                                 free_inode_rec(rec);
3101                                 continue;
3102                         }
3103                 }
3104
3105                 if (!rec->found_inode_item)
3106                         rec->errors |= I_ERR_NO_INODE_ITEM;
3107                 if (rec->found_link != rec->nlink)
3108                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3109                 if (repair) {
3110                         ret = try_repair_inode(root, rec);
3111                         if (ret == 0 && can_free_inode_rec(rec)) {
3112                                 free_inode_rec(rec);
3113                                 continue;
3114                         }
3115                         ret = 0;
3116                 }
3117
3118                 if (!(repair && ret == 0))
3119                         error++;
3120                 print_inode_error(root, rec);
3121                 list_for_each_entry(backref, &rec->backrefs, list) {
3122                         if (!backref->found_dir_item)
3123                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3124                         if (!backref->found_dir_index)
3125                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3126                         if (!backref->found_inode_ref)
3127                                 backref->errors |= REF_ERR_NO_INODE_REF;
3128                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3129                                 " namelen %u name %s filetype %d errors %x",
3130                                 (unsigned long long)backref->dir,
3131                                 (unsigned long long)backref->index,
3132                                 backref->namelen, backref->name,
3133                                 backref->filetype, backref->errors);
3134                         print_ref_error(backref->errors);
3135                 }
3136                 free_inode_rec(rec);
3137         }
3138         return (error > 0) ? -1 : 0;
3139 }
3140
3141 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3142                                         u64 objectid)
3143 {
3144         struct cache_extent *cache;
3145         struct root_record *rec = NULL;
3146         int ret;
3147
3148         cache = lookup_cache_extent(root_cache, objectid, 1);
3149         if (cache) {
3150                 rec = container_of(cache, struct root_record, cache);
3151         } else {
3152                 rec = calloc(1, sizeof(*rec));
3153                 if (!rec)
3154                         return ERR_PTR(-ENOMEM);
3155                 rec->objectid = objectid;
3156                 INIT_LIST_HEAD(&rec->backrefs);
3157                 rec->cache.start = objectid;
3158                 rec->cache.size = 1;
3159
3160                 ret = insert_cache_extent(root_cache, &rec->cache);
3161                 if (ret)
3162                         return ERR_PTR(-EEXIST);
3163         }
3164         return rec;
3165 }
3166
3167 static struct root_backref *get_root_backref(struct root_record *rec,
3168                                              u64 ref_root, u64 dir, u64 index,
3169                                              const char *name, int namelen)
3170 {
3171         struct root_backref *backref;
3172
3173         list_for_each_entry(backref, &rec->backrefs, list) {
3174                 if (backref->ref_root != ref_root || backref->dir != dir ||
3175                     backref->namelen != namelen)
3176                         continue;
3177                 if (memcmp(name, backref->name, namelen))
3178                         continue;
3179                 return backref;
3180         }
3181
3182         backref = calloc(1, sizeof(*backref) + namelen + 1);
3183         if (!backref)
3184                 return NULL;
3185         backref->ref_root = ref_root;
3186         backref->dir = dir;
3187         backref->index = index;
3188         backref->namelen = namelen;
3189         memcpy(backref->name, name, namelen);
3190         backref->name[namelen] = '\0';
3191         list_add_tail(&backref->list, &rec->backrefs);
3192         return backref;
3193 }
3194
3195 static void free_root_record(struct cache_extent *cache)
3196 {
3197         struct root_record *rec;
3198         struct root_backref *backref;
3199
3200         rec = container_of(cache, struct root_record, cache);
3201         while (!list_empty(&rec->backrefs)) {
3202                 backref = to_root_backref(rec->backrefs.next);
3203                 list_del(&backref->list);
3204                 free(backref);
3205         }
3206
3207         free(rec);
3208 }
3209
3210 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3211
3212 static int add_root_backref(struct cache_tree *root_cache,
3213                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3214                             const char *name, int namelen,
3215                             int item_type, int errors)
3216 {
3217         struct root_record *rec;
3218         struct root_backref *backref;
3219
3220         rec = get_root_rec(root_cache, root_id);
3221         BUG_ON(IS_ERR(rec));
3222         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3223         BUG_ON(!backref);
3224
3225         backref->errors |= errors;
3226
3227         if (item_type != BTRFS_DIR_ITEM_KEY) {
3228                 if (backref->found_dir_index || backref->found_back_ref ||
3229                     backref->found_forward_ref) {
3230                         if (backref->index != index)
3231                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3232                 } else {
3233                         backref->index = index;
3234                 }
3235         }
3236
3237         if (item_type == BTRFS_DIR_ITEM_KEY) {
3238                 if (backref->found_forward_ref)
3239                         rec->found_ref++;
3240                 backref->found_dir_item = 1;
3241         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3242                 backref->found_dir_index = 1;
3243         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3244                 if (backref->found_forward_ref)
3245                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3246                 else if (backref->found_dir_item)
3247                         rec->found_ref++;
3248                 backref->found_forward_ref = 1;
3249         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3250                 if (backref->found_back_ref)
3251                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3252                 backref->found_back_ref = 1;
3253         } else {
3254                 BUG_ON(1);
3255         }
3256
3257         if (backref->found_forward_ref && backref->found_dir_item)
3258                 backref->reachable = 1;
3259         return 0;
3260 }
3261
3262 static int merge_root_recs(struct btrfs_root *root,
3263                            struct cache_tree *src_cache,
3264                            struct cache_tree *dst_cache)
3265 {
3266         struct cache_extent *cache;
3267         struct ptr_node *node;
3268         struct inode_record *rec;
3269         struct inode_backref *backref;
3270         int ret = 0;
3271
3272         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3273                 free_inode_recs_tree(src_cache);
3274                 return 0;
3275         }
3276
3277         while (1) {
3278                 cache = search_cache_extent(src_cache, 0);
3279                 if (!cache)
3280                         break;
3281                 node = container_of(cache, struct ptr_node, cache);
3282                 rec = node->data;
3283                 remove_cache_extent(src_cache, &node->cache);
3284                 free(node);
3285
3286                 ret = is_child_root(root, root->objectid, rec->ino);
3287                 if (ret < 0)
3288                         break;
3289                 else if (ret == 0)
3290                         goto skip;
3291
3292                 list_for_each_entry(backref, &rec->backrefs, list) {
3293                         BUG_ON(backref->found_inode_ref);
3294                         if (backref->found_dir_item)
3295                                 add_root_backref(dst_cache, rec->ino,
3296                                         root->root_key.objectid, backref->dir,
3297                                         backref->index, backref->name,
3298                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3299                                         backref->errors);
3300                         if (backref->found_dir_index)
3301                                 add_root_backref(dst_cache, rec->ino,
3302                                         root->root_key.objectid, backref->dir,
3303                                         backref->index, backref->name,
3304                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3305                                         backref->errors);
3306                 }
3307 skip:
3308                 free_inode_rec(rec);
3309         }
3310         if (ret < 0)
3311                 return ret;
3312         return 0;
3313 }
3314
3315 static int check_root_refs(struct btrfs_root *root,
3316                            struct cache_tree *root_cache)
3317 {
3318         struct root_record *rec;
3319         struct root_record *ref_root;
3320         struct root_backref *backref;
3321         struct cache_extent *cache;
3322         int loop = 1;
3323         int ret;
3324         int error;
3325         int errors = 0;
3326
3327         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3328         BUG_ON(IS_ERR(rec));
3329         rec->found_ref = 1;
3330
3331         /* fixme: this can not detect circular references */
3332         while (loop) {
3333                 loop = 0;
3334                 cache = search_cache_extent(root_cache, 0);
3335                 while (1) {
3336                         if (!cache)
3337                                 break;
3338                         rec = container_of(cache, struct root_record, cache);
3339                         cache = next_cache_extent(cache);
3340
3341                         if (rec->found_ref == 0)
3342                                 continue;
3343
3344                         list_for_each_entry(backref, &rec->backrefs, list) {
3345                                 if (!backref->reachable)
3346                                         continue;
3347
3348                                 ref_root = get_root_rec(root_cache,
3349                                                         backref->ref_root);
3350                                 BUG_ON(IS_ERR(ref_root));
3351                                 if (ref_root->found_ref > 0)
3352                                         continue;
3353
3354                                 backref->reachable = 0;
3355                                 rec->found_ref--;
3356                                 if (rec->found_ref == 0)
3357                                         loop = 1;
3358                         }
3359                 }
3360         }
3361
3362         cache = search_cache_extent(root_cache, 0);
3363         while (1) {
3364                 if (!cache)
3365                         break;
3366                 rec = container_of(cache, struct root_record, cache);
3367                 cache = next_cache_extent(cache);
3368
3369                 if (rec->found_ref == 0 &&
3370                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3371                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3372                         ret = check_orphan_item(root->fs_info->tree_root,
3373                                                 rec->objectid);
3374                         if (ret == 0)
3375                                 continue;
3376
3377                         /*
3378                          * If we don't have a root item then we likely just have
3379                          * a dir item in a snapshot for this root but no actual
3380                          * ref key or anything so it's meaningless.
3381                          */
3382                         if (!rec->found_root_item)
3383                                 continue;
3384                         errors++;
3385                         fprintf(stderr, "fs tree %llu not referenced\n",
3386                                 (unsigned long long)rec->objectid);
3387                 }
3388
3389                 error = 0;
3390                 if (rec->found_ref > 0 && !rec->found_root_item)
3391                         error = 1;
3392                 list_for_each_entry(backref, &rec->backrefs, list) {
3393                         if (!backref->found_dir_item)
3394                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3395                         if (!backref->found_dir_index)
3396                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3397                         if (!backref->found_back_ref)
3398                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3399                         if (!backref->found_forward_ref)
3400                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3401                         if (backref->reachable && backref->errors)
3402                                 error = 1;
3403                 }
3404                 if (!error)
3405                         continue;
3406
3407                 errors++;
3408                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3409                         (unsigned long long)rec->objectid, rec->found_ref,
3410                          rec->found_root_item ? "" : "not found");
3411
3412                 list_for_each_entry(backref, &rec->backrefs, list) {
3413                         if (!backref->reachable)
3414                                 continue;
3415                         if (!backref->errors && rec->found_root_item)
3416                                 continue;
3417                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3418                                 " index %llu namelen %u name %s errors %x\n",
3419                                 (unsigned long long)backref->ref_root,
3420                                 (unsigned long long)backref->dir,
3421                                 (unsigned long long)backref->index,
3422                                 backref->namelen, backref->name,
3423                                 backref->errors);
3424                         print_ref_error(backref->errors);
3425                 }
3426         }
3427         return errors > 0 ? 1 : 0;
3428 }
3429
3430 static int process_root_ref(struct extent_buffer *eb, int slot,
3431                             struct btrfs_key *key,
3432                             struct cache_tree *root_cache)
3433 {
3434         u64 dirid;
3435         u64 index;
3436         u32 len;
3437         u32 name_len;
3438         struct btrfs_root_ref *ref;
3439         char namebuf[BTRFS_NAME_LEN];
3440         int error;
3441
3442         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3443
3444         dirid = btrfs_root_ref_dirid(eb, ref);
3445         index = btrfs_root_ref_sequence(eb, ref);
3446         name_len = btrfs_root_ref_name_len(eb, ref);
3447
3448         if (name_len <= BTRFS_NAME_LEN) {
3449                 len = name_len;
3450                 error = 0;
3451         } else {
3452                 len = BTRFS_NAME_LEN;
3453                 error = REF_ERR_NAME_TOO_LONG;
3454         }
3455         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3456
3457         if (key->type == BTRFS_ROOT_REF_KEY) {
3458                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3459                                  index, namebuf, len, key->type, error);
3460         } else {
3461                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3462                                  index, namebuf, len, key->type, error);
3463         }
3464         return 0;
3465 }
3466
3467 static void free_corrupt_block(struct cache_extent *cache)
3468 {
3469         struct btrfs_corrupt_block *corrupt;
3470
3471         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3472         free(corrupt);
3473 }
3474
3475 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3476
3477 /*
3478  * Repair the btree of the given root.
3479  *
3480  * The fix is to remove the node key in corrupt_blocks cache_tree.
3481  * and rebalance the tree.
3482  * After the fix, the btree should be writeable.
3483  */
3484 static int repair_btree(struct btrfs_root *root,
3485                         struct cache_tree *corrupt_blocks)
3486 {
3487         struct btrfs_trans_handle *trans;
3488         struct btrfs_path path;
3489         struct btrfs_corrupt_block *corrupt;
3490         struct cache_extent *cache;
3491         struct btrfs_key key;
3492         u64 offset;
3493         int level;
3494         int ret = 0;
3495
3496         if (cache_tree_empty(corrupt_blocks))
3497                 return 0;
3498
3499         trans = btrfs_start_transaction(root, 1);
3500         if (IS_ERR(trans)) {
3501                 ret = PTR_ERR(trans);
3502                 fprintf(stderr, "Error starting transaction: %s\n",
3503                         strerror(-ret));
3504                 return ret;
3505         }
3506         btrfs_init_path(&path);
3507         cache = first_cache_extent(corrupt_blocks);
3508         while (cache) {
3509                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3510                                        cache);
3511                 level = corrupt->level;
3512                 path.lowest_level = level;
3513                 key.objectid = corrupt->key.objectid;
3514                 key.type = corrupt->key.type;
3515                 key.offset = corrupt->key.offset;
3516
3517                 /*
3518                  * Here we don't want to do any tree balance, since it may
3519                  * cause a balance with corrupted brother leaf/node,
3520                  * so ins_len set to 0 here.
3521                  * Balance will be done after all corrupt node/leaf is deleted.
3522                  */
3523                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3524                 if (ret < 0)
3525                         goto out;
3526                 offset = btrfs_node_blockptr(path.nodes[level],
3527                                              path.slots[level]);
3528
3529                 /* Remove the ptr */
3530                 ret = btrfs_del_ptr(trans, root, &path, level,
3531                                     path.slots[level]);
3532                 if (ret < 0)
3533                         goto out;
3534                 /*
3535                  * Remove the corresponding extent
3536                  * return value is not concerned.
3537                  */
3538                 btrfs_release_path(&path);
3539                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3540                                         0, root->root_key.objectid,
3541                                         level - 1, 0);
3542                 cache = next_cache_extent(cache);
3543         }
3544
3545         /* Balance the btree using btrfs_search_slot() */
3546         cache = first_cache_extent(corrupt_blocks);
3547         while (cache) {
3548                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3549                                        cache);
3550                 memcpy(&key, &corrupt->key, sizeof(key));
3551                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3552                 if (ret < 0)
3553                         goto out;
3554                 /* return will always >0 since it won't find the item */
3555                 ret = 0;
3556                 btrfs_release_path(&path);
3557                 cache = next_cache_extent(cache);
3558         }
3559 out:
3560         btrfs_commit_transaction(trans, root);
3561         btrfs_release_path(&path);
3562         return ret;
3563 }
3564
3565 static int check_fs_root(struct btrfs_root *root,
3566                          struct cache_tree *root_cache,
3567                          struct walk_control *wc)
3568 {
3569         int ret = 0;
3570         int err = 0;
3571         int wret;
3572         int level;
3573         struct btrfs_path path;
3574         struct shared_node root_node;
3575         struct root_record *rec;
3576         struct btrfs_root_item *root_item = &root->root_item;
3577         struct cache_tree corrupt_blocks;
3578         struct orphan_data_extent *orphan;
3579         struct orphan_data_extent *tmp;
3580         enum btrfs_tree_block_status status;
3581         struct node_refs nrefs;
3582
3583         /*
3584          * Reuse the corrupt_block cache tree to record corrupted tree block
3585          *
3586          * Unlike the usage in extent tree check, here we do it in a per
3587          * fs/subvol tree base.
3588          */
3589         cache_tree_init(&corrupt_blocks);
3590         root->fs_info->corrupt_blocks = &corrupt_blocks;
3591
3592         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3593                 rec = get_root_rec(root_cache, root->root_key.objectid);
3594                 BUG_ON(IS_ERR(rec));
3595                 if (btrfs_root_refs(root_item) > 0)
3596                         rec->found_root_item = 1;
3597         }
3598
3599         btrfs_init_path(&path);
3600         memset(&root_node, 0, sizeof(root_node));
3601         cache_tree_init(&root_node.root_cache);
3602         cache_tree_init(&root_node.inode_cache);
3603         memset(&nrefs, 0, sizeof(nrefs));
3604
3605         /* Move the orphan extent record to corresponding inode_record */
3606         list_for_each_entry_safe(orphan, tmp,
3607                                  &root->orphan_data_extents, list) {
3608                 struct inode_record *inode;
3609
3610                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3611                                       1);
3612                 BUG_ON(IS_ERR(inode));
3613                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3614                 list_move(&orphan->list, &inode->orphan_extents);
3615         }
3616
3617         level = btrfs_header_level(root->node);
3618         memset(wc->nodes, 0, sizeof(wc->nodes));
3619         wc->nodes[level] = &root_node;
3620         wc->active_node = level;
3621         wc->root_level = level;
3622
3623         /* We may not have checked the root block, lets do that now */
3624         if (btrfs_is_leaf(root->node))
3625                 status = btrfs_check_leaf(root, NULL, root->node);
3626         else
3627                 status = btrfs_check_node(root, NULL, root->node);
3628         if (status != BTRFS_TREE_BLOCK_CLEAN)
3629                 return -EIO;
3630
3631         if (btrfs_root_refs(root_item) > 0 ||
3632             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3633                 path.nodes[level] = root->node;
3634                 extent_buffer_get(root->node);
3635                 path.slots[level] = 0;
3636         } else {
3637                 struct btrfs_key key;
3638                 struct btrfs_disk_key found_key;
3639
3640                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3641                 level = root_item->drop_level;
3642                 path.lowest_level = level;
3643                 if (level > btrfs_header_level(root->node) ||
3644                     level >= BTRFS_MAX_LEVEL) {
3645                         error("ignoring invalid drop level: %u", level);
3646                         goto skip_walking;
3647                 }
3648                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3649                 if (wret < 0)
3650                         goto skip_walking;
3651                 btrfs_node_key(path.nodes[level], &found_key,
3652                                 path.slots[level]);
3653                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3654                                         sizeof(found_key)));
3655         }
3656
3657         while (1) {
3658                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3659                 if (wret < 0)
3660                         ret = wret;
3661                 if (wret != 0)
3662                         break;
3663
3664                 wret = walk_up_tree(root, &path, wc, &level);
3665                 if (wret < 0)
3666                         ret = wret;
3667                 if (wret != 0)
3668                         break;
3669         }
3670 skip_walking:
3671         btrfs_release_path(&path);
3672
3673         if (!cache_tree_empty(&corrupt_blocks)) {
3674                 struct cache_extent *cache;
3675                 struct btrfs_corrupt_block *corrupt;
3676
3677                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3678                        root->root_key.objectid);
3679                 cache = first_cache_extent(&corrupt_blocks);
3680                 while (cache) {
3681                         corrupt = container_of(cache,
3682                                                struct btrfs_corrupt_block,
3683                                                cache);
3684                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3685                                cache->start, corrupt->level,
3686                                corrupt->key.objectid, corrupt->key.type,
3687                                corrupt->key.offset);
3688                         cache = next_cache_extent(cache);
3689                 }
3690                 if (repair) {
3691                         printf("Try to repair the btree for root %llu\n",
3692                                root->root_key.objectid);
3693                         ret = repair_btree(root, &corrupt_blocks);
3694                         if (ret < 0)
3695                                 fprintf(stderr, "Failed to repair btree: %s\n",
3696                                         strerror(-ret));
3697                         if (!ret)
3698                                 printf("Btree for root %llu is fixed\n",
3699                                        root->root_key.objectid);
3700                 }
3701         }
3702
3703         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3704         if (err < 0)
3705                 ret = err;
3706
3707         if (root_node.current) {
3708                 root_node.current->checked = 1;
3709                 maybe_free_inode_rec(&root_node.inode_cache,
3710                                 root_node.current);
3711         }
3712
3713         err = check_inode_recs(root, &root_node.inode_cache);
3714         if (!ret)
3715                 ret = err;
3716
3717         free_corrupt_blocks_tree(&corrupt_blocks);
3718         root->fs_info->corrupt_blocks = NULL;
3719         free_orphan_data_extents(&root->orphan_data_extents);
3720         return ret;
3721 }
3722
3723 static int fs_root_objectid(u64 objectid)
3724 {
3725         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3726             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3727                 return 1;
3728         return is_fstree(objectid);
3729 }
3730
3731 static int check_fs_roots(struct btrfs_root *root,
3732                           struct cache_tree *root_cache)
3733 {
3734         struct btrfs_path path;
3735         struct btrfs_key key;
3736         struct walk_control wc;
3737         struct extent_buffer *leaf, *tree_node;
3738         struct btrfs_root *tmp_root;
3739         struct btrfs_root *tree_root = root->fs_info->tree_root;
3740         int ret;
3741         int err = 0;
3742
3743         if (ctx.progress_enabled) {
3744                 ctx.tp = TASK_FS_ROOTS;
3745                 task_start(ctx.info);
3746         }
3747
3748         /*
3749          * Just in case we made any changes to the extent tree that weren't
3750          * reflected into the free space cache yet.
3751          */
3752         if (repair)
3753                 reset_cached_block_groups(root->fs_info);
3754         memset(&wc, 0, sizeof(wc));
3755         cache_tree_init(&wc.shared);
3756         btrfs_init_path(&path);
3757
3758 again:
3759         key.offset = 0;
3760         key.objectid = 0;
3761         key.type = BTRFS_ROOT_ITEM_KEY;
3762         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3763         if (ret < 0) {
3764                 err = 1;
3765                 goto out;
3766         }
3767         tree_node = tree_root->node;
3768         while (1) {
3769                 if (tree_node != tree_root->node) {
3770                         free_root_recs_tree(root_cache);
3771                         btrfs_release_path(&path);
3772                         goto again;
3773                 }
3774                 leaf = path.nodes[0];
3775                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3776                         ret = btrfs_next_leaf(tree_root, &path);
3777                         if (ret) {
3778                                 if (ret < 0)
3779                                         err = 1;
3780                                 break;
3781                         }
3782                         leaf = path.nodes[0];
3783                 }
3784                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3785                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3786                     fs_root_objectid(key.objectid)) {
3787                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3788                                 tmp_root = btrfs_read_fs_root_no_cache(
3789                                                 root->fs_info, &key);
3790                         } else {
3791                                 key.offset = (u64)-1;
3792                                 tmp_root = btrfs_read_fs_root(
3793                                                 root->fs_info, &key);
3794                         }
3795                         if (IS_ERR(tmp_root)) {
3796                                 err = 1;
3797                                 goto next;
3798                         }
3799                         ret = check_fs_root(tmp_root, root_cache, &wc);
3800                         if (ret == -EAGAIN) {
3801                                 free_root_recs_tree(root_cache);
3802                                 btrfs_release_path(&path);
3803                                 goto again;
3804                         }
3805                         if (ret)
3806                                 err = 1;
3807                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3808                                 btrfs_free_fs_root(tmp_root);
3809                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3810                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3811                         process_root_ref(leaf, path.slots[0], &key,
3812                                          root_cache);
3813                 }
3814 next:
3815                 path.slots[0]++;
3816         }
3817 out:
3818         btrfs_release_path(&path);
3819         if (err)
3820                 free_extent_cache_tree(&wc.shared);
3821         if (!cache_tree_empty(&wc.shared))
3822                 fprintf(stderr, "warning line %d\n", __LINE__);
3823
3824         task_stop(ctx.info);
3825
3826         return err;
3827 }
3828
3829 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3830 {
3831         struct list_head *cur = rec->backrefs.next;
3832         struct extent_backref *back;
3833         struct tree_backref *tback;
3834         struct data_backref *dback;
3835         u64 found = 0;
3836         int err = 0;
3837
3838         while(cur != &rec->backrefs) {
3839                 back = to_extent_backref(cur);
3840                 cur = cur->next;
3841                 if (!back->found_extent_tree) {
3842                         err = 1;
3843                         if (!print_errs)
3844                                 goto out;
3845                         if (back->is_data) {
3846                                 dback = to_data_backref(back);
3847                                 fprintf(stderr, "Backref %llu %s %llu"
3848                                         " owner %llu offset %llu num_refs %lu"
3849                                         " not found in extent tree\n",
3850                                         (unsigned long long)rec->start,
3851                                         back->full_backref ?
3852                                         "parent" : "root",
3853                                         back->full_backref ?
3854                                         (unsigned long long)dback->parent:
3855                                         (unsigned long long)dback->root,
3856                                         (unsigned long long)dback->owner,
3857                                         (unsigned long long)dback->offset,
3858                                         (unsigned long)dback->num_refs);
3859                         } else {
3860                                 tback = to_tree_backref(back);
3861                                 fprintf(stderr, "Backref %llu parent %llu"
3862                                         " root %llu not found in extent tree\n",
3863                                         (unsigned long long)rec->start,
3864                                         (unsigned long long)tback->parent,
3865                                         (unsigned long long)tback->root);
3866                         }
3867                 }
3868                 if (!back->is_data && !back->found_ref) {
3869                         err = 1;
3870                         if (!print_errs)
3871                                 goto out;
3872                         tback = to_tree_backref(back);
3873                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3874                                 (unsigned long long)rec->start,
3875                                 back->full_backref ? "parent" : "root",
3876                                 back->full_backref ?
3877                                 (unsigned long long)tback->parent :
3878                                 (unsigned long long)tback->root, back);
3879                 }
3880                 if (back->is_data) {
3881                         dback = to_data_backref(back);
3882                         if (dback->found_ref != dback->num_refs) {
3883                                 err = 1;
3884                                 if (!print_errs)
3885                                         goto out;
3886                                 fprintf(stderr, "Incorrect local backref count"
3887                                         " on %llu %s %llu owner %llu"
3888                                         " offset %llu found %u wanted %u back %p\n",
3889                                         (unsigned long long)rec->start,
3890                                         back->full_backref ?
3891                                         "parent" : "root",
3892                                         back->full_backref ?
3893                                         (unsigned long long)dback->parent:
3894                                         (unsigned long long)dback->root,
3895                                         (unsigned long long)dback->owner,
3896                                         (unsigned long long)dback->offset,
3897                                         dback->found_ref, dback->num_refs, back);
3898                         }
3899                         if (dback->disk_bytenr != rec->start) {
3900                                 err = 1;
3901                                 if (!print_errs)
3902                                         goto out;
3903                                 fprintf(stderr, "Backref disk bytenr does not"
3904                                         " match extent record, bytenr=%llu, "
3905                                         "ref bytenr=%llu\n",
3906                                         (unsigned long long)rec->start,
3907                                         (unsigned long long)dback->disk_bytenr);
3908                         }
3909
3910                         if (dback->bytes != rec->nr) {
3911                                 err = 1;
3912                                 if (!print_errs)
3913                                         goto out;
3914                                 fprintf(stderr, "Backref bytes do not match "
3915                                         "extent backref, bytenr=%llu, ref "
3916                                         "bytes=%llu, backref bytes=%llu\n",
3917                                         (unsigned long long)rec->start,
3918                                         (unsigned long long)rec->nr,
3919                                         (unsigned long long)dback->bytes);
3920                         }
3921                 }
3922                 if (!back->is_data) {
3923                         found += 1;
3924                 } else {
3925                         dback = to_data_backref(back);
3926                         found += dback->found_ref;
3927                 }
3928         }
3929         if (found != rec->refs) {
3930                 err = 1;
3931                 if (!print_errs)
3932                         goto out;
3933                 fprintf(stderr, "Incorrect global backref count "
3934                         "on %llu found %llu wanted %llu\n",
3935                         (unsigned long long)rec->start,
3936                         (unsigned long long)found,
3937                         (unsigned long long)rec->refs);
3938         }
3939 out:
3940         return err;
3941 }
3942
3943 static int free_all_extent_backrefs(struct extent_record *rec)
3944 {
3945         struct extent_backref *back;
3946         struct list_head *cur;
3947         while (!list_empty(&rec->backrefs)) {
3948                 cur = rec->backrefs.next;
3949                 back = to_extent_backref(cur);
3950                 list_del(cur);
3951                 free(back);
3952         }
3953         return 0;
3954 }
3955
3956 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3957                                      struct cache_tree *extent_cache)
3958 {
3959         struct cache_extent *cache;
3960         struct extent_record *rec;
3961
3962         while (1) {
3963                 cache = first_cache_extent(extent_cache);
3964                 if (!cache)
3965                         break;
3966                 rec = container_of(cache, struct extent_record, cache);
3967                 remove_cache_extent(extent_cache, cache);
3968                 free_all_extent_backrefs(rec);
3969                 free(rec);
3970         }
3971 }
3972
3973 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3974                                  struct extent_record *rec)
3975 {
3976         if (rec->content_checked && rec->owner_ref_checked &&
3977             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3978             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3979             !rec->bad_full_backref && !rec->crossing_stripes &&
3980             !rec->wrong_chunk_type) {
3981                 remove_cache_extent(extent_cache, &rec->cache);
3982                 free_all_extent_backrefs(rec);
3983                 list_del_init(&rec->list);
3984                 free(rec);
3985         }
3986         return 0;
3987 }
3988
3989 static int check_owner_ref(struct btrfs_root *root,
3990                             struct extent_record *rec,
3991                             struct extent_buffer *buf)
3992 {
3993         struct extent_backref *node;
3994         struct tree_backref *back;
3995         struct btrfs_root *ref_root;
3996         struct btrfs_key key;
3997         struct btrfs_path path;
3998         struct extent_buffer *parent;
3999         int level;
4000         int found = 0;
4001         int ret;
4002
4003         list_for_each_entry(node, &rec->backrefs, list) {
4004                 if (node->is_data)
4005                         continue;
4006                 if (!node->found_ref)
4007                         continue;
4008                 if (node->full_backref)
4009                         continue;
4010                 back = to_tree_backref(node);
4011                 if (btrfs_header_owner(buf) == back->root)
4012                         return 0;
4013         }
4014         BUG_ON(rec->is_root);
4015
4016         /* try to find the block by search corresponding fs tree */
4017         key.objectid = btrfs_header_owner(buf);
4018         key.type = BTRFS_ROOT_ITEM_KEY;
4019         key.offset = (u64)-1;
4020
4021         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4022         if (IS_ERR(ref_root))
4023                 return 1;
4024
4025         level = btrfs_header_level(buf);
4026         if (level == 0)
4027                 btrfs_item_key_to_cpu(buf, &key, 0);
4028         else
4029                 btrfs_node_key_to_cpu(buf, &key, 0);
4030
4031         btrfs_init_path(&path);
4032         path.lowest_level = level + 1;
4033         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4034         if (ret < 0)
4035                 return 0;
4036
4037         parent = path.nodes[level + 1];
4038         if (parent && buf->start == btrfs_node_blockptr(parent,
4039                                                         path.slots[level + 1]))
4040                 found = 1;
4041
4042         btrfs_release_path(&path);
4043         return found ? 0 : 1;
4044 }
4045
4046 static int is_extent_tree_record(struct extent_record *rec)
4047 {
4048         struct list_head *cur = rec->backrefs.next;
4049         struct extent_backref *node;
4050         struct tree_backref *back;
4051         int is_extent = 0;
4052
4053         while(cur != &rec->backrefs) {
4054                 node = to_extent_backref(cur);
4055                 cur = cur->next;
4056                 if (node->is_data)
4057                         return 0;
4058                 back = to_tree_backref(node);
4059                 if (node->full_backref)
4060                         return 0;
4061                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4062                         is_extent = 1;
4063         }
4064         return is_extent;
4065 }
4066
4067
4068 static int record_bad_block_io(struct btrfs_fs_info *info,
4069                                struct cache_tree *extent_cache,
4070                                u64 start, u64 len)
4071 {
4072         struct extent_record *rec;
4073         struct cache_extent *cache;
4074         struct btrfs_key key;
4075
4076         cache = lookup_cache_extent(extent_cache, start, len);
4077         if (!cache)
4078                 return 0;
4079
4080         rec = container_of(cache, struct extent_record, cache);
4081         if (!is_extent_tree_record(rec))
4082                 return 0;
4083
4084         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4085         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4086 }
4087
4088 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4089                        struct extent_buffer *buf, int slot)
4090 {
4091         if (btrfs_header_level(buf)) {
4092                 struct btrfs_key_ptr ptr1, ptr2;
4093
4094                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4095                                    sizeof(struct btrfs_key_ptr));
4096                 read_extent_buffer(buf, &ptr2,
4097                                    btrfs_node_key_ptr_offset(slot + 1),
4098                                    sizeof(struct btrfs_key_ptr));
4099                 write_extent_buffer(buf, &ptr1,
4100                                     btrfs_node_key_ptr_offset(slot + 1),
4101                                     sizeof(struct btrfs_key_ptr));
4102                 write_extent_buffer(buf, &ptr2,
4103                                     btrfs_node_key_ptr_offset(slot),
4104                                     sizeof(struct btrfs_key_ptr));
4105                 if (slot == 0) {
4106                         struct btrfs_disk_key key;
4107                         btrfs_node_key(buf, &key, 0);
4108                         btrfs_fixup_low_keys(root, path, &key,
4109                                              btrfs_header_level(buf) + 1);
4110                 }
4111         } else {
4112                 struct btrfs_item *item1, *item2;
4113                 struct btrfs_key k1, k2;
4114                 char *item1_data, *item2_data;
4115                 u32 item1_offset, item2_offset, item1_size, item2_size;
4116
4117                 item1 = btrfs_item_nr(slot);
4118                 item2 = btrfs_item_nr(slot + 1);
4119                 btrfs_item_key_to_cpu(buf, &k1, slot);
4120                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4121                 item1_offset = btrfs_item_offset(buf, item1);
4122                 item2_offset = btrfs_item_offset(buf, item2);
4123                 item1_size = btrfs_item_size(buf, item1);
4124                 item2_size = btrfs_item_size(buf, item2);
4125
4126                 item1_data = malloc(item1_size);
4127                 if (!item1_data)
4128                         return -ENOMEM;
4129                 item2_data = malloc(item2_size);
4130                 if (!item2_data) {
4131                         free(item1_data);
4132                         return -ENOMEM;
4133                 }
4134
4135                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4136                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4137
4138                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4139                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4140                 free(item1_data);
4141                 free(item2_data);
4142
4143                 btrfs_set_item_offset(buf, item1, item2_offset);
4144                 btrfs_set_item_offset(buf, item2, item1_offset);
4145                 btrfs_set_item_size(buf, item1, item2_size);
4146                 btrfs_set_item_size(buf, item2, item1_size);
4147
4148                 path->slots[0] = slot;
4149                 btrfs_set_item_key_unsafe(root, path, &k2);
4150                 path->slots[0] = slot + 1;
4151                 btrfs_set_item_key_unsafe(root, path, &k1);
4152         }
4153         return 0;
4154 }
4155
4156 static int fix_key_order(struct btrfs_trans_handle *trans,
4157                          struct btrfs_root *root,
4158                          struct btrfs_path *path)
4159 {
4160         struct extent_buffer *buf;
4161         struct btrfs_key k1, k2;
4162         int i;
4163         int level = path->lowest_level;
4164         int ret = -EIO;
4165
4166         buf = path->nodes[level];
4167         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4168                 if (level) {
4169                         btrfs_node_key_to_cpu(buf, &k1, i);
4170                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4171                 } else {
4172                         btrfs_item_key_to_cpu(buf, &k1, i);
4173                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4174                 }
4175                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4176                         continue;
4177                 ret = swap_values(root, path, buf, i);
4178                 if (ret)
4179                         break;
4180                 btrfs_mark_buffer_dirty(buf);
4181                 i = 0;
4182         }
4183         return ret;
4184 }
4185
4186 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4187                              struct btrfs_root *root,
4188                              struct btrfs_path *path,
4189                              struct extent_buffer *buf, int slot)
4190 {
4191         struct btrfs_key key;
4192         int nritems = btrfs_header_nritems(buf);
4193
4194         btrfs_item_key_to_cpu(buf, &key, slot);
4195
4196         /* These are all the keys we can deal with missing. */
4197         if (key.type != BTRFS_DIR_INDEX_KEY &&
4198             key.type != BTRFS_EXTENT_ITEM_KEY &&
4199             key.type != BTRFS_METADATA_ITEM_KEY &&
4200             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4201             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4202                 return -1;
4203
4204         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4205                (unsigned long long)key.objectid, key.type,
4206                (unsigned long long)key.offset, slot, buf->start);
4207         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4208                               btrfs_item_nr_offset(slot + 1),
4209                               sizeof(struct btrfs_item) *
4210                               (nritems - slot - 1));
4211         btrfs_set_header_nritems(buf, nritems - 1);
4212         if (slot == 0) {
4213                 struct btrfs_disk_key disk_key;
4214
4215                 btrfs_item_key(buf, &disk_key, 0);
4216                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4217         }
4218         btrfs_mark_buffer_dirty(buf);
4219         return 0;
4220 }
4221
4222 static int fix_item_offset(struct btrfs_trans_handle *trans,
4223                            struct btrfs_root *root,
4224                            struct btrfs_path *path)
4225 {
4226         struct extent_buffer *buf;
4227         int i;
4228         int ret = 0;
4229
4230         /* We should only get this for leaves */
4231         BUG_ON(path->lowest_level);
4232         buf = path->nodes[0];
4233 again:
4234         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4235                 unsigned int shift = 0, offset;
4236
4237                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4238                     BTRFS_LEAF_DATA_SIZE(root)) {
4239                         if (btrfs_item_end_nr(buf, i) >
4240                             BTRFS_LEAF_DATA_SIZE(root)) {
4241                                 ret = delete_bogus_item(trans, root, path,
4242                                                         buf, i);
4243                                 if (!ret)
4244                                         goto again;
4245                                 fprintf(stderr, "item is off the end of the "
4246                                         "leaf, can't fix\n");
4247                                 ret = -EIO;
4248                                 break;
4249                         }
4250                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4251                                 btrfs_item_end_nr(buf, i);
4252                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4253                            btrfs_item_offset_nr(buf, i - 1)) {
4254                         if (btrfs_item_end_nr(buf, i) >
4255                             btrfs_item_offset_nr(buf, i - 1)) {
4256                                 ret = delete_bogus_item(trans, root, path,
4257                                                         buf, i);
4258                                 if (!ret)
4259                                         goto again;
4260                                 fprintf(stderr, "items overlap, can't fix\n");
4261                                 ret = -EIO;
4262                                 break;
4263                         }
4264                         shift = btrfs_item_offset_nr(buf, i - 1) -
4265                                 btrfs_item_end_nr(buf, i);
4266                 }
4267                 if (!shift)
4268                         continue;
4269
4270                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4271                        i, shift, (unsigned long long)buf->start);
4272                 offset = btrfs_item_offset_nr(buf, i);
4273                 memmove_extent_buffer(buf,
4274                                       btrfs_leaf_data(buf) + offset + shift,
4275                                       btrfs_leaf_data(buf) + offset,
4276                                       btrfs_item_size_nr(buf, i));
4277                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4278                                       offset + shift);
4279                 btrfs_mark_buffer_dirty(buf);
4280         }
4281
4282         /*
4283          * We may have moved things, in which case we want to exit so we don't
4284          * write those changes out.  Once we have proper abort functionality in
4285          * progs this can be changed to something nicer.
4286          */
4287         BUG_ON(ret);
4288         return ret;
4289 }
4290
4291 /*
4292  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4293  * then just return -EIO.
4294  */
4295 static int try_to_fix_bad_block(struct btrfs_root *root,
4296                                 struct extent_buffer *buf,
4297                                 enum btrfs_tree_block_status status)
4298 {
4299         struct btrfs_trans_handle *trans;
4300         struct ulist *roots;
4301         struct ulist_node *node;
4302         struct btrfs_root *search_root;
4303         struct btrfs_path path;
4304         struct ulist_iterator iter;
4305         struct btrfs_key root_key, key;
4306         int ret;
4307
4308         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4309             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4310                 return -EIO;
4311
4312         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4313         if (ret)
4314                 return -EIO;
4315
4316         btrfs_init_path(&path);
4317         ULIST_ITER_INIT(&iter);
4318         while ((node = ulist_next(roots, &iter))) {
4319                 root_key.objectid = node->val;
4320                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4321                 root_key.offset = (u64)-1;
4322
4323                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4324                 if (IS_ERR(root)) {
4325                         ret = -EIO;
4326                         break;
4327                 }
4328
4329
4330                 trans = btrfs_start_transaction(search_root, 0);
4331                 if (IS_ERR(trans)) {
4332                         ret = PTR_ERR(trans);
4333                         break;
4334                 }
4335
4336                 path.lowest_level = btrfs_header_level(buf);
4337                 path.skip_check_block = 1;
4338                 if (path.lowest_level)
4339                         btrfs_node_key_to_cpu(buf, &key, 0);
4340                 else
4341                         btrfs_item_key_to_cpu(buf, &key, 0);
4342                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4343                 if (ret) {
4344                         ret = -EIO;
4345                         btrfs_commit_transaction(trans, search_root);
4346                         break;
4347                 }
4348                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4349                         ret = fix_key_order(trans, search_root, &path);
4350                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4351                         ret = fix_item_offset(trans, search_root, &path);
4352                 if (ret) {
4353                         btrfs_commit_transaction(trans, search_root);
4354                         break;
4355                 }
4356                 btrfs_release_path(&path);
4357                 btrfs_commit_transaction(trans, search_root);
4358         }
4359         ulist_free(roots);
4360         btrfs_release_path(&path);
4361         return ret;
4362 }
4363
4364 static int check_block(struct btrfs_root *root,
4365                        struct cache_tree *extent_cache,
4366                        struct extent_buffer *buf, u64 flags)
4367 {
4368         struct extent_record *rec;
4369         struct cache_extent *cache;
4370         struct btrfs_key key;
4371         enum btrfs_tree_block_status status;
4372         int ret = 0;
4373         int level;
4374
4375         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4376         if (!cache)
4377                 return 1;
4378         rec = container_of(cache, struct extent_record, cache);
4379         rec->generation = btrfs_header_generation(buf);
4380
4381         level = btrfs_header_level(buf);
4382         if (btrfs_header_nritems(buf) > 0) {
4383
4384                 if (level == 0)
4385                         btrfs_item_key_to_cpu(buf, &key, 0);
4386                 else
4387                         btrfs_node_key_to_cpu(buf, &key, 0);
4388
4389                 rec->info_objectid = key.objectid;
4390         }
4391         rec->info_level = level;
4392
4393         if (btrfs_is_leaf(buf))
4394                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4395         else
4396                 status = btrfs_check_node(root, &rec->parent_key, buf);
4397
4398         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4399                 if (repair)
4400                         status = try_to_fix_bad_block(root, buf, status);
4401                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4402                         ret = -EIO;
4403                         fprintf(stderr, "bad block %llu\n",
4404                                 (unsigned long long)buf->start);
4405                 } else {
4406                         /*
4407                          * Signal to callers we need to start the scan over
4408                          * again since we'll have cowed blocks.
4409                          */
4410                         ret = -EAGAIN;
4411                 }
4412         } else {
4413                 rec->content_checked = 1;
4414                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4415                         rec->owner_ref_checked = 1;
4416                 else {
4417                         ret = check_owner_ref(root, rec, buf);
4418                         if (!ret)
4419                                 rec->owner_ref_checked = 1;
4420                 }
4421         }
4422         if (!ret)
4423                 maybe_free_extent_rec(extent_cache, rec);
4424         return ret;
4425 }
4426
4427 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4428                                                 u64 parent, u64 root)
4429 {
4430         struct list_head *cur = rec->backrefs.next;
4431         struct extent_backref *node;
4432         struct tree_backref *back;
4433
4434         while(cur != &rec->backrefs) {
4435                 node = to_extent_backref(cur);
4436                 cur = cur->next;
4437                 if (node->is_data)
4438                         continue;
4439                 back = to_tree_backref(node);
4440                 if (parent > 0) {
4441                         if (!node->full_backref)
4442                                 continue;
4443                         if (parent == back->parent)
4444                                 return back;
4445                 } else {
4446                         if (node->full_backref)
4447                                 continue;
4448                         if (back->root == root)
4449                                 return back;
4450                 }
4451         }
4452         return NULL;
4453 }
4454
4455 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4456                                                 u64 parent, u64 root)
4457 {
4458         struct tree_backref *ref = malloc(sizeof(*ref));
4459
4460         if (!ref)
4461                 return NULL;
4462         memset(&ref->node, 0, sizeof(ref->node));
4463         if (parent > 0) {
4464                 ref->parent = parent;
4465                 ref->node.full_backref = 1;
4466         } else {
4467                 ref->root = root;
4468                 ref->node.full_backref = 0;
4469         }
4470         list_add_tail(&ref->node.list, &rec->backrefs);
4471
4472         return ref;
4473 }
4474
4475 static struct data_backref *find_data_backref(struct extent_record *rec,
4476                                                 u64 parent, u64 root,
4477                                                 u64 owner, u64 offset,
4478                                                 int found_ref,
4479                                                 u64 disk_bytenr, u64 bytes)
4480 {
4481         struct list_head *cur = rec->backrefs.next;
4482         struct extent_backref *node;
4483         struct data_backref *back;
4484
4485         while(cur != &rec->backrefs) {
4486                 node = to_extent_backref(cur);
4487                 cur = cur->next;
4488                 if (!node->is_data)
4489                         continue;
4490                 back = to_data_backref(node);
4491                 if (parent > 0) {
4492                         if (!node->full_backref)
4493                                 continue;
4494                         if (parent == back->parent)
4495                                 return back;
4496                 } else {
4497                         if (node->full_backref)
4498                                 continue;
4499                         if (back->root == root && back->owner == owner &&
4500                             back->offset == offset) {
4501                                 if (found_ref && node->found_ref &&
4502                                     (back->bytes != bytes ||
4503                                     back->disk_bytenr != disk_bytenr))
4504                                         continue;
4505                                 return back;
4506                         }
4507                 }
4508         }
4509         return NULL;
4510 }
4511
4512 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4513                                                 u64 parent, u64 root,
4514                                                 u64 owner, u64 offset,
4515                                                 u64 max_size)
4516 {
4517         struct data_backref *ref = malloc(sizeof(*ref));
4518
4519         if (!ref)
4520                 return NULL;
4521         memset(&ref->node, 0, sizeof(ref->node));
4522         ref->node.is_data = 1;
4523
4524         if (parent > 0) {
4525                 ref->parent = parent;
4526                 ref->owner = 0;
4527                 ref->offset = 0;
4528                 ref->node.full_backref = 1;
4529         } else {
4530                 ref->root = root;
4531                 ref->owner = owner;
4532                 ref->offset = offset;
4533                 ref->node.full_backref = 0;
4534         }
4535         ref->bytes = max_size;
4536         ref->found_ref = 0;
4537         ref->num_refs = 0;
4538         list_add_tail(&ref->node.list, &rec->backrefs);
4539         if (max_size > rec->max_size)
4540                 rec->max_size = max_size;
4541         return ref;
4542 }
4543
4544 /* Check if the type of extent matches with its chunk */
4545 static void check_extent_type(struct extent_record *rec)
4546 {
4547         struct btrfs_block_group_cache *bg_cache;
4548
4549         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4550         if (!bg_cache)
4551                 return;
4552
4553         /* data extent, check chunk directly*/
4554         if (!rec->metadata) {
4555                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4556                         rec->wrong_chunk_type = 1;
4557                 return;
4558         }
4559
4560         /* metadata extent, check the obvious case first */
4561         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4562                                  BTRFS_BLOCK_GROUP_METADATA))) {
4563                 rec->wrong_chunk_type = 1;
4564                 return;
4565         }
4566
4567         /*
4568          * Check SYSTEM extent, as it's also marked as metadata, we can only
4569          * make sure it's a SYSTEM extent by its backref
4570          */
4571         if (!list_empty(&rec->backrefs)) {
4572                 struct extent_backref *node;
4573                 struct tree_backref *tback;
4574                 u64 bg_type;
4575
4576                 node = to_extent_backref(rec->backrefs.next);
4577                 if (node->is_data) {
4578                         /* tree block shouldn't have data backref */
4579                         rec->wrong_chunk_type = 1;
4580                         return;
4581                 }
4582                 tback = container_of(node, struct tree_backref, node);
4583
4584                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4585                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4586                 else
4587                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4588                 if (!(bg_cache->flags & bg_type))
4589                         rec->wrong_chunk_type = 1;
4590         }
4591 }
4592
4593 /*
4594  * Allocate a new extent record, fill default values from @tmpl and insert int
4595  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4596  * the cache, otherwise it fails.
4597  */
4598 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4599                 struct extent_record *tmpl)
4600 {
4601         struct extent_record *rec;
4602         int ret = 0;
4603
4604         rec = malloc(sizeof(*rec));
4605         if (!rec)
4606                 return -ENOMEM;
4607         rec->start = tmpl->start;
4608         rec->max_size = tmpl->max_size;
4609         rec->nr = max(tmpl->nr, tmpl->max_size);
4610         rec->found_rec = tmpl->found_rec;
4611         rec->content_checked = tmpl->content_checked;
4612         rec->owner_ref_checked = tmpl->owner_ref_checked;
4613         rec->num_duplicates = 0;
4614         rec->metadata = tmpl->metadata;
4615         rec->flag_block_full_backref = FLAG_UNSET;
4616         rec->bad_full_backref = 0;
4617         rec->crossing_stripes = 0;
4618         rec->wrong_chunk_type = 0;
4619         rec->is_root = tmpl->is_root;
4620         rec->refs = tmpl->refs;
4621         rec->extent_item_refs = tmpl->extent_item_refs;
4622         rec->parent_generation = tmpl->parent_generation;
4623         INIT_LIST_HEAD(&rec->backrefs);
4624         INIT_LIST_HEAD(&rec->dups);
4625         INIT_LIST_HEAD(&rec->list);
4626         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4627         rec->cache.start = tmpl->start;
4628         rec->cache.size = tmpl->nr;
4629         ret = insert_cache_extent(extent_cache, &rec->cache);
4630         if (ret) {
4631                 free(rec);
4632                 return ret;
4633         }
4634         bytes_used += rec->nr;
4635
4636         if (tmpl->metadata)
4637                 rec->crossing_stripes = check_crossing_stripes(global_info,
4638                                 rec->start, global_info->tree_root->nodesize);
4639         check_extent_type(rec);
4640         return ret;
4641 }
4642
4643 /*
4644  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4645  * some are hints:
4646  * - refs              - if found, increase refs
4647  * - is_root           - if found, set
4648  * - content_checked   - if found, set
4649  * - owner_ref_checked - if found, set
4650  *
4651  * If not found, create a new one, initialize and insert.
4652  */
4653 static int add_extent_rec(struct cache_tree *extent_cache,
4654                 struct extent_record *tmpl)
4655 {
4656         struct extent_record *rec;
4657         struct cache_extent *cache;
4658         int ret = 0;
4659         int dup = 0;
4660
4661         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4662         if (cache) {
4663                 rec = container_of(cache, struct extent_record, cache);
4664                 if (tmpl->refs)
4665                         rec->refs++;
4666                 if (rec->nr == 1)
4667                         rec->nr = max(tmpl->nr, tmpl->max_size);
4668
4669                 /*
4670                  * We need to make sure to reset nr to whatever the extent
4671                  * record says was the real size, this way we can compare it to
4672                  * the backrefs.
4673                  */
4674                 if (tmpl->found_rec) {
4675                         if (tmpl->start != rec->start || rec->found_rec) {
4676                                 struct extent_record *tmp;
4677
4678                                 dup = 1;
4679                                 if (list_empty(&rec->list))
4680                                         list_add_tail(&rec->list,
4681                                                       &duplicate_extents);
4682
4683                                 /*
4684                                  * We have to do this song and dance in case we
4685                                  * find an extent record that falls inside of
4686                                  * our current extent record but does not have
4687                                  * the same objectid.
4688                                  */
4689                                 tmp = malloc(sizeof(*tmp));
4690                                 if (!tmp)
4691                                         return -ENOMEM;
4692                                 tmp->start = tmpl->start;
4693                                 tmp->max_size = tmpl->max_size;
4694                                 tmp->nr = tmpl->nr;
4695                                 tmp->found_rec = 1;
4696                                 tmp->metadata = tmpl->metadata;
4697                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4698                                 INIT_LIST_HEAD(&tmp->list);
4699                                 list_add_tail(&tmp->list, &rec->dups);
4700                                 rec->num_duplicates++;
4701                         } else {
4702                                 rec->nr = tmpl->nr;
4703                                 rec->found_rec = 1;
4704                         }
4705                 }
4706
4707                 if (tmpl->extent_item_refs && !dup) {
4708                         if (rec->extent_item_refs) {
4709                                 fprintf(stderr, "block %llu rec "
4710                                         "extent_item_refs %llu, passed %llu\n",
4711                                         (unsigned long long)tmpl->start,
4712                                         (unsigned long long)
4713                                                         rec->extent_item_refs,
4714                                         (unsigned long long)tmpl->extent_item_refs);
4715                         }
4716                         rec->extent_item_refs = tmpl->extent_item_refs;
4717                 }
4718                 if (tmpl->is_root)
4719                         rec->is_root = 1;
4720                 if (tmpl->content_checked)
4721                         rec->content_checked = 1;
4722                 if (tmpl->owner_ref_checked)
4723                         rec->owner_ref_checked = 1;
4724                 memcpy(&rec->parent_key, &tmpl->parent_key,
4725                                 sizeof(tmpl->parent_key));
4726                 if (tmpl->parent_generation)
4727                         rec->parent_generation = tmpl->parent_generation;
4728                 if (rec->max_size < tmpl->max_size)
4729                         rec->max_size = tmpl->max_size;
4730
4731                 /*
4732                  * A metadata extent can't cross stripe_len boundary, otherwise
4733                  * kernel scrub won't be able to handle it.
4734                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4735                  * it.
4736                  */
4737                 if (tmpl->metadata)
4738                         rec->crossing_stripes = check_crossing_stripes(
4739                                         global_info, rec->start,
4740                                         global_info->tree_root->nodesize);
4741                 check_extent_type(rec);
4742                 maybe_free_extent_rec(extent_cache, rec);
4743                 return ret;
4744         }
4745
4746         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4747
4748         return ret;
4749 }
4750
4751 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4752                             u64 parent, u64 root, int found_ref)
4753 {
4754         struct extent_record *rec;
4755         struct tree_backref *back;
4756         struct cache_extent *cache;
4757         int ret;
4758
4759         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4760         if (!cache) {
4761                 struct extent_record tmpl;
4762
4763                 memset(&tmpl, 0, sizeof(tmpl));
4764                 tmpl.start = bytenr;
4765                 tmpl.nr = 1;
4766                 tmpl.metadata = 1;
4767
4768                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4769                 if (ret)
4770                         return ret;
4771
4772                 /* really a bug in cache_extent implement now */
4773                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4774                 if (!cache)
4775                         return -ENOENT;
4776         }
4777
4778         rec = container_of(cache, struct extent_record, cache);
4779         if (rec->start != bytenr) {
4780                 /*
4781                  * Several cause, from unaligned bytenr to over lapping extents
4782                  */
4783                 return -EEXIST;
4784         }
4785
4786         back = find_tree_backref(rec, parent, root);
4787         if (!back) {
4788                 back = alloc_tree_backref(rec, parent, root);
4789                 if (!back)
4790                         return -ENOMEM;
4791         }
4792
4793         if (found_ref) {
4794                 if (back->node.found_ref) {
4795                         fprintf(stderr, "Extent back ref already exists "
4796                                 "for %llu parent %llu root %llu \n",
4797                                 (unsigned long long)bytenr,
4798                                 (unsigned long long)parent,
4799                                 (unsigned long long)root);
4800                 }
4801                 back->node.found_ref = 1;
4802         } else {
4803                 if (back->node.found_extent_tree) {
4804                         fprintf(stderr, "Extent back ref already exists "
4805                                 "for %llu parent %llu root %llu \n",
4806                                 (unsigned long long)bytenr,
4807                                 (unsigned long long)parent,
4808                                 (unsigned long long)root);
4809                 }
4810                 back->node.found_extent_tree = 1;
4811         }
4812         check_extent_type(rec);
4813         maybe_free_extent_rec(extent_cache, rec);
4814         return 0;
4815 }
4816
4817 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4818                             u64 parent, u64 root, u64 owner, u64 offset,
4819                             u32 num_refs, int found_ref, u64 max_size)
4820 {
4821         struct extent_record *rec;
4822         struct data_backref *back;
4823         struct cache_extent *cache;
4824         int ret;
4825
4826         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4827         if (!cache) {
4828                 struct extent_record tmpl;
4829
4830                 memset(&tmpl, 0, sizeof(tmpl));
4831                 tmpl.start = bytenr;
4832                 tmpl.nr = 1;
4833                 tmpl.max_size = max_size;
4834
4835                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4836                 if (ret)
4837                         return ret;
4838
4839                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4840                 if (!cache)
4841                         abort();
4842         }
4843
4844         rec = container_of(cache, struct extent_record, cache);
4845         if (rec->max_size < max_size)
4846                 rec->max_size = max_size;
4847
4848         /*
4849          * If found_ref is set then max_size is the real size and must match the
4850          * existing refs.  So if we have already found a ref then we need to
4851          * make sure that this ref matches the existing one, otherwise we need
4852          * to add a new backref so we can notice that the backrefs don't match
4853          * and we need to figure out who is telling the truth.  This is to
4854          * account for that awful fsync bug I introduced where we'd end up with
4855          * a btrfs_file_extent_item that would have its length include multiple
4856          * prealloc extents or point inside of a prealloc extent.
4857          */
4858         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4859                                  bytenr, max_size);
4860         if (!back) {
4861                 back = alloc_data_backref(rec, parent, root, owner, offset,
4862                                           max_size);
4863                 BUG_ON(!back);
4864         }
4865
4866         if (found_ref) {
4867                 BUG_ON(num_refs != 1);
4868                 if (back->node.found_ref)
4869                         BUG_ON(back->bytes != max_size);
4870                 back->node.found_ref = 1;
4871                 back->found_ref += 1;
4872                 back->bytes = max_size;
4873                 back->disk_bytenr = bytenr;
4874                 rec->refs += 1;
4875                 rec->content_checked = 1;
4876                 rec->owner_ref_checked = 1;
4877         } else {
4878                 if (back->node.found_extent_tree) {
4879                         fprintf(stderr, "Extent back ref already exists "
4880                                 "for %llu parent %llu root %llu "
4881                                 "owner %llu offset %llu num_refs %lu\n",
4882                                 (unsigned long long)bytenr,
4883                                 (unsigned long long)parent,
4884                                 (unsigned long long)root,
4885                                 (unsigned long long)owner,
4886                                 (unsigned long long)offset,
4887                                 (unsigned long)num_refs);
4888                 }
4889                 back->num_refs = num_refs;
4890                 back->node.found_extent_tree = 1;
4891         }
4892         maybe_free_extent_rec(extent_cache, rec);
4893         return 0;
4894 }
4895
4896 static int add_pending(struct cache_tree *pending,
4897                        struct cache_tree *seen, u64 bytenr, u32 size)
4898 {
4899         int ret;
4900         ret = add_cache_extent(seen, bytenr, size);
4901         if (ret)
4902                 return ret;
4903         add_cache_extent(pending, bytenr, size);
4904         return 0;
4905 }
4906
4907 static int pick_next_pending(struct cache_tree *pending,
4908                         struct cache_tree *reada,
4909                         struct cache_tree *nodes,
4910                         u64 last, struct block_info *bits, int bits_nr,
4911                         int *reada_bits)
4912 {
4913         unsigned long node_start = last;
4914         struct cache_extent *cache;
4915         int ret;
4916
4917         cache = search_cache_extent(reada, 0);
4918         if (cache) {
4919                 bits[0].start = cache->start;
4920                 bits[0].size = cache->size;
4921                 *reada_bits = 1;
4922                 return 1;
4923         }
4924         *reada_bits = 0;
4925         if (node_start > 32768)
4926                 node_start -= 32768;
4927
4928         cache = search_cache_extent(nodes, node_start);
4929         if (!cache)
4930                 cache = search_cache_extent(nodes, 0);
4931
4932         if (!cache) {
4933                  cache = search_cache_extent(pending, 0);
4934                  if (!cache)
4935                          return 0;
4936                  ret = 0;
4937                  do {
4938                          bits[ret].start = cache->start;
4939                          bits[ret].size = cache->size;
4940                          cache = next_cache_extent(cache);
4941                          ret++;
4942                  } while (cache && ret < bits_nr);
4943                  return ret;
4944         }
4945
4946         ret = 0;
4947         do {
4948                 bits[ret].start = cache->start;
4949                 bits[ret].size = cache->size;
4950                 cache = next_cache_extent(cache);
4951                 ret++;
4952         } while (cache && ret < bits_nr);
4953
4954         if (bits_nr - ret > 8) {
4955                 u64 lookup = bits[0].start + bits[0].size;
4956                 struct cache_extent *next;
4957                 next = search_cache_extent(pending, lookup);
4958                 while(next) {
4959                         if (next->start - lookup > 32768)
4960                                 break;
4961                         bits[ret].start = next->start;
4962                         bits[ret].size = next->size;
4963                         lookup = next->start + next->size;
4964                         ret++;
4965                         if (ret == bits_nr)
4966                                 break;
4967                         next = next_cache_extent(next);
4968                         if (!next)
4969                                 break;
4970                 }
4971         }
4972         return ret;
4973 }
4974
4975 static void free_chunk_record(struct cache_extent *cache)
4976 {
4977         struct chunk_record *rec;
4978
4979         rec = container_of(cache, struct chunk_record, cache);
4980         list_del_init(&rec->list);
4981         list_del_init(&rec->dextents);
4982         free(rec);
4983 }
4984
4985 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4986 {
4987         cache_tree_free_extents(chunk_cache, free_chunk_record);
4988 }
4989
4990 static void free_device_record(struct rb_node *node)
4991 {
4992         struct device_record *rec;
4993
4994         rec = container_of(node, struct device_record, node);
4995         free(rec);
4996 }
4997
4998 FREE_RB_BASED_TREE(device_cache, free_device_record);
4999
5000 int insert_block_group_record(struct block_group_tree *tree,
5001                               struct block_group_record *bg_rec)
5002 {
5003         int ret;
5004
5005         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5006         if (ret)
5007                 return ret;
5008
5009         list_add_tail(&bg_rec->list, &tree->block_groups);
5010         return 0;
5011 }
5012
5013 static void free_block_group_record(struct cache_extent *cache)
5014 {
5015         struct block_group_record *rec;
5016
5017         rec = container_of(cache, struct block_group_record, cache);
5018         list_del_init(&rec->list);
5019         free(rec);
5020 }
5021
5022 void free_block_group_tree(struct block_group_tree *tree)
5023 {
5024         cache_tree_free_extents(&tree->tree, free_block_group_record);
5025 }
5026
5027 int insert_device_extent_record(struct device_extent_tree *tree,
5028                                 struct device_extent_record *de_rec)
5029 {
5030         int ret;
5031
5032         /*
5033          * Device extent is a bit different from the other extents, because
5034          * the extents which belong to the different devices may have the
5035          * same start and size, so we need use the special extent cache
5036          * search/insert functions.
5037          */
5038         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5039         if (ret)
5040                 return ret;
5041
5042         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5043         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5044         return 0;
5045 }
5046
5047 static void free_device_extent_record(struct cache_extent *cache)
5048 {
5049         struct device_extent_record *rec;
5050
5051         rec = container_of(cache, struct device_extent_record, cache);
5052         if (!list_empty(&rec->chunk_list))
5053                 list_del_init(&rec->chunk_list);
5054         if (!list_empty(&rec->device_list))
5055                 list_del_init(&rec->device_list);
5056         free(rec);
5057 }
5058
5059 void free_device_extent_tree(struct device_extent_tree *tree)
5060 {
5061         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5062 }
5063
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.  Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are
 * recorded as tree backrefs, all others as data backrefs carrying the v0 ref
 * count.  In both cases key.objectid is the bytenr and key.offset the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
				key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
5084
5085 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5086                                             struct btrfs_key *key,
5087                                             int slot)
5088 {
5089         struct btrfs_chunk *ptr;
5090         struct chunk_record *rec;
5091         int num_stripes, i;
5092
5093         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5094         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5095
5096         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5097         if (!rec) {
5098                 fprintf(stderr, "memory allocation failed\n");
5099                 exit(-1);
5100         }
5101
5102         INIT_LIST_HEAD(&rec->list);
5103         INIT_LIST_HEAD(&rec->dextents);
5104         rec->bg_rec = NULL;
5105
5106         rec->cache.start = key->offset;
5107         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5108
5109         rec->generation = btrfs_header_generation(leaf);
5110
5111         rec->objectid = key->objectid;
5112         rec->type = key->type;
5113         rec->offset = key->offset;
5114
5115         rec->length = rec->cache.size;
5116         rec->owner = btrfs_chunk_owner(leaf, ptr);
5117         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5118         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5119         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5120         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5121         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5122         rec->num_stripes = num_stripes;
5123         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5124
5125         for (i = 0; i < rec->num_stripes; ++i) {
5126                 rec->stripes[i].devid =
5127                         btrfs_stripe_devid_nr(leaf, ptr, i);
5128                 rec->stripes[i].offset =
5129                         btrfs_stripe_offset_nr(leaf, ptr, i);
5130                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5131                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5132                                 BTRFS_UUID_SIZE);
5133         }
5134
5135         return rec;
5136 }
5137
5138 static int process_chunk_item(struct cache_tree *chunk_cache,
5139                               struct btrfs_key *key, struct extent_buffer *eb,
5140                               int slot)
5141 {
5142         struct chunk_record *rec;
5143         struct btrfs_chunk *chunk;
5144         int ret = 0;
5145
5146         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5147         /*
5148          * Do extra check for this chunk item,
5149          *
5150          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5151          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5152          * and owner<->key_type check.
5153          */
5154         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5155                                       key->offset);
5156         if (ret < 0) {
5157                 error("chunk(%llu, %llu) is not valid, ignore it",
5158                       key->offset, btrfs_chunk_length(eb, chunk));
5159                 return 0;
5160         }
5161         rec = btrfs_new_chunk_record(eb, key, slot);
5162         ret = insert_cache_extent(chunk_cache, &rec->cache);
5163         if (ret) {
5164                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5165                         rec->offset, rec->length);
5166                 free(rec);
5167         }
5168
5169         return ret;
5170 }
5171
5172 static int process_device_item(struct rb_root *dev_cache,
5173                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5174 {
5175         struct btrfs_dev_item *ptr;
5176         struct device_record *rec;
5177         int ret = 0;
5178
5179         ptr = btrfs_item_ptr(eb,
5180                 slot, struct btrfs_dev_item);
5181
5182         rec = malloc(sizeof(*rec));
5183         if (!rec) {
5184                 fprintf(stderr, "memory allocation failed\n");
5185                 return -ENOMEM;
5186         }
5187
5188         rec->devid = key->offset;
5189         rec->generation = btrfs_header_generation(eb);
5190
5191         rec->objectid = key->objectid;
5192         rec->type = key->type;
5193         rec->offset = key->offset;
5194
5195         rec->devid = btrfs_device_id(eb, ptr);
5196         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5197         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5198
5199         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5200         if (ret) {
5201                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5202                 free(rec);
5203         }
5204
5205         return ret;
5206 }
5207
5208 struct block_group_record *
5209 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5210                              int slot)
5211 {
5212         struct btrfs_block_group_item *ptr;
5213         struct block_group_record *rec;
5214
5215         rec = calloc(1, sizeof(*rec));
5216         if (!rec) {
5217                 fprintf(stderr, "memory allocation failed\n");
5218                 exit(-1);
5219         }
5220
5221         rec->cache.start = key->objectid;
5222         rec->cache.size = key->offset;
5223
5224         rec->generation = btrfs_header_generation(leaf);
5225
5226         rec->objectid = key->objectid;
5227         rec->type = key->type;
5228         rec->offset = key->offset;
5229
5230         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5231         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5232
5233         INIT_LIST_HEAD(&rec->list);
5234
5235         return rec;
5236 }
5237
5238 static int process_block_group_item(struct block_group_tree *block_group_cache,
5239                                     struct btrfs_key *key,
5240                                     struct extent_buffer *eb, int slot)
5241 {
5242         struct block_group_record *rec;
5243         int ret = 0;
5244
5245         rec = btrfs_new_block_group_record(eb, key, slot);
5246         ret = insert_block_group_record(block_group_cache, rec);
5247         if (ret) {
5248                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5249                         rec->objectid, rec->offset);
5250                 free(rec);
5251         }
5252
5253         return ret;
5254 }
5255
5256 struct device_extent_record *
5257 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5258                                struct btrfs_key *key, int slot)
5259 {
5260         struct device_extent_record *rec;
5261         struct btrfs_dev_extent *ptr;
5262
5263         rec = calloc(1, sizeof(*rec));
5264         if (!rec) {
5265                 fprintf(stderr, "memory allocation failed\n");
5266                 exit(-1);
5267         }
5268
5269         rec->cache.objectid = key->objectid;
5270         rec->cache.start = key->offset;
5271
5272         rec->generation = btrfs_header_generation(leaf);
5273
5274         rec->objectid = key->objectid;
5275         rec->type = key->type;
5276         rec->offset = key->offset;
5277
5278         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5279         rec->chunk_objecteid =
5280                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5281         rec->chunk_offset =
5282                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5283         rec->length = btrfs_dev_extent_length(leaf, ptr);
5284         rec->cache.size = rec->length;
5285
5286         INIT_LIST_HEAD(&rec->chunk_list);
5287         INIT_LIST_HEAD(&rec->device_list);
5288
5289         return rec;
5290 }
5291
5292 static int
5293 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5294                            struct btrfs_key *key, struct extent_buffer *eb,
5295                            int slot)
5296 {
5297         struct device_extent_record *rec;
5298         int ret;
5299
5300         rec = btrfs_new_device_extent_record(eb, key, slot);
5301         ret = insert_device_extent_record(dev_extent_cache, rec);
5302         if (ret) {
5303                 fprintf(stderr,
5304                         "Device extent[%llu, %llu, %llu] existed.\n",
5305                         rec->objectid, rec->offset, rec->length);
5306                 free(rec);
5307         }
5308
5309         return ret;
5310 }
5311
5312 static int process_extent_item(struct btrfs_root *root,
5313                                struct cache_tree *extent_cache,
5314                                struct extent_buffer *eb, int slot)
5315 {
5316         struct btrfs_extent_item *ei;
5317         struct btrfs_extent_inline_ref *iref;
5318         struct btrfs_extent_data_ref *dref;
5319         struct btrfs_shared_data_ref *sref;
5320         struct btrfs_key key;
5321         struct extent_record tmpl;
5322         unsigned long end;
5323         unsigned long ptr;
5324         int ret;
5325         int type;
5326         u32 item_size = btrfs_item_size_nr(eb, slot);
5327         u64 refs = 0;
5328         u64 offset;
5329         u64 num_bytes;
5330         int metadata = 0;
5331
5332         btrfs_item_key_to_cpu(eb, &key, slot);
5333
5334         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5335                 metadata = 1;
5336                 num_bytes = root->nodesize;
5337         } else {
5338                 num_bytes = key.offset;
5339         }
5340
5341         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5342                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5343                       key.objectid, root->sectorsize);
5344                 return -EIO;
5345         }
5346         if (item_size < sizeof(*ei)) {
5347 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5348                 struct btrfs_extent_item_v0 *ei0;
5349                 BUG_ON(item_size != sizeof(*ei0));
5350                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5351                 refs = btrfs_extent_refs_v0(eb, ei0);
5352 #else
5353                 BUG();
5354 #endif
5355                 memset(&tmpl, 0, sizeof(tmpl));
5356                 tmpl.start = key.objectid;
5357                 tmpl.nr = num_bytes;
5358                 tmpl.extent_item_refs = refs;
5359                 tmpl.metadata = metadata;
5360                 tmpl.found_rec = 1;
5361                 tmpl.max_size = num_bytes;
5362
5363                 return add_extent_rec(extent_cache, &tmpl);
5364         }
5365
5366         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5367         refs = btrfs_extent_refs(eb, ei);
5368         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5369                 metadata = 1;
5370         else
5371                 metadata = 0;
5372         if (metadata && num_bytes != root->nodesize) {
5373                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5374                       num_bytes, root->nodesize);
5375                 return -EIO;
5376         }
5377         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5378                 error("ignore invalid data extent, length %llu is not aligned to %u",
5379                       num_bytes, root->sectorsize);
5380                 return -EIO;
5381         }
5382
5383         memset(&tmpl, 0, sizeof(tmpl));
5384         tmpl.start = key.objectid;
5385         tmpl.nr = num_bytes;
5386         tmpl.extent_item_refs = refs;
5387         tmpl.metadata = metadata;
5388         tmpl.found_rec = 1;
5389         tmpl.max_size = num_bytes;
5390         add_extent_rec(extent_cache, &tmpl);
5391
5392         ptr = (unsigned long)(ei + 1);
5393         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5394             key.type == BTRFS_EXTENT_ITEM_KEY)
5395                 ptr += sizeof(struct btrfs_tree_block_info);
5396
5397         end = (unsigned long)ei + item_size;
5398         while (ptr < end) {
5399                 iref = (struct btrfs_extent_inline_ref *)ptr;
5400                 type = btrfs_extent_inline_ref_type(eb, iref);
5401                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5402                 switch (type) {
5403                 case BTRFS_TREE_BLOCK_REF_KEY:
5404                         ret = add_tree_backref(extent_cache, key.objectid,
5405                                         0, offset, 0);
5406                         if (ret < 0)
5407                                 error("add_tree_backref failed: %s",
5408                                       strerror(-ret));
5409                         break;
5410                 case BTRFS_SHARED_BLOCK_REF_KEY:
5411                         ret = add_tree_backref(extent_cache, key.objectid,
5412                                         offset, 0, 0);
5413                         if (ret < 0)
5414                                 error("add_tree_backref failed: %s",
5415                                       strerror(-ret));
5416                         break;
5417                 case BTRFS_EXTENT_DATA_REF_KEY:
5418                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5419                         add_data_backref(extent_cache, key.objectid, 0,
5420                                         btrfs_extent_data_ref_root(eb, dref),
5421                                         btrfs_extent_data_ref_objectid(eb,
5422                                                                        dref),
5423                                         btrfs_extent_data_ref_offset(eb, dref),
5424                                         btrfs_extent_data_ref_count(eb, dref),
5425                                         0, num_bytes);
5426                         break;
5427                 case BTRFS_SHARED_DATA_REF_KEY:
5428                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5429                         add_data_backref(extent_cache, key.objectid, offset,
5430                                         0, 0, 0,
5431                                         btrfs_shared_data_ref_count(eb, sref),
5432                                         0, num_bytes);
5433                         break;
5434                 default:
5435                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5436                                 key.objectid, key.type, num_bytes);
5437                         goto out;
5438                 }
5439                 ptr += btrfs_extent_inline_ref_size(type);
5440         }
5441         WARN_ON(ptr > end);
5442 out:
5443         return 0;
5444 }
5445
5446 static int check_cache_range(struct btrfs_root *root,
5447                              struct btrfs_block_group_cache *cache,
5448                              u64 offset, u64 bytes)
5449 {
5450         struct btrfs_free_space *entry;
5451         u64 *logical;
5452         u64 bytenr;
5453         int stripe_len;
5454         int i, nr, ret;
5455
5456         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5457                 bytenr = btrfs_sb_offset(i);
5458                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5459                                        cache->key.objectid, bytenr, 0,
5460                                        &logical, &nr, &stripe_len);
5461                 if (ret)
5462                         return ret;
5463
5464                 while (nr--) {
5465                         if (logical[nr] + stripe_len <= offset)
5466                                 continue;
5467                         if (offset + bytes <= logical[nr])
5468                                 continue;
5469                         if (logical[nr] == offset) {
5470                                 if (stripe_len >= bytes) {
5471                                         free(logical);
5472                                         return 0;
5473                                 }
5474                                 bytes -= stripe_len;
5475                                 offset += stripe_len;
5476                         } else if (logical[nr] < offset) {
5477                                 if (logical[nr] + stripe_len >=
5478                                     offset + bytes) {
5479                                         free(logical);
5480                                         return 0;
5481                                 }
5482                                 bytes = (offset + bytes) -
5483                                         (logical[nr] + stripe_len);
5484                                 offset = logical[nr] + stripe_len;
5485                         } else {
5486                                 /*
5487                                  * Could be tricky, the super may land in the
5488                                  * middle of the area we're checking.  First
5489                                  * check the easiest case, it's at the end.
5490                                  */
5491                                 if (logical[nr] + stripe_len >=
5492                                     bytes + offset) {
5493                                         bytes = logical[nr] - offset;
5494                                         continue;
5495                                 }
5496
5497                                 /* Check the left side */
5498                                 ret = check_cache_range(root, cache,
5499                                                         offset,
5500                                                         logical[nr] - offset);
5501                                 if (ret) {
5502                                         free(logical);
5503                                         return ret;
5504                                 }
5505
5506                                 /* Now we continue with the right side */
5507                                 bytes = (offset + bytes) -
5508                                         (logical[nr] + stripe_len);
5509                                 offset = logical[nr] + stripe_len;
5510                         }
5511                 }
5512
5513                 free(logical);
5514         }
5515
5516         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5517         if (!entry) {
5518                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5519                         offset, offset+bytes);
5520                 return -EINVAL;
5521         }
5522
5523         if (entry->offset != offset) {
5524                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5525                         entry->offset);
5526                 return -EINVAL;
5527         }
5528
5529         if (entry->bytes != bytes) {
5530                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5531                         bytes, entry->bytes, offset);
5532                 return -EINVAL;
5533         }
5534
5535         unlink_free_space(cache->free_space_ctl, entry);
5536         free(entry);
5537         return 0;
5538 }
5539
5540 static int verify_space_cache(struct btrfs_root *root,
5541                               struct btrfs_block_group_cache *cache)
5542 {
5543         struct btrfs_path *path;
5544         struct extent_buffer *leaf;
5545         struct btrfs_key key;
5546         u64 last;
5547         int ret = 0;
5548
5549         path = btrfs_alloc_path();
5550         if (!path)
5551                 return -ENOMEM;
5552
5553         root = root->fs_info->extent_root;
5554
5555         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5556
5557         key.objectid = last;
5558         key.offset = 0;
5559         key.type = BTRFS_EXTENT_ITEM_KEY;
5560
5561         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5562         if (ret < 0)
5563                 goto out;
5564         ret = 0;
5565         while (1) {
5566                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5567                         ret = btrfs_next_leaf(root, path);
5568                         if (ret < 0)
5569                                 goto out;
5570                         if (ret > 0) {
5571                                 ret = 0;
5572                                 break;
5573                         }
5574                 }
5575                 leaf = path->nodes[0];
5576                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5577                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5578                         break;
5579                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5580                     key.type != BTRFS_METADATA_ITEM_KEY) {
5581                         path->slots[0]++;
5582                         continue;
5583                 }
5584
5585                 if (last == key.objectid) {
5586                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5587                                 last = key.objectid + key.offset;
5588                         else
5589                                 last = key.objectid + root->nodesize;
5590                         path->slots[0]++;
5591                         continue;
5592                 }
5593
5594                 ret = check_cache_range(root, cache, last,
5595                                         key.objectid - last);
5596                 if (ret)
5597                         break;
5598                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5599                         last = key.objectid + key.offset;
5600                 else
5601                         last = key.objectid + root->nodesize;
5602                 path->slots[0]++;
5603         }
5604
5605         if (last < cache->key.objectid + cache->key.offset)
5606                 ret = check_cache_range(root, cache, last,
5607                                         cache->key.objectid +
5608                                         cache->key.offset - last);
5609
5610 out:
5611         btrfs_free_path(path);
5612
5613         if (!ret &&
5614             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5615                 fprintf(stderr, "There are still entries left in the space "
5616                         "cache\n");
5617                 ret = -EINVAL;
5618         }
5619
5620         return ret;
5621 }
5622
5623 static int check_space_cache(struct btrfs_root *root)
5624 {
5625         struct btrfs_block_group_cache *cache;
5626         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5627         int ret;
5628         int error = 0;
5629
5630         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5631             btrfs_super_generation(root->fs_info->super_copy) !=
5632             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5633                 printf("cache and super generation don't match, space cache "
5634                        "will be invalidated\n");
5635                 return 0;
5636         }
5637
5638         if (ctx.progress_enabled) {
5639                 ctx.tp = TASK_FREE_SPACE;
5640                 task_start(ctx.info);
5641         }
5642
5643         while (1) {
5644                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5645                 if (!cache)
5646                         break;
5647
5648                 start = cache->key.objectid + cache->key.offset;
5649                 if (!cache->free_space_ctl) {
5650                         if (btrfs_init_free_space_ctl(cache,
5651                                                       root->sectorsize)) {
5652                                 ret = -ENOMEM;
5653                                 break;
5654                         }
5655                 } else {
5656                         btrfs_remove_free_space_cache(cache);
5657                 }
5658
5659                 if (btrfs_fs_compat_ro(root->fs_info,
5660                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5661                         ret = exclude_super_stripes(root, cache);
5662                         if (ret) {
5663                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5664                                         strerror(-ret));
5665                                 error++;
5666                                 continue;
5667                         }
5668                         ret = load_free_space_tree(root->fs_info, cache);
5669                         free_excluded_extents(root, cache);
5670                         if (ret < 0) {
5671                                 fprintf(stderr, "could not load free space tree: %s\n",
5672                                         strerror(-ret));
5673                                 error++;
5674                                 continue;
5675                         }
5676                         error += ret;
5677                 } else {
5678                         ret = load_free_space_cache(root->fs_info, cache);
5679                         if (!ret)
5680                                 continue;
5681                 }
5682
5683                 ret = verify_space_cache(root, cache);
5684                 if (ret) {
5685                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5686                                 cache->key.objectid);
5687                         error++;
5688                 }
5689         }
5690
5691         task_stop(ctx.info);
5692
5693         return error ? -EINVAL : 0;
5694 }
5695
5696 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5697                         u64 num_bytes, unsigned long leaf_offset,
5698                         struct extent_buffer *eb) {
5699
5700         u64 offset = 0;
5701         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5702         char *data;
5703         unsigned long csum_offset;
5704         u32 csum;
5705         u32 csum_expected;
5706         u64 read_len;
5707         u64 data_checked = 0;
5708         u64 tmp;
5709         int ret = 0;
5710         int mirror;
5711         int num_copies;
5712
5713         if (num_bytes % root->sectorsize)
5714                 return -EINVAL;
5715
5716         data = malloc(num_bytes);
5717         if (!data)
5718                 return -ENOMEM;
5719
5720         while (offset < num_bytes) {
5721                 mirror = 0;
5722 again:
5723                 read_len = num_bytes - offset;
5724                 /* read as much space once a time */
5725                 ret = read_extent_data(root, data + offset,
5726                                 bytenr + offset, &read_len, mirror);
5727                 if (ret)
5728                         goto out;
5729                 data_checked = 0;
5730                 /* verify every 4k data's checksum */
5731                 while (data_checked < read_len) {
5732                         csum = ~(u32)0;
5733                         tmp = offset + data_checked;
5734
5735                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5736                                                csum, root->sectorsize);
5737                         btrfs_csum_final(csum, (u8 *)&csum);
5738
5739                         csum_offset = leaf_offset +
5740                                  tmp / root->sectorsize * csum_size;
5741                         read_extent_buffer(eb, (char *)&csum_expected,
5742                                            csum_offset, csum_size);
5743                         /* try another mirror */
5744                         if (csum != csum_expected) {
5745                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5746                                                 mirror, bytenr + tmp,
5747                                                 csum, csum_expected);
5748                                 num_copies = btrfs_num_copies(
5749                                                 &root->fs_info->mapping_tree,
5750                                                 bytenr, num_bytes);
5751                                 if (mirror < num_copies - 1) {
5752                                         mirror += 1;
5753                                         goto again;
5754                                 }
5755                         }
5756                         data_checked += root->sectorsize;
5757                 }
5758                 offset += read_len;
5759         }
5760 out:
5761         free(data);
5762         return ret;
5763 }
5764
5765 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5766                                u64 num_bytes)
5767 {
5768         struct btrfs_path *path;
5769         struct extent_buffer *leaf;
5770         struct btrfs_key key;
5771         int ret;
5772
5773         path = btrfs_alloc_path();
5774         if (!path) {
5775                 fprintf(stderr, "Error allocating path\n");
5776                 return -ENOMEM;
5777         }
5778
5779         key.objectid = bytenr;
5780         key.type = BTRFS_EXTENT_ITEM_KEY;
5781         key.offset = (u64)-1;
5782
5783 again:
5784         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5785                                 0, 0);
5786         if (ret < 0) {
5787                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5788                 btrfs_free_path(path);
5789                 return ret;
5790         } else if (ret) {
5791                 if (path->slots[0] > 0) {
5792                         path->slots[0]--;
5793                 } else {
5794                         ret = btrfs_prev_leaf(root, path);
5795                         if (ret < 0) {
5796                                 goto out;
5797                         } else if (ret > 0) {
5798                                 ret = 0;
5799                                 goto out;
5800                         }
5801                 }
5802         }
5803
5804         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5805
5806         /*
5807          * Block group items come before extent items if they have the same
5808          * bytenr, so walk back one more just in case.  Dear future traveller,
5809          * first congrats on mastering time travel.  Now if it's not too much
5810          * trouble could you go back to 2006 and tell Chris to make the
5811          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5812          * EXTENT_ITEM_KEY please?
5813          */
5814         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5815                 if (path->slots[0] > 0) {
5816                         path->slots[0]--;
5817                 } else {
5818                         ret = btrfs_prev_leaf(root, path);
5819                         if (ret < 0) {
5820                                 goto out;
5821                         } else if (ret > 0) {
5822                                 ret = 0;
5823                                 goto out;
5824                         }
5825                 }
5826                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5827         }
5828
5829         while (num_bytes) {
5830                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5831                         ret = btrfs_next_leaf(root, path);
5832                         if (ret < 0) {
5833                                 fprintf(stderr, "Error going to next leaf "
5834                                         "%d\n", ret);
5835                                 btrfs_free_path(path);
5836                                 return ret;
5837                         } else if (ret) {
5838                                 break;
5839                         }
5840                 }
5841                 leaf = path->nodes[0];
5842                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5843                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5844                         path->slots[0]++;
5845                         continue;
5846                 }
5847                 if (key.objectid + key.offset < bytenr) {
5848                         path->slots[0]++;
5849                         continue;
5850                 }
5851                 if (key.objectid > bytenr + num_bytes)
5852                         break;
5853
5854                 if (key.objectid == bytenr) {
5855                         if (key.offset >= num_bytes) {
5856                                 num_bytes = 0;
5857                                 break;
5858                         }
5859                         num_bytes -= key.offset;
5860                         bytenr += key.offset;
5861                 } else if (key.objectid < bytenr) {
5862                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5863                                 num_bytes = 0;
5864                                 break;
5865                         }
5866                         num_bytes = (bytenr + num_bytes) -
5867                                 (key.objectid + key.offset);
5868                         bytenr = key.objectid + key.offset;
5869                 } else {
5870                         if (key.objectid + key.offset < bytenr + num_bytes) {
5871                                 u64 new_start = key.objectid + key.offset;
5872                                 u64 new_bytes = bytenr + num_bytes - new_start;
5873
5874                                 /*
5875                                  * Weird case, the extent is in the middle of
5876                                  * our range, we'll have to search one side
5877                                  * and then the other.  Not sure if this happens
5878                                  * in real life, but no harm in coding it up
5879                                  * anyway just in case.
5880                                  */
5881                                 btrfs_release_path(path);
5882                                 ret = check_extent_exists(root, new_start,
5883                                                           new_bytes);
5884                                 if (ret) {
5885                                         fprintf(stderr, "Right section didn't "
5886                                                 "have a record\n");
5887                                         break;
5888                                 }
5889                                 num_bytes = key.objectid - bytenr;
5890                                 goto again;
5891                         }
5892                         num_bytes = key.objectid - bytenr;
5893                 }
5894                 path->slots[0]++;
5895         }
5896         ret = 0;
5897
5898 out:
5899         if (num_bytes && !ret) {
5900                 fprintf(stderr, "There are no extents for csum range "
5901                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5902                 ret = 1;
5903         }
5904
5905         btrfs_free_path(path);
5906         return ret;
5907 }
5908
5909 static int check_csums(struct btrfs_root *root)
5910 {
5911         struct btrfs_path *path;
5912         struct extent_buffer *leaf;
5913         struct btrfs_key key;
5914         u64 offset = 0, num_bytes = 0;
5915         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5916         int errors = 0;
5917         int ret;
5918         u64 data_len;
5919         unsigned long leaf_offset;
5920
5921         root = root->fs_info->csum_root;
5922         if (!extent_buffer_uptodate(root->node)) {
5923                 fprintf(stderr, "No valid csum tree found\n");
5924                 return -ENOENT;
5925         }
5926
5927         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5928         key.type = BTRFS_EXTENT_CSUM_KEY;
5929         key.offset = 0;
5930
5931         path = btrfs_alloc_path();
5932         if (!path)
5933                 return -ENOMEM;
5934
5935         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5936         if (ret < 0) {
5937                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5938                 btrfs_free_path(path);
5939                 return ret;
5940         }
5941
5942         if (ret > 0 && path->slots[0])
5943                 path->slots[0]--;
5944         ret = 0;
5945
5946         while (1) {
5947                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5948                         ret = btrfs_next_leaf(root, path);
5949                         if (ret < 0) {
5950                                 fprintf(stderr, "Error going to next leaf "
5951                                         "%d\n", ret);
5952                                 break;
5953                         }
5954                         if (ret)
5955                                 break;
5956                 }
5957                 leaf = path->nodes[0];
5958
5959                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5960                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5961                         path->slots[0]++;
5962                         continue;
5963                 }
5964
5965                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5966                               csum_size) * root->sectorsize;
5967                 if (!check_data_csum)
5968                         goto skip_csum_check;
5969                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5970                 ret = check_extent_csums(root, key.offset, data_len,
5971                                          leaf_offset, leaf);
5972                 if (ret)
5973                         break;
5974 skip_csum_check:
5975                 if (!num_bytes) {
5976                         offset = key.offset;
5977                 } else if (key.offset != offset + num_bytes) {
5978                         ret = check_extent_exists(root, offset, num_bytes);
5979                         if (ret) {
5980                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5981                                         "there is no extent record\n",
5982                                         offset, offset+num_bytes);
5983                                 errors++;
5984                         }
5985                         offset = key.offset;
5986                         num_bytes = 0;
5987                 }
5988                 num_bytes += data_len;
5989                 path->slots[0]++;
5990         }
5991
5992         btrfs_free_path(path);
5993         return errors;
5994 }
5995
5996 static int is_dropped_key(struct btrfs_key *key,
5997                           struct btrfs_key *drop_key) {
5998         if (key->objectid < drop_key->objectid)
5999                 return 1;
6000         else if (key->objectid == drop_key->objectid) {
6001                 if (key->type < drop_key->type)
6002                         return 1;
6003                 else if (key->type == drop_key->type) {
6004                         if (key->offset < drop_key->offset)
6005                                 return 1;
6006                 }
6007         }
6008         return 0;
6009 }
6010
6011 /*
6012  * Here are the rules for FULL_BACKREF.
6013  *
6014  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6015  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6016  *      FULL_BACKREF set.
6017  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6018  *    if it happened after the relocation occurred since we'll have dropped the
6019  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6020  *    have no real way to know for sure.
6021  *
6022  * We process the blocks one root at a time, and we start from the lowest root
6023  * objectid and go to the highest.  So we can just lookup the owner backref for
6024  * the record and if we don't find it then we know it doesn't exist and we have
6025  * a FULL BACKREF.
6026  *
6027  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6028  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6029  * be set or not and then we can check later once we've gathered all the refs.
6030  */
6031 static int calc_extent_flag(struct btrfs_root *root,
6032                            struct cache_tree *extent_cache,
6033                            struct extent_buffer *buf,
6034                            struct root_item_record *ri,
6035                            u64 *flags)
6036 {
6037         struct extent_record *rec;
6038         struct cache_extent *cache;
6039         struct tree_backref *tback;
6040         u64 owner = 0;
6041
6042         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6043         /* we have added this extent before */
6044         if (!cache)
6045                 return -ENOENT;
6046
6047         rec = container_of(cache, struct extent_record, cache);
6048
6049         /*
6050          * Except file/reloc tree, we can not have
6051          * FULL BACKREF MODE
6052          */
6053         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6054                 goto normal;
6055         /*
6056          * root node
6057          */
6058         if (buf->start == ri->bytenr)
6059                 goto normal;
6060
6061         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6062                 goto full_backref;
6063
6064         owner = btrfs_header_owner(buf);
6065         if (owner == ri->objectid)
6066                 goto normal;
6067
6068         tback = find_tree_backref(rec, 0, owner);
6069         if (!tback)
6070                 goto full_backref;
6071 normal:
6072         *flags = 0;
6073         if (rec->flag_block_full_backref != FLAG_UNSET &&
6074             rec->flag_block_full_backref != 0)
6075                 rec->bad_full_backref = 1;
6076         return 0;
6077 full_backref:
6078         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6079         if (rec->flag_block_full_backref != FLAG_UNSET &&
6080             rec->flag_block_full_backref != 1)
6081                 rec->bad_full_backref = 1;
6082         return 0;
6083 }
6084
6085 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6086 {
6087         fprintf(stderr, "Invalid key type(");
6088         print_key_type(stderr, 0, key_type);
6089         fprintf(stderr, ") found in root(");
6090         print_objectid(stderr, rootid, 0);
6091         fprintf(stderr, ")\n");
6092 }
6093
6094 /*
6095  * Check if the key is valid with its extent buffer.
6096  *
6097  * This is a early check in case invalid key exists in a extent buffer
6098  * This is not comprehensive yet, but should prevent wrong key/item passed
6099  * further
6100  */
6101 static int check_type_with_root(u64 rootid, u8 key_type)
6102 {
6103         switch (key_type) {
6104         /* Only valid in chunk tree */
6105         case BTRFS_DEV_ITEM_KEY:
6106         case BTRFS_CHUNK_ITEM_KEY:
6107                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6108                         goto err;
6109                 break;
6110         /* valid in csum and log tree */
6111         case BTRFS_CSUM_TREE_OBJECTID:
6112                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6113                       is_fstree(rootid)))
6114                         goto err;
6115                 break;
6116         case BTRFS_EXTENT_ITEM_KEY:
6117         case BTRFS_METADATA_ITEM_KEY:
6118         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6119                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6120                         goto err;
6121                 break;
6122         case BTRFS_ROOT_ITEM_KEY:
6123                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6124                         goto err;
6125                 break;
6126         case BTRFS_DEV_EXTENT_KEY:
6127                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6128                         goto err;
6129                 break;
6130         }
6131         return 0;
6132 err:
6133         report_mismatch_key_root(key_type, rootid);
6134         return -EINVAL;
6135 }
6136
6137 static int run_next_block(struct btrfs_root *root,
6138                           struct block_info *bits,
6139                           int bits_nr,
6140                           u64 *last,
6141                           struct cache_tree *pending,
6142                           struct cache_tree *seen,
6143                           struct cache_tree *reada,
6144                           struct cache_tree *nodes,
6145                           struct cache_tree *extent_cache,
6146                           struct cache_tree *chunk_cache,
6147                           struct rb_root *dev_cache,
6148                           struct block_group_tree *block_group_cache,
6149                           struct device_extent_tree *dev_extent_cache,
6150                           struct root_item_record *ri)
6151 {
6152         struct extent_buffer *buf;
6153         struct extent_record *rec = NULL;
6154         u64 bytenr;
6155         u32 size;
6156         u64 parent;
6157         u64 owner;
6158         u64 flags;
6159         u64 ptr;
6160         u64 gen = 0;
6161         int ret = 0;
6162         int i;
6163         int nritems;
6164         struct btrfs_key key;
6165         struct cache_extent *cache;
6166         int reada_bits;
6167
6168         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6169                                     bits_nr, &reada_bits);
6170         if (nritems == 0)
6171                 return 1;
6172
6173         if (!reada_bits) {
6174                 for(i = 0; i < nritems; i++) {
6175                         ret = add_cache_extent(reada, bits[i].start,
6176                                                bits[i].size);
6177                         if (ret == -EEXIST)
6178                                 continue;
6179
6180                         /* fixme, get the parent transid */
6181                         readahead_tree_block(root, bits[i].start,
6182                                              bits[i].size, 0);
6183                 }
6184         }
6185         *last = bits[0].start;
6186         bytenr = bits[0].start;
6187         size = bits[0].size;
6188
6189         cache = lookup_cache_extent(pending, bytenr, size);
6190         if (cache) {
6191                 remove_cache_extent(pending, cache);
6192                 free(cache);
6193         }
6194         cache = lookup_cache_extent(reada, bytenr, size);
6195         if (cache) {
6196                 remove_cache_extent(reada, cache);
6197                 free(cache);
6198         }
6199         cache = lookup_cache_extent(nodes, bytenr, size);
6200         if (cache) {
6201                 remove_cache_extent(nodes, cache);
6202                 free(cache);
6203         }
6204         cache = lookup_cache_extent(extent_cache, bytenr, size);
6205         if (cache) {
6206                 rec = container_of(cache, struct extent_record, cache);
6207                 gen = rec->parent_generation;
6208         }
6209
6210         /* fixme, get the real parent transid */
6211         buf = read_tree_block(root, bytenr, size, gen);
6212         if (!extent_buffer_uptodate(buf)) {
6213                 record_bad_block_io(root->fs_info,
6214                                     extent_cache, bytenr, size);
6215                 goto out;
6216         }
6217
6218         nritems = btrfs_header_nritems(buf);
6219
6220         flags = 0;
6221         if (!init_extent_tree) {
6222                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6223                                        btrfs_header_level(buf), 1, NULL,
6224                                        &flags);
6225                 if (ret < 0) {
6226                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6227                         if (ret < 0) {
6228                                 fprintf(stderr, "Couldn't calc extent flags\n");
6229                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6230                         }
6231                 }
6232         } else {
6233                 flags = 0;
6234                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6235                 if (ret < 0) {
6236                         fprintf(stderr, "Couldn't calc extent flags\n");
6237                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6238                 }
6239         }
6240
6241         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6242                 if (ri != NULL &&
6243                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6244                     ri->objectid == btrfs_header_owner(buf)) {
6245                         /*
6246                          * Ok we got to this block from it's original owner and
6247                          * we have FULL_BACKREF set.  Relocation can leave
6248                          * converted blocks over so this is altogether possible,
6249                          * however it's not possible if the generation > the
6250                          * last snapshot, so check for this case.
6251                          */
6252                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6253                             btrfs_header_generation(buf) > ri->last_snapshot) {
6254                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6255                                 rec->bad_full_backref = 1;
6256                         }
6257                 }
6258         } else {
6259                 if (ri != NULL &&
6260                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6261                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6262                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6263                         rec->bad_full_backref = 1;
6264                 }
6265         }
6266
6267         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6268                 rec->flag_block_full_backref = 1;
6269                 parent = bytenr;
6270                 owner = 0;
6271         } else {
6272                 rec->flag_block_full_backref = 0;
6273                 parent = 0;
6274                 owner = btrfs_header_owner(buf);
6275         }
6276
6277         ret = check_block(root, extent_cache, buf, flags);
6278         if (ret)
6279                 goto out;
6280
6281         if (btrfs_is_leaf(buf)) {
6282                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6283                 for (i = 0; i < nritems; i++) {
6284                         struct btrfs_file_extent_item *fi;
6285                         btrfs_item_key_to_cpu(buf, &key, i);
6286                         /*
6287                          * Check key type against the leaf owner.
6288                          * Could filter quite a lot of early error if
6289                          * owner is correct
6290                          */
6291                         if (check_type_with_root(btrfs_header_owner(buf),
6292                                                  key.type)) {
6293                                 fprintf(stderr, "ignoring invalid key\n");
6294                                 continue;
6295                         }
6296                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6297                                 process_extent_item(root, extent_cache, buf,
6298                                                     i);
6299                                 continue;
6300                         }
6301                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6302                                 process_extent_item(root, extent_cache, buf,
6303                                                     i);
6304                                 continue;
6305                         }
6306                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6307                                 total_csum_bytes +=
6308                                         btrfs_item_size_nr(buf, i);
6309                                 continue;
6310                         }
6311                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6312                                 process_chunk_item(chunk_cache, &key, buf, i);
6313                                 continue;
6314                         }
6315                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6316                                 process_device_item(dev_cache, &key, buf, i);
6317                                 continue;
6318                         }
6319                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6320                                 process_block_group_item(block_group_cache,
6321                                         &key, buf, i);
6322                                 continue;
6323                         }
6324                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6325                                 process_device_extent_item(dev_extent_cache,
6326                                         &key, buf, i);
6327                                 continue;
6328
6329                         }
6330                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6331 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6332                                 process_extent_ref_v0(extent_cache, buf, i);
6333 #else
6334                                 BUG();
6335 #endif
6336                                 continue;
6337                         }
6338
6339                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6340                                 ret = add_tree_backref(extent_cache,
6341                                                 key.objectid, 0, key.offset, 0);
6342                                 if (ret < 0)
6343                                         error("add_tree_backref failed: %s",
6344                                               strerror(-ret));
6345                                 continue;
6346                         }
6347                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6348                                 ret = add_tree_backref(extent_cache,
6349                                                 key.objectid, key.offset, 0, 0);
6350                                 if (ret < 0)
6351                                         error("add_tree_backref failed: %s",
6352                                               strerror(-ret));
6353                                 continue;
6354                         }
6355                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6356                                 struct btrfs_extent_data_ref *ref;
6357                                 ref = btrfs_item_ptr(buf, i,
6358                                                 struct btrfs_extent_data_ref);
6359                                 add_data_backref(extent_cache,
6360                                         key.objectid, 0,
6361                                         btrfs_extent_data_ref_root(buf, ref),
6362                                         btrfs_extent_data_ref_objectid(buf,
6363                                                                        ref),
6364                                         btrfs_extent_data_ref_offset(buf, ref),
6365                                         btrfs_extent_data_ref_count(buf, ref),
6366                                         0, root->sectorsize);
6367                                 continue;
6368                         }
6369                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6370                                 struct btrfs_shared_data_ref *ref;
6371                                 ref = btrfs_item_ptr(buf, i,
6372                                                 struct btrfs_shared_data_ref);
6373                                 add_data_backref(extent_cache,
6374                                         key.objectid, key.offset, 0, 0, 0,
6375                                         btrfs_shared_data_ref_count(buf, ref),
6376                                         0, root->sectorsize);
6377                                 continue;
6378                         }
6379                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6380                                 struct bad_item *bad;
6381
6382                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6383                                         continue;
6384                                 if (!owner)
6385                                         continue;
6386                                 bad = malloc(sizeof(struct bad_item));
6387                                 if (!bad)
6388                                         continue;
6389                                 INIT_LIST_HEAD(&bad->list);
6390                                 memcpy(&bad->key, &key,
6391                                        sizeof(struct btrfs_key));
6392                                 bad->root_id = owner;
6393                                 list_add_tail(&bad->list, &delete_items);
6394                                 continue;
6395                         }
6396                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6397                                 continue;
6398                         fi = btrfs_item_ptr(buf, i,
6399                                             struct btrfs_file_extent_item);
6400                         if (btrfs_file_extent_type(buf, fi) ==
6401                             BTRFS_FILE_EXTENT_INLINE)
6402                                 continue;
6403                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6404                                 continue;
6405
6406                         data_bytes_allocated +=
6407                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6408                         if (data_bytes_allocated < root->sectorsize) {
6409                                 abort();
6410                         }
6411                         data_bytes_referenced +=
6412                                 btrfs_file_extent_num_bytes(buf, fi);
6413                         add_data_backref(extent_cache,
6414                                 btrfs_file_extent_disk_bytenr(buf, fi),
6415                                 parent, owner, key.objectid, key.offset -
6416                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6417                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6418                 }
6419         } else {
6420                 int level;
6421                 struct btrfs_key first_key;
6422
6423                 first_key.objectid = 0;
6424
6425                 if (nritems > 0)
6426                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6427                 level = btrfs_header_level(buf);
6428                 for (i = 0; i < nritems; i++) {
6429                         struct extent_record tmpl;
6430
6431                         ptr = btrfs_node_blockptr(buf, i);
6432                         size = root->nodesize;
6433                         btrfs_node_key_to_cpu(buf, &key, i);
6434                         if (ri != NULL) {
6435                                 if ((level == ri->drop_level)
6436                                     && is_dropped_key(&key, &ri->drop_key)) {
6437                                         continue;
6438                                 }
6439                         }
6440
6441                         memset(&tmpl, 0, sizeof(tmpl));
6442                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6443                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6444                         tmpl.start = ptr;
6445                         tmpl.nr = size;
6446                         tmpl.refs = 1;
6447                         tmpl.metadata = 1;
6448                         tmpl.max_size = size;
6449                         ret = add_extent_rec(extent_cache, &tmpl);
6450                         if (ret < 0)
6451                                 goto out;
6452
6453                         ret = add_tree_backref(extent_cache, ptr, parent,
6454                                         owner, 1);
6455                         if (ret < 0) {
6456                                 error("add_tree_backref failed: %s",
6457                                       strerror(-ret));
6458                                 continue;
6459                         }
6460
6461                         if (level > 1) {
6462                                 add_pending(nodes, seen, ptr, size);
6463                         } else {
6464                                 add_pending(pending, seen, ptr, size);
6465                         }
6466                 }
6467                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6468                                       nritems) * sizeof(struct btrfs_key_ptr);
6469         }
6470         total_btree_bytes += buf->len;
6471         if (fs_root_objectid(btrfs_header_owner(buf)))
6472                 total_fs_tree_bytes += buf->len;
6473         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6474                 total_extent_tree_bytes += buf->len;
6475         if (!found_old_backref &&
6476             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6477             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6478             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6479                 found_old_backref = 1;
6480 out:
6481         free_extent_buffer(buf);
6482         return ret;
6483 }
6484
6485 static int add_root_to_pending(struct extent_buffer *buf,
6486                                struct cache_tree *extent_cache,
6487                                struct cache_tree *pending,
6488                                struct cache_tree *seen,
6489                                struct cache_tree *nodes,
6490                                u64 objectid)
6491 {
6492         struct extent_record tmpl;
6493         int ret;
6494
6495         if (btrfs_header_level(buf) > 0)
6496                 add_pending(nodes, seen, buf->start, buf->len);
6497         else
6498                 add_pending(pending, seen, buf->start, buf->len);
6499
6500         memset(&tmpl, 0, sizeof(tmpl));
6501         tmpl.start = buf->start;
6502         tmpl.nr = buf->len;
6503         tmpl.is_root = 1;
6504         tmpl.refs = 1;
6505         tmpl.metadata = 1;
6506         tmpl.max_size = buf->len;
6507         add_extent_rec(extent_cache, &tmpl);
6508
6509         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6510             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6511                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6512                                 0, 1);
6513         else
6514                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6515                                 1);
6516         return ret;
6517 }
6518
6519 /* as we fix the tree, we might be deleting blocks that
6520  * we're tracking for repair.  This hook makes sure we
6521  * remove any backrefs for blocks as we are fixing them.
6522  */
6523 static int free_extent_hook(struct btrfs_trans_handle *trans,
6524                             struct btrfs_root *root,
6525                             u64 bytenr, u64 num_bytes, u64 parent,
6526                             u64 root_objectid, u64 owner, u64 offset,
6527                             int refs_to_drop)
6528 {
6529         struct extent_record *rec;
6530         struct cache_extent *cache;
6531         int is_data;
6532         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6533
6534         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6535         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6536         if (!cache)
6537                 return 0;
6538
6539         rec = container_of(cache, struct extent_record, cache);
6540         if (is_data) {
6541                 struct data_backref *back;
6542                 back = find_data_backref(rec, parent, root_objectid, owner,
6543                                          offset, 1, bytenr, num_bytes);
6544                 if (!back)
6545                         goto out;
6546                 if (back->node.found_ref) {
6547                         back->found_ref -= refs_to_drop;
6548                         if (rec->refs)
6549                                 rec->refs -= refs_to_drop;
6550                 }
6551                 if (back->node.found_extent_tree) {
6552                         back->num_refs -= refs_to_drop;
6553                         if (rec->extent_item_refs)
6554                                 rec->extent_item_refs -= refs_to_drop;
6555                 }
6556                 if (back->found_ref == 0)
6557                         back->node.found_ref = 0;
6558                 if (back->num_refs == 0)
6559                         back->node.found_extent_tree = 0;
6560
6561                 if (!back->node.found_extent_tree && back->node.found_ref) {
6562                         list_del(&back->node.list);
6563                         free(back);
6564                 }
6565         } else {
6566                 struct tree_backref *back;
6567                 back = find_tree_backref(rec, parent, root_objectid);
6568                 if (!back)
6569                         goto out;
6570                 if (back->node.found_ref) {
6571                         if (rec->refs)
6572                                 rec->refs--;
6573                         back->node.found_ref = 0;
6574                 }
6575                 if (back->node.found_extent_tree) {
6576                         if (rec->extent_item_refs)
6577                                 rec->extent_item_refs--;
6578                         back->node.found_extent_tree = 0;
6579                 }
6580                 if (!back->node.found_extent_tree && back->node.found_ref) {
6581                         list_del(&back->node.list);
6582                         free(back);
6583                 }
6584         }
6585         maybe_free_extent_rec(extent_cache, rec);
6586 out:
6587         return 0;
6588 }
6589
6590 static int delete_extent_records(struct btrfs_trans_handle *trans,
6591                                  struct btrfs_root *root,
6592                                  struct btrfs_path *path,
6593                                  u64 bytenr, u64 new_len)
6594 {
6595         struct btrfs_key key;
6596         struct btrfs_key found_key;
6597         struct extent_buffer *leaf;
6598         int ret;
6599         int slot;
6600
6601
6602         key.objectid = bytenr;
6603         key.type = (u8)-1;
6604         key.offset = (u64)-1;
6605
6606         while(1) {
6607                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6608                                         &key, path, 0, 1);
6609                 if (ret < 0)
6610                         break;
6611
6612                 if (ret > 0) {
6613                         ret = 0;
6614                         if (path->slots[0] == 0)
6615                                 break;
6616                         path->slots[0]--;
6617                 }
6618                 ret = 0;
6619
6620                 leaf = path->nodes[0];
6621                 slot = path->slots[0];
6622
6623                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6624                 if (found_key.objectid != bytenr)
6625                         break;
6626
6627                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6628                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6629                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6630                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6631                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6632                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6633                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6634                         btrfs_release_path(path);
6635                         if (found_key.type == 0) {
6636                                 if (found_key.offset == 0)
6637                                         break;
6638                                 key.offset = found_key.offset - 1;
6639                                 key.type = found_key.type;
6640                         }
6641                         key.type = found_key.type - 1;
6642                         key.offset = (u64)-1;
6643                         continue;
6644                 }
6645
6646                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6647                         found_key.objectid, found_key.type, found_key.offset);
6648
6649                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6650                 if (ret)
6651                         break;
6652                 btrfs_release_path(path);
6653
6654                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6655                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6656                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6657                                 found_key.offset : root->nodesize;
6658
6659                         ret = btrfs_update_block_group(trans, root, bytenr,
6660                                                        bytes, 0, 0);
6661                         if (ret)
6662                                 break;
6663                 }
6664         }
6665
6666         btrfs_release_path(path);
6667         return ret;
6668 }
6669
6670 /*
6671  * for a single backref, this will allocate a new extent
6672  * and add the backref to it.
6673  */
6674 static int record_extent(struct btrfs_trans_handle *trans,
6675                          struct btrfs_fs_info *info,
6676                          struct btrfs_path *path,
6677                          struct extent_record *rec,
6678                          struct extent_backref *back,
6679                          int allocated, u64 flags)
6680 {
6681         int ret;
6682         struct btrfs_root *extent_root = info->extent_root;
6683         struct extent_buffer *leaf;
6684         struct btrfs_key ins_key;
6685         struct btrfs_extent_item *ei;
6686         struct tree_backref *tback;
6687         struct data_backref *dback;
6688         struct btrfs_tree_block_info *bi;
6689
6690         if (!back->is_data)
6691                 rec->max_size = max_t(u64, rec->max_size,
6692                                     info->extent_root->nodesize);
6693
6694         if (!allocated) {
6695                 u32 item_size = sizeof(*ei);
6696
6697                 if (!back->is_data)
6698                         item_size += sizeof(*bi);
6699
6700                 ins_key.objectid = rec->start;
6701                 ins_key.offset = rec->max_size;
6702                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6703
6704                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6705                                         &ins_key, item_size);
6706                 if (ret)
6707                         goto fail;
6708
6709                 leaf = path->nodes[0];
6710                 ei = btrfs_item_ptr(leaf, path->slots[0],
6711                                     struct btrfs_extent_item);
6712
6713                 btrfs_set_extent_refs(leaf, ei, 0);
6714                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6715
6716                 if (back->is_data) {
6717                         btrfs_set_extent_flags(leaf, ei,
6718                                                BTRFS_EXTENT_FLAG_DATA);
6719                 } else {
6720                         struct btrfs_disk_key copy_key;;
6721
6722                         tback = to_tree_backref(back);
6723                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6724                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6725                                              sizeof(*bi));
6726
6727                         btrfs_set_disk_key_objectid(&copy_key,
6728                                                     rec->info_objectid);
6729                         btrfs_set_disk_key_type(&copy_key, 0);
6730                         btrfs_set_disk_key_offset(&copy_key, 0);
6731
6732                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6733                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6734
6735                         btrfs_set_extent_flags(leaf, ei,
6736                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6737                 }
6738
6739                 btrfs_mark_buffer_dirty(leaf);
6740                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6741                                                rec->max_size, 1, 0);
6742                 if (ret)
6743                         goto fail;
6744                 btrfs_release_path(path);
6745         }
6746
6747         if (back->is_data) {
6748                 u64 parent;
6749                 int i;
6750
6751                 dback = to_data_backref(back);
6752                 if (back->full_backref)
6753                         parent = dback->parent;
6754                 else
6755                         parent = 0;
6756
6757                 for (i = 0; i < dback->found_ref; i++) {
6758                         /* if parent != 0, we're doing a full backref
6759                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6760                          * just makes the backref allocator create a data
6761                          * backref
6762                          */
6763                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6764                                                    rec->start, rec->max_size,
6765                                                    parent,
6766                                                    dback->root,
6767                                                    parent ?
6768                                                    BTRFS_FIRST_FREE_OBJECTID :
6769                                                    dback->owner,
6770                                                    dback->offset);
6771                         if (ret)
6772                                 break;
6773                 }
6774                 fprintf(stderr, "adding new data backref"
6775                                 " on %llu %s %llu owner %llu"
6776                                 " offset %llu found %d\n",
6777                                 (unsigned long long)rec->start,
6778                                 back->full_backref ?
6779                                 "parent" : "root",
6780                                 back->full_backref ?
6781                                 (unsigned long long)parent :
6782                                 (unsigned long long)dback->root,
6783                                 (unsigned long long)dback->owner,
6784                                 (unsigned long long)dback->offset,
6785                                 dback->found_ref);
6786         } else {
6787                 u64 parent;
6788
6789                 tback = to_tree_backref(back);
6790                 if (back->full_backref)
6791                         parent = tback->parent;
6792                 else
6793                         parent = 0;
6794
6795                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6796                                            rec->start, rec->max_size,
6797                                            parent, tback->root, 0, 0);
6798                 fprintf(stderr, "adding new tree backref on "
6799                         "start %llu len %llu parent %llu root %llu\n",
6800                         rec->start, rec->max_size, parent, tback->root);
6801         }
6802 fail:
6803         btrfs_release_path(path);
6804         return ret;
6805 }
6806
6807 static struct extent_entry *find_entry(struct list_head *entries,
6808                                        u64 bytenr, u64 bytes)
6809 {
6810         struct extent_entry *entry = NULL;
6811
6812         list_for_each_entry(entry, entries, list) {
6813                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6814                         return entry;
6815         }
6816
6817         return NULL;
6818 }
6819
6820 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6821 {
6822         struct extent_entry *entry, *best = NULL, *prev = NULL;
6823
6824         list_for_each_entry(entry, entries, list) {
6825                 if (!prev) {
6826                         prev = entry;
6827                         continue;
6828                 }
6829
6830                 /*
6831                  * If there are as many broken entries as entries then we know
6832                  * not to trust this particular entry.
6833                  */
6834                 if (entry->broken == entry->count)
6835                         continue;
6836
6837                 /*
6838                  * If our current entry == best then we can't be sure our best
6839                  * is really the best, so we need to keep searching.
6840                  */
6841                 if (best && best->count == entry->count) {
6842                         prev = entry;
6843                         best = NULL;
6844                         continue;
6845                 }
6846
6847                 /* Prev == entry, not good enough, have to keep searching */
6848                 if (!prev->broken && prev->count == entry->count)
6849                         continue;
6850
6851                 if (!best)
6852                         best = (prev->count > entry->count) ? prev : entry;
6853                 else if (best->count < entry->count)
6854                         best = entry;
6855                 prev = entry;
6856         }
6857
6858         return best;
6859 }
6860
6861 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6862                       struct data_backref *dback, struct extent_entry *entry)
6863 {
6864         struct btrfs_trans_handle *trans;
6865         struct btrfs_root *root;
6866         struct btrfs_file_extent_item *fi;
6867         struct extent_buffer *leaf;
6868         struct btrfs_key key;
6869         u64 bytenr, bytes;
6870         int ret, err;
6871
6872         key.objectid = dback->root;
6873         key.type = BTRFS_ROOT_ITEM_KEY;
6874         key.offset = (u64)-1;
6875         root = btrfs_read_fs_root(info, &key);
6876         if (IS_ERR(root)) {
6877                 fprintf(stderr, "Couldn't find root for our ref\n");
6878                 return -EINVAL;
6879         }
6880
6881         /*
6882          * The backref points to the original offset of the extent if it was
6883          * split, so we need to search down to the offset we have and then walk
6884          * forward until we find the backref we're looking for.
6885          */
6886         key.objectid = dback->owner;
6887         key.type = BTRFS_EXTENT_DATA_KEY;
6888         key.offset = dback->offset;
6889         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6890         if (ret < 0) {
6891                 fprintf(stderr, "Error looking up ref %d\n", ret);
6892                 return ret;
6893         }
6894
6895         while (1) {
6896                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6897                         ret = btrfs_next_leaf(root, path);
6898                         if (ret) {
6899                                 fprintf(stderr, "Couldn't find our ref, next\n");
6900                                 return -EINVAL;
6901                         }
6902                 }
6903                 leaf = path->nodes[0];
6904                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6905                 if (key.objectid != dback->owner ||
6906                     key.type != BTRFS_EXTENT_DATA_KEY) {
6907                         fprintf(stderr, "Couldn't find our ref, search\n");
6908                         return -EINVAL;
6909                 }
6910                 fi = btrfs_item_ptr(leaf, path->slots[0],
6911                                     struct btrfs_file_extent_item);
6912                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6913                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6914
6915                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6916                         break;
6917                 path->slots[0]++;
6918         }
6919
6920         btrfs_release_path(path);
6921
6922         trans = btrfs_start_transaction(root, 1);
6923         if (IS_ERR(trans))
6924                 return PTR_ERR(trans);
6925
6926         /*
6927          * Ok we have the key of the file extent we want to fix, now we can cow
6928          * down to the thing and fix it.
6929          */
6930         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6931         if (ret < 0) {
6932                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6933                         key.objectid, key.type, key.offset, ret);
6934                 goto out;
6935         }
6936         if (ret > 0) {
6937                 fprintf(stderr, "Well that's odd, we just found this key "
6938                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6939                         key.offset);
6940                 ret = -EINVAL;
6941                 goto out;
6942         }
6943         leaf = path->nodes[0];
6944         fi = btrfs_item_ptr(leaf, path->slots[0],
6945                             struct btrfs_file_extent_item);
6946
6947         if (btrfs_file_extent_compression(leaf, fi) &&
6948             dback->disk_bytenr != entry->bytenr) {
6949                 fprintf(stderr, "Ref doesn't match the record start and is "
6950                         "compressed, please take a btrfs-image of this file "
6951                         "system and send it to a btrfs developer so they can "
6952                         "complete this functionality for bytenr %Lu\n",
6953                         dback->disk_bytenr);
6954                 ret = -EINVAL;
6955                 goto out;
6956         }
6957
6958         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6959                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6960         } else if (dback->disk_bytenr > entry->bytenr) {
6961                 u64 off_diff, offset;
6962
6963                 off_diff = dback->disk_bytenr - entry->bytenr;
6964                 offset = btrfs_file_extent_offset(leaf, fi);
6965                 if (dback->disk_bytenr + offset +
6966                     btrfs_file_extent_num_bytes(leaf, fi) >
6967                     entry->bytenr + entry->bytes) {
6968                         fprintf(stderr, "Ref is past the entry end, please "
6969                                 "take a btrfs-image of this file system and "
6970                                 "send it to a btrfs developer, ref %Lu\n",
6971                                 dback->disk_bytenr);
6972                         ret = -EINVAL;
6973                         goto out;
6974                 }
6975                 offset += off_diff;
6976                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6977                 btrfs_set_file_extent_offset(leaf, fi, offset);
6978         } else if (dback->disk_bytenr < entry->bytenr) {
6979                 u64 offset;
6980
6981                 offset = btrfs_file_extent_offset(leaf, fi);
6982                 if (dback->disk_bytenr + offset < entry->bytenr) {
6983                         fprintf(stderr, "Ref is before the entry start, please"
6984                                 " take a btrfs-image of this file system and "
6985                                 "send it to a btrfs developer, ref %Lu\n",
6986                                 dback->disk_bytenr);
6987                         ret = -EINVAL;
6988                         goto out;
6989                 }
6990
6991                 offset += dback->disk_bytenr;
6992                 offset -= entry->bytenr;
6993                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6994                 btrfs_set_file_extent_offset(leaf, fi, offset);
6995         }
6996
6997         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6998
6999         /*
7000          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7001          * only do this if we aren't using compression, otherwise it's a
7002          * trickier case.
7003          */
7004         if (!btrfs_file_extent_compression(leaf, fi))
7005                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7006         else
7007                 printf("ram bytes may be wrong?\n");
7008         btrfs_mark_buffer_dirty(leaf);
7009 out:
7010         err = btrfs_commit_transaction(trans, root);
7011         btrfs_release_path(path);
7012         return ret ? ret : err;
7013 }
7014
7015 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7016                            struct extent_record *rec)
7017 {
7018         struct extent_backref *back;
7019         struct data_backref *dback;
7020         struct extent_entry *entry, *best = NULL;
7021         LIST_HEAD(entries);
7022         int nr_entries = 0;
7023         int broken_entries = 0;
7024         int ret = 0;
7025         short mismatch = 0;
7026
7027         /*
7028          * Metadata is easy and the backrefs should always agree on bytenr and
7029          * size, if not we've got bigger issues.
7030          */
7031         if (rec->metadata)
7032                 return 0;
7033
7034         list_for_each_entry(back, &rec->backrefs, list) {
7035                 if (back->full_backref || !back->is_data)
7036                         continue;
7037
7038                 dback = to_data_backref(back);
7039
7040                 /*
7041                  * We only pay attention to backrefs that we found a real
7042                  * backref for.
7043                  */
7044                 if (dback->found_ref == 0)
7045                         continue;
7046
7047                 /*
7048                  * For now we only catch when the bytes don't match, not the
7049                  * bytenr.  We can easily do this at the same time, but I want
7050                  * to have a fs image to test on before we just add repair
7051                  * functionality willy-nilly so we know we won't screw up the
7052                  * repair.
7053                  */
7054
7055                 entry = find_entry(&entries, dback->disk_bytenr,
7056                                    dback->bytes);
7057                 if (!entry) {
7058                         entry = malloc(sizeof(struct extent_entry));
7059                         if (!entry) {
7060                                 ret = -ENOMEM;
7061                                 goto out;
7062                         }
7063                         memset(entry, 0, sizeof(*entry));
7064                         entry->bytenr = dback->disk_bytenr;
7065                         entry->bytes = dback->bytes;
7066                         list_add_tail(&entry->list, &entries);
7067                         nr_entries++;
7068                 }
7069
7070                 /*
7071                  * If we only have on entry we may think the entries agree when
7072                  * in reality they don't so we have to do some extra checking.
7073                  */
7074                 if (dback->disk_bytenr != rec->start ||
7075                     dback->bytes != rec->nr || back->broken)
7076                         mismatch = 1;
7077
7078                 if (back->broken) {
7079                         entry->broken++;
7080                         broken_entries++;
7081                 }
7082
7083                 entry->count++;
7084         }
7085
7086         /* Yay all the backrefs agree, carry on good sir */
7087         if (nr_entries <= 1 && !mismatch)
7088                 goto out;
7089
7090         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7091                 "%Lu\n", rec->start);
7092
7093         /*
7094          * First we want to see if the backrefs can agree amongst themselves who
7095          * is right, so figure out which one of the entries has the highest
7096          * count.
7097          */
7098         best = find_most_right_entry(&entries);
7099
7100         /*
7101          * Ok so we may have an even split between what the backrefs think, so
7102          * this is where we use the extent ref to see what it thinks.
7103          */
7104         if (!best) {
7105                 entry = find_entry(&entries, rec->start, rec->nr);
7106                 if (!entry && (!broken_entries || !rec->found_rec)) {
7107                         fprintf(stderr, "Backrefs don't agree with each other "
7108                                 "and extent record doesn't agree with anybody,"
7109                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7110                                 rec->start, rec->nr);
7111                         ret = -EINVAL;
7112                         goto out;
7113                 } else if (!entry) {
7114                         /*
7115                          * Ok our backrefs were broken, we'll assume this is the
7116                          * correct value and add an entry for this range.
7117                          */
7118                         entry = malloc(sizeof(struct extent_entry));
7119                         if (!entry) {
7120                                 ret = -ENOMEM;
7121                                 goto out;
7122                         }
7123                         memset(entry, 0, sizeof(*entry));
7124                         entry->bytenr = rec->start;
7125                         entry->bytes = rec->nr;
7126                         list_add_tail(&entry->list, &entries);
7127                         nr_entries++;
7128                 }
7129                 entry->count++;
7130                 best = find_most_right_entry(&entries);
7131                 if (!best) {
7132                         fprintf(stderr, "Backrefs and extent record evenly "
7133                                 "split on who is right, this is going to "
7134                                 "require user input to fix bytenr %Lu bytes "
7135                                 "%Lu\n", rec->start, rec->nr);
7136                         ret = -EINVAL;
7137                         goto out;
7138                 }
7139         }
7140
7141         /*
7142          * I don't think this can happen currently as we'll abort() if we catch
7143          * this case higher up, but in case somebody removes that we still can't
7144          * deal with it properly here yet, so just bail out of that's the case.
7145          */
7146         if (best->bytenr != rec->start) {
7147                 fprintf(stderr, "Extent start and backref starts don't match, "
7148                         "please use btrfs-image on this file system and send "
7149                         "it to a btrfs developer so they can make fsck fix "
7150                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7151                         rec->start, rec->nr);
7152                 ret = -EINVAL;
7153                 goto out;
7154         }
7155
7156         /*
7157          * Ok great we all agreed on an extent record, let's go find the real
7158          * references and fix up the ones that don't match.
7159          */
7160         list_for_each_entry(back, &rec->backrefs, list) {
7161                 if (back->full_backref || !back->is_data)
7162                         continue;
7163
7164                 dback = to_data_backref(back);
7165
7166                 /*
7167                  * Still ignoring backrefs that don't have a real ref attached
7168                  * to them.
7169                  */
7170                 if (dback->found_ref == 0)
7171                         continue;
7172
7173                 if (dback->bytes == best->bytes &&
7174                     dback->disk_bytenr == best->bytenr)
7175                         continue;
7176
7177                 ret = repair_ref(info, path, dback, best);
7178                 if (ret)
7179                         goto out;
7180         }
7181
7182         /*
7183          * Ok we messed with the actual refs, which means we need to drop our
7184          * entire cache and go back and rescan.  I know this is a huge pain and
7185          * adds a lot of extra work, but it's the only way to be safe.  Once all
7186          * the backrefs agree we may not need to do anything to the extent
7187          * record itself.
7188          */
7189         ret = -EAGAIN;
7190 out:
7191         while (!list_empty(&entries)) {
7192                 entry = list_entry(entries.next, struct extent_entry, list);
7193                 list_del_init(&entry->list);
7194                 free(entry);
7195         }
7196         return ret;
7197 }
7198
/*
 * Replace @rec in @extent_cache with its single duplicate, which becomes the
 * new record for the range, and fold every cached record overlapping that
 * duplicate into it.
 *
 * @root is not referenced by this function.
 *
 * Returns 0 without touching anything when @rec has found_rec set or more
 * than one duplicate.  Otherwise @rec is freed (the caller must not use it
 * afterwards) and the return value is 0 if the replacement record ended up
 * with duplicates of its own, 1 if it did not.
 */
static int process_duplicates(struct btrfs_root *root,
                              struct cache_tree *extent_cache,
                              struct extent_record *rec)
{
        struct extent_record *good, *tmp;
        struct cache_extent *cache;
        int ret;

        /*
         * If we found a extent record for this extent then return, or if we
         * have more than one duplicate we are likely going to need to delete
         * something.
         */
        if (rec->found_rec || rec->num_duplicates > 1)
                return 0;

        /* Shouldn't happen but just in case */
        BUG_ON(!rec->num_duplicates);

        /*
         * So this happens if we end up with a backref that doesn't match the
         * actual extent entry.  So either the backref is bad or the extent
         * entry is bad.  Either way we want to have the extent_record actually
         * reflect what we found in the extent_tree, so we need to take the
         * duplicate out and use that as the extent_record since the only way we
         * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
         */
        remove_cache_extent(extent_cache, &rec->cache);

        /* Detach the one duplicate and reset it into a fresh record. */
        good = to_extent_record(rec->dups.next);
        list_del_init(&good->list);
        INIT_LIST_HEAD(&good->backrefs);
        INIT_LIST_HEAD(&good->dups);
        good->cache.start = good->start;
        good->cache.size = good->nr;
        good->content_checked = 0;
        good->owner_ref_checked = 0;
        good->num_duplicates = 0;
        /* It inherits the ref count and the backrefs collected on @rec. */
        good->refs = rec->refs;
        list_splice_init(&rec->backrefs, &good->backrefs);
        /*
         * Pull every record that lookup_cache_extent() returns for the
         * range of @good out of the cache; each iteration removes the one
         * it found, so the loop ends when the lookup comes back empty.
         */
        while (1) {
                cache = lookup_cache_extent(extent_cache, good->start,
                                            good->nr);
                if (!cache)
                        break;
                tmp = container_of(cache, struct extent_record, cache);

                /*
                 * If we find another overlapping extent and it's found_rec is
                 * set then it's a duplicate and we need to try and delete
                 * something.
                 */
                if (tmp->found_rec || tmp->num_duplicates > 0) {
                        /* Queue @good on duplicate_extents only once. */
                        if (list_empty(&good->list))
                                list_add_tail(&good->list,
                                              &duplicate_extents);
                        /* tmp and all of tmp's dups become dups of @good. */
                        good->num_duplicates += tmp->num_duplicates + 1;
                        list_splice_init(&tmp->dups, &good->dups);
                        list_del_init(&tmp->list);
                        list_add_tail(&tmp->list, &good->dups);
                        remove_cache_extent(extent_cache, &tmp->cache);
                        continue;
                }

                /*
                 * Ok we have another non extent item backed extent rec, so lets
                 * just add it to this extent and carry on like we did above.
                 */
                good->refs += tmp->refs;
                list_splice_init(&tmp->backrefs, &good->backrefs);
                remove_cache_extent(extent_cache, &tmp->cache);
                free(tmp);
        }
        /* The range is clear of other records now, so this must succeed. */
        ret = insert_cache_extent(extent_cache, &good->cache);
        BUG_ON(ret);
        free(rec);
        return good->num_duplicates ? 0 : 1;
}
7277
7278 static int delete_duplicate_records(struct btrfs_root *root,
7279                                     struct extent_record *rec)
7280 {
7281         struct btrfs_trans_handle *trans;
7282         LIST_HEAD(delete_list);
7283         struct btrfs_path *path;
7284         struct extent_record *tmp, *good, *n;
7285         int nr_del = 0;
7286         int ret = 0, err;
7287         struct btrfs_key key;
7288
7289         path = btrfs_alloc_path();
7290         if (!path) {
7291                 ret = -ENOMEM;
7292                 goto out;
7293         }
7294
7295         good = rec;
7296         /* Find the record that covers all of the duplicates. */
7297         list_for_each_entry(tmp, &rec->dups, list) {
7298                 if (good->start < tmp->start)
7299                         continue;
7300                 if (good->nr > tmp->nr)
7301                         continue;
7302
7303                 if (tmp->start + tmp->nr < good->start + good->nr) {
7304                         fprintf(stderr, "Ok we have overlapping extents that "
7305                                 "aren't completely covered by each other, this "
7306                                 "is going to require more careful thought.  "
7307                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7308                                 tmp->start, tmp->nr, good->start, good->nr);
7309                         abort();
7310                 }
7311                 good = tmp;
7312         }
7313
7314         if (good != rec)
7315                 list_add_tail(&rec->list, &delete_list);
7316
7317         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7318                 if (tmp == good)
7319                         continue;
7320                 list_move_tail(&tmp->list, &delete_list);
7321         }
7322
7323         root = root->fs_info->extent_root;
7324         trans = btrfs_start_transaction(root, 1);
7325         if (IS_ERR(trans)) {
7326                 ret = PTR_ERR(trans);
7327                 goto out;
7328         }
7329
7330         list_for_each_entry(tmp, &delete_list, list) {
7331                 if (tmp->found_rec == 0)
7332                         continue;
7333                 key.objectid = tmp->start;
7334                 key.type = BTRFS_EXTENT_ITEM_KEY;
7335                 key.offset = tmp->nr;
7336
7337                 /* Shouldn't happen but just in case */
7338                 if (tmp->metadata) {
7339                         fprintf(stderr, "Well this shouldn't happen, extent "
7340                                 "record overlaps but is metadata? "
7341                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7342                         abort();
7343                 }
7344
7345                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7346                 if (ret) {
7347                         if (ret > 0)
7348                                 ret = -EINVAL;
7349                         break;
7350                 }
7351                 ret = btrfs_del_item(trans, root, path);
7352                 if (ret)
7353                         break;
7354                 btrfs_release_path(path);
7355                 nr_del++;
7356         }
7357         err = btrfs_commit_transaction(trans, root);
7358         if (err && !ret)
7359                 ret = err;
7360 out:
7361         while (!list_empty(&delete_list)) {
7362                 tmp = to_extent_record(delete_list.next);
7363                 list_del_init(&tmp->list);
7364                 if (tmp == rec)
7365                         continue;
7366                 free(tmp);
7367         }
7368
7369         while (!list_empty(&rec->dups)) {
7370                 tmp = to_extent_record(rec->dups.next);
7371                 list_del_init(&tmp->list);
7372                 free(tmp);
7373         }
7374
7375         btrfs_free_path(path);
7376
7377         if (!ret && !nr_del)
7378                 rec->num_duplicates = 0;
7379
7380         return ret ? ret : nr_del;
7381 }
7382
7383 static int find_possible_backrefs(struct btrfs_fs_info *info,
7384                                   struct btrfs_path *path,
7385                                   struct cache_tree *extent_cache,
7386                                   struct extent_record *rec)
7387 {
7388         struct btrfs_root *root;
7389         struct extent_backref *back;
7390         struct data_backref *dback;
7391         struct cache_extent *cache;
7392         struct btrfs_file_extent_item *fi;
7393         struct btrfs_key key;
7394         u64 bytenr, bytes;
7395         int ret;
7396
7397         list_for_each_entry(back, &rec->backrefs, list) {
7398                 /* Don't care about full backrefs (poor unloved backrefs) */
7399                 if (back->full_backref || !back->is_data)
7400                         continue;
7401
7402                 dback = to_data_backref(back);
7403
7404                 /* We found this one, we don't need to do a lookup */
7405                 if (dback->found_ref)
7406                         continue;
7407
7408                 key.objectid = dback->root;
7409                 key.type = BTRFS_ROOT_ITEM_KEY;
7410                 key.offset = (u64)-1;
7411
7412                 root = btrfs_read_fs_root(info, &key);
7413
7414                 /* No root, definitely a bad ref, skip */
7415                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7416                         continue;
7417                 /* Other err, exit */
7418                 if (IS_ERR(root))
7419                         return PTR_ERR(root);
7420
7421                 key.objectid = dback->owner;
7422                 key.type = BTRFS_EXTENT_DATA_KEY;
7423                 key.offset = dback->offset;
7424                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7425                 if (ret) {
7426                         btrfs_release_path(path);
7427                         if (ret < 0)
7428                                 return ret;
7429                         /* Didn't find it, we can carry on */
7430                         ret = 0;
7431                         continue;
7432                 }
7433
7434                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7435                                     struct btrfs_file_extent_item);
7436                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7437                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7438                 btrfs_release_path(path);
7439                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7440                 if (cache) {
7441                         struct extent_record *tmp;
7442                         tmp = container_of(cache, struct extent_record, cache);
7443
7444                         /*
7445                          * If we found an extent record for the bytenr for this
7446                          * particular backref then we can't add it to our
7447                          * current extent record.  We only want to add backrefs
7448                          * that don't have a corresponding extent item in the
7449                          * extent tree since they likely belong to this record
7450                          * and we need to fix it if it doesn't match bytenrs.
7451                          */
7452                         if  (tmp->found_rec)
7453                                 continue;
7454                 }
7455
7456                 dback->found_ref += 1;
7457                 dback->disk_bytenr = bytenr;
7458                 dback->bytes = bytes;
7459
7460                 /*
7461                  * Set this so the verify backref code knows not to trust the
7462                  * values in this backref.
7463                  */
7464                 back->broken = 1;
7465         }
7466
7467         return 0;
7468 }
7469
7470 /*
7471  * Record orphan data ref into corresponding root.
7472  *
7473  * Return 0 if the extent item contains data ref and recorded.
7474  * Return 1 if the extent item contains no useful data ref
7475  *   On that case, it may contains only shared_dataref or metadata backref
7476  *   or the file extent exists(this should be handled by the extent bytenr
7477  *   recovery routine)
7478  * Return <0 if something goes wrong.
7479  */
7480 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7481                                       struct extent_record *rec)
7482 {
7483         struct btrfs_key key;
7484         struct btrfs_root *dest_root;
7485         struct extent_backref *back;
7486         struct data_backref *dback;
7487         struct orphan_data_extent *orphan;
7488         struct btrfs_path *path;
7489         int recorded_data_ref = 0;
7490         int ret = 0;
7491
7492         if (rec->metadata)
7493                 return 1;
7494         path = btrfs_alloc_path();
7495         if (!path)
7496                 return -ENOMEM;
7497         list_for_each_entry(back, &rec->backrefs, list) {
7498                 if (back->full_backref || !back->is_data ||
7499                     !back->found_extent_tree)
7500                         continue;
7501                 dback = to_data_backref(back);
7502                 if (dback->found_ref)
7503                         continue;
7504                 key.objectid = dback->root;
7505                 key.type = BTRFS_ROOT_ITEM_KEY;
7506                 key.offset = (u64)-1;
7507
7508                 dest_root = btrfs_read_fs_root(fs_info, &key);
7509
7510                 /* For non-exist root we just skip it */
7511                 if (IS_ERR(dest_root) || !dest_root)
7512                         continue;
7513
7514                 key.objectid = dback->owner;
7515                 key.type = BTRFS_EXTENT_DATA_KEY;
7516                 key.offset = dback->offset;
7517
7518                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7519                 btrfs_release_path(path);
7520                 /*
7521                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7522                  * we need to record it for inode/file extent rebuild.
7523                  * For ret > 0, we record it only for file extent rebuild.
7524                  * For ret == 0, the file extent exists but only bytenr
7525                  * mismatch, let the original bytenr fix routine to handle,
7526                  * don't record it.
7527                  */
7528                 if (ret == 0)
7529                         continue;
7530                 ret = 0;
7531                 orphan = malloc(sizeof(*orphan));
7532                 if (!orphan) {
7533                         ret = -ENOMEM;
7534                         goto out;
7535                 }
7536                 INIT_LIST_HEAD(&orphan->list);
7537                 orphan->root = dback->root;
7538                 orphan->objectid = dback->owner;
7539                 orphan->offset = dback->offset;
7540                 orphan->disk_bytenr = rec->cache.start;
7541                 orphan->disk_len = rec->cache.size;
7542                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7543                 recorded_data_ref = 1;
7544         }
7545 out:
7546         btrfs_free_path(path);
7547         if (!ret)
7548                 return !recorded_data_ref;
7549         else
7550                 return ret;
7551 }
7552
7553 /*
7554  * when an incorrect extent item is found, this will delete
7555  * all of the existing entries for it and recreate them
7556  * based on what the tree scan found.
7557  */
7558 static int fixup_extent_refs(struct btrfs_fs_info *info,
7559                              struct cache_tree *extent_cache,
7560                              struct extent_record *rec)
7561 {
7562         struct btrfs_trans_handle *trans = NULL;
7563         int ret;
7564         struct btrfs_path *path;
7565         struct list_head *cur = rec->backrefs.next;
7566         struct cache_extent *cache;
7567         struct extent_backref *back;
7568         int allocated = 0;
7569         u64 flags = 0;
7570
7571         if (rec->flag_block_full_backref)
7572                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7573
7574         path = btrfs_alloc_path();
7575         if (!path)
7576                 return -ENOMEM;
7577
7578         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7579                 /*
7580                  * Sometimes the backrefs themselves are so broken they don't
7581                  * get attached to any meaningful rec, so first go back and
7582                  * check any of our backrefs that we couldn't find and throw
7583                  * them into the list if we find the backref so that
7584                  * verify_backrefs can figure out what to do.
7585                  */
7586                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7587                 if (ret < 0)
7588                         goto out;
7589         }
7590
7591         /* step one, make sure all of the backrefs agree */
7592         ret = verify_backrefs(info, path, rec);
7593         if (ret < 0)
7594                 goto out;
7595
7596         trans = btrfs_start_transaction(info->extent_root, 1);
7597         if (IS_ERR(trans)) {
7598                 ret = PTR_ERR(trans);
7599                 goto out;
7600         }
7601
7602         /* step two, delete all the existing records */
7603         ret = delete_extent_records(trans, info->extent_root, path,
7604                                     rec->start, rec->max_size);
7605
7606         if (ret < 0)
7607                 goto out;
7608
7609         /* was this block corrupt?  If so, don't add references to it */
7610         cache = lookup_cache_extent(info->corrupt_blocks,
7611                                     rec->start, rec->max_size);
7612         if (cache) {
7613                 ret = 0;
7614                 goto out;
7615         }
7616
7617         /* step three, recreate all the refs we did find */
7618         while(cur != &rec->backrefs) {
7619                 back = to_extent_backref(cur);
7620                 cur = cur->next;
7621
7622                 /*
7623                  * if we didn't find any references, don't create a
7624                  * new extent record
7625                  */
7626                 if (!back->found_ref)
7627                         continue;
7628
7629                 rec->bad_full_backref = 0;
7630                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7631                 allocated = 1;
7632
7633                 if (ret)
7634                         goto out;
7635         }
7636 out:
7637         if (trans) {
7638                 int err = btrfs_commit_transaction(trans, info->extent_root);
7639                 if (!ret)
7640                         ret = err;
7641         }
7642
7643         btrfs_free_path(path);
7644         return ret;
7645 }
7646
7647 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7648                               struct extent_record *rec)
7649 {
7650         struct btrfs_trans_handle *trans;
7651         struct btrfs_root *root = fs_info->extent_root;
7652         struct btrfs_path *path;
7653         struct btrfs_extent_item *ei;
7654         struct btrfs_key key;
7655         u64 flags;
7656         int ret = 0;
7657
7658         key.objectid = rec->start;
7659         if (rec->metadata) {
7660                 key.type = BTRFS_METADATA_ITEM_KEY;
7661                 key.offset = rec->info_level;
7662         } else {
7663                 key.type = BTRFS_EXTENT_ITEM_KEY;
7664                 key.offset = rec->max_size;
7665         }
7666
7667         path = btrfs_alloc_path();
7668         if (!path)
7669                 return -ENOMEM;
7670
7671         trans = btrfs_start_transaction(root, 0);
7672         if (IS_ERR(trans)) {
7673                 btrfs_free_path(path);
7674                 return PTR_ERR(trans);
7675         }
7676
7677         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7678         if (ret < 0) {
7679                 btrfs_free_path(path);
7680                 btrfs_commit_transaction(trans, root);
7681                 return ret;
7682         } else if (ret) {
7683                 fprintf(stderr, "Didn't find extent for %llu\n",
7684                         (unsigned long long)rec->start);
7685                 btrfs_free_path(path);
7686                 btrfs_commit_transaction(trans, root);
7687                 return -ENOENT;
7688         }
7689
7690         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7691                             struct btrfs_extent_item);
7692         flags = btrfs_extent_flags(path->nodes[0], ei);
7693         if (rec->flag_block_full_backref) {
7694                 fprintf(stderr, "setting full backref on %llu\n",
7695                         (unsigned long long)key.objectid);
7696                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7697         } else {
7698                 fprintf(stderr, "clearing full backref on %llu\n",
7699                         (unsigned long long)key.objectid);
7700                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7701         }
7702         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7703         btrfs_mark_buffer_dirty(path->nodes[0]);
7704         btrfs_free_path(path);
7705         return btrfs_commit_transaction(trans, root);
7706 }
7707
7708 /* right now we only prune from the extent allocation tree */
7709 static int prune_one_block(struct btrfs_trans_handle *trans,
7710                            struct btrfs_fs_info *info,
7711                            struct btrfs_corrupt_block *corrupt)
7712 {
7713         int ret;
7714         struct btrfs_path path;
7715         struct extent_buffer *eb;
7716         u64 found;
7717         int slot;
7718         int nritems;
7719         int level = corrupt->level + 1;
7720
7721         btrfs_init_path(&path);
7722 again:
7723         /* we want to stop at the parent to our busted block */
7724         path.lowest_level = level;
7725
7726         ret = btrfs_search_slot(trans, info->extent_root,
7727                                 &corrupt->key, &path, -1, 1);
7728
7729         if (ret < 0)
7730                 goto out;
7731
7732         eb = path.nodes[level];
7733         if (!eb) {
7734                 ret = -ENOENT;
7735                 goto out;
7736         }
7737
7738         /*
7739          * hopefully the search gave us the block we want to prune,
7740          * lets try that first
7741          */
7742         slot = path.slots[level];
7743         found =  btrfs_node_blockptr(eb, slot);
7744         if (found == corrupt->cache.start)
7745                 goto del_ptr;
7746
7747         nritems = btrfs_header_nritems(eb);
7748
7749         /* the search failed, lets scan this node and hope we find it */
7750         for (slot = 0; slot < nritems; slot++) {
7751                 found =  btrfs_node_blockptr(eb, slot);
7752                 if (found == corrupt->cache.start)
7753                         goto del_ptr;
7754         }
7755         /*
7756          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7757          * to this block
7758          */
7759         if (eb == info->extent_root->node) {
7760                 ret = -ENOENT;
7761                 goto out;
7762         } else {
7763                 level++;
7764                 btrfs_release_path(&path);
7765                 goto again;
7766         }
7767
7768 del_ptr:
7769         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7770         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7771
7772 out:
7773         btrfs_release_path(&path);
7774         return ret;
7775 }
7776
7777 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7778 {
7779         struct btrfs_trans_handle *trans = NULL;
7780         struct cache_extent *cache;
7781         struct btrfs_corrupt_block *corrupt;
7782
7783         while (1) {
7784                 cache = search_cache_extent(info->corrupt_blocks, 0);
7785                 if (!cache)
7786                         break;
7787                 if (!trans) {
7788                         trans = btrfs_start_transaction(info->extent_root, 1);
7789                         if (IS_ERR(trans))
7790                                 return PTR_ERR(trans);
7791                 }
7792                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7793                 prune_one_block(trans, info, corrupt);
7794                 remove_cache_extent(info->corrupt_blocks, cache);
7795         }
7796         if (trans)
7797                 return btrfs_commit_transaction(trans, info->extent_root);
7798         return 0;
7799 }
7800
7801 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7802 {
7803         struct btrfs_block_group_cache *cache;
7804         u64 start, end;
7805         int ret;
7806
7807         while (1) {
7808                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7809                                             &start, &end, EXTENT_DIRTY);
7810                 if (ret)
7811                         break;
7812                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7813                                    GFP_NOFS);
7814         }
7815
7816         start = 0;
7817         while (1) {
7818                 cache = btrfs_lookup_first_block_group(fs_info, start);
7819                 if (!cache)
7820                         break;
7821                 if (cache->cached)
7822                         cache->cached = 0;
7823                 start = cache->key.objectid + cache->key.offset;
7824         }
7825 }
7826
7827 static int check_extent_refs(struct btrfs_root *root,
7828                              struct cache_tree *extent_cache)
7829 {
7830         struct extent_record *rec;
7831         struct cache_extent *cache;
7832         int err = 0;
7833         int ret = 0;
7834         int fixed = 0;
7835         int had_dups = 0;
7836         int recorded = 0;
7837
7838         if (repair) {
7839                 /*
7840                  * if we're doing a repair, we have to make sure
7841                  * we don't allocate from the problem extents.
7842                  * In the worst case, this will be all the
7843                  * extents in the FS
7844                  */
7845                 cache = search_cache_extent(extent_cache, 0);
7846                 while(cache) {
7847                         rec = container_of(cache, struct extent_record, cache);
7848                         set_extent_dirty(root->fs_info->excluded_extents,
7849                                          rec->start,
7850                                          rec->start + rec->max_size - 1,
7851                                          GFP_NOFS);
7852                         cache = next_cache_extent(cache);
7853                 }
7854
7855                 /* pin down all the corrupted blocks too */
7856                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7857                 while(cache) {
7858                         set_extent_dirty(root->fs_info->excluded_extents,
7859                                          cache->start,
7860                                          cache->start + cache->size - 1,
7861                                          GFP_NOFS);
7862                         cache = next_cache_extent(cache);
7863                 }
7864                 prune_corrupt_blocks(root->fs_info);
7865                 reset_cached_block_groups(root->fs_info);
7866         }
7867
7868         reset_cached_block_groups(root->fs_info);
7869
7870         /*
7871          * We need to delete any duplicate entries we find first otherwise we
7872          * could mess up the extent tree when we have backrefs that actually
7873          * belong to a different extent item and not the weird duplicate one.
7874          */
7875         while (repair && !list_empty(&duplicate_extents)) {
7876                 rec = to_extent_record(duplicate_extents.next);
7877                 list_del_init(&rec->list);
7878
7879                 /* Sometimes we can find a backref before we find an actual
7880                  * extent, so we need to process it a little bit to see if there
7881                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7882                  * if this is a backref screwup.  If we need to delete stuff
7883                  * process_duplicates() will return 0, otherwise it will return
7884                  * 1 and we
7885                  */
7886                 if (process_duplicates(root, extent_cache, rec))
7887                         continue;
7888                 ret = delete_duplicate_records(root, rec);
7889                 if (ret < 0)
7890                         return ret;
7891                 /*
7892                  * delete_duplicate_records will return the number of entries
7893                  * deleted, so if it's greater than 0 then we know we actually
7894                  * did something and we need to remove.
7895                  */
7896                 if (ret)
7897                         had_dups = 1;
7898         }
7899
7900         if (had_dups)
7901                 return -EAGAIN;
7902
7903         while(1) {
7904                 int cur_err = 0;
7905
7906                 fixed = 0;
7907                 recorded = 0;
7908                 cache = search_cache_extent(extent_cache, 0);
7909                 if (!cache)
7910                         break;
7911                 rec = container_of(cache, struct extent_record, cache);
7912                 if (rec->num_duplicates) {
7913                         fprintf(stderr, "extent item %llu has multiple extent "
7914                                 "items\n", (unsigned long long)rec->start);
7915                         err = 1;
7916                         cur_err = 1;
7917                 }
7918
7919                 if (rec->refs != rec->extent_item_refs) {
7920                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7921                                 (unsigned long long)rec->start,
7922                                 (unsigned long long)rec->nr);
7923                         fprintf(stderr, "extent item %llu, found %llu\n",
7924                                 (unsigned long long)rec->extent_item_refs,
7925                                 (unsigned long long)rec->refs);
7926                         ret = record_orphan_data_extents(root->fs_info, rec);
7927                         if (ret < 0)
7928                                 goto repair_abort;
7929                         if (ret == 0) {
7930                                 recorded = 1;
7931                         } else {
7932                                 /*
7933                                  * we can't use the extent to repair file
7934                                  * extent, let the fallback method handle it.
7935                                  */
7936                                 if (!fixed && repair) {
7937                                         ret = fixup_extent_refs(
7938                                                         root->fs_info,
7939                                                         extent_cache, rec);
7940                                         if (ret)
7941                                                 goto repair_abort;
7942                                         fixed = 1;
7943                                 }
7944                         }
7945                         err = 1;
7946                         cur_err = 1;
7947                 }
7948                 if (all_backpointers_checked(rec, 1)) {
7949                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7950                                 (unsigned long long)rec->start,
7951                                 (unsigned long long)rec->nr);
7952
7953                         if (!fixed && !recorded && repair) {
7954                                 ret = fixup_extent_refs(root->fs_info,
7955                                                         extent_cache, rec);
7956                                 if (ret)
7957                                         goto repair_abort;
7958                                 fixed = 1;
7959                         }
7960                         cur_err = 1;
7961                         err = 1;
7962                 }
7963                 if (!rec->owner_ref_checked) {
7964                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7965                                 (unsigned long long)rec->start,
7966                                 (unsigned long long)rec->nr);
7967                         if (!fixed && !recorded && repair) {
7968                                 ret = fixup_extent_refs(root->fs_info,
7969                                                         extent_cache, rec);
7970                                 if (ret)
7971                                         goto repair_abort;
7972                                 fixed = 1;
7973                         }
7974                         err = 1;
7975                         cur_err = 1;
7976                 }
7977                 if (rec->bad_full_backref) {
7978                         fprintf(stderr, "bad full backref, on [%llu]\n",
7979                                 (unsigned long long)rec->start);
7980                         if (repair) {
7981                                 ret = fixup_extent_flags(root->fs_info, rec);
7982                                 if (ret)
7983                                         goto repair_abort;
7984                                 fixed = 1;
7985                         }
7986                         err = 1;
7987                         cur_err = 1;
7988                 }
7989                 /*
7990                  * Although it's not a extent ref's problem, we reuse this
7991                  * routine for error reporting.
7992                  * No repair function yet.
7993                  */
7994                 if (rec->crossing_stripes) {
7995                         fprintf(stderr,
7996                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7997                                 rec->start, rec->start + rec->max_size);
7998                         err = 1;
7999                         cur_err = 1;
8000                 }
8001
8002                 if (rec->wrong_chunk_type) {
8003                         fprintf(stderr,
8004                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8005                                 rec->start, rec->start + rec->max_size);
8006                         err = 1;
8007                         cur_err = 1;
8008                 }
8009
8010                 remove_cache_extent(extent_cache, cache);
8011                 free_all_extent_backrefs(rec);
8012                 if (!init_extent_tree && repair && (!cur_err || fixed))
8013                         clear_extent_dirty(root->fs_info->excluded_extents,
8014                                            rec->start,
8015                                            rec->start + rec->max_size - 1,
8016                                            GFP_NOFS);
8017                 free(rec);
8018         }
8019 repair_abort:
8020         if (repair) {
8021                 if (ret && ret != -EAGAIN) {
8022                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8023                         exit(1);
8024                 } else if (!ret) {
8025                         struct btrfs_trans_handle *trans;
8026
8027                         root = root->fs_info->extent_root;
8028                         trans = btrfs_start_transaction(root, 1);
8029                         if (IS_ERR(trans)) {
8030                                 ret = PTR_ERR(trans);
8031                                 goto repair_abort;
8032                         }
8033
8034                         btrfs_fix_block_accounting(trans, root);
8035                         ret = btrfs_commit_transaction(trans, root);
8036                         if (ret)
8037                                 goto repair_abort;
8038                 }
8039                 if (err)
8040                         fprintf(stderr, "repaired damaged extent references\n");
8041                 return ret;
8042         }
8043         return err;
8044 }
8045
8046 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8047 {
8048         u64 stripe_size;
8049
8050         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8051                 stripe_size = length;
8052                 stripe_size /= num_stripes;
8053         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8054                 stripe_size = length * 2;
8055                 stripe_size /= num_stripes;
8056         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8057                 stripe_size = length;
8058                 stripe_size /= (num_stripes - 1);
8059         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8060                 stripe_size = length;
8061                 stripe_size /= (num_stripes - 2);
8062         } else {
8063                 stripe_size = length;
8064         }
8065         return stripe_size;
8066 }
8067
8068 /*
8069  * Check the chunk with its block group/dev list ref:
8070  * Return 0 if all refs seems valid.
8071  * Return 1 if part of refs seems valid, need later check for rebuild ref
8072  * like missing block group and needs to search extent tree to rebuild them.
8073  * Return -1 if essential refs are missing and unable to rebuild.
8074  */
8075 static int check_chunk_refs(struct chunk_record *chunk_rec,
8076                             struct block_group_tree *block_group_cache,
8077                             struct device_extent_tree *dev_extent_cache,
8078                             int silent)
8079 {
8080         struct cache_extent *block_group_item;
8081         struct block_group_record *block_group_rec;
8082         struct cache_extent *dev_extent_item;
8083         struct device_extent_record *dev_extent_rec;
8084         u64 devid;
8085         u64 offset;
8086         u64 length;
8087         int metadump_v2 = 0;
8088         int i;
8089         int ret = 0;
8090
8091         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8092                                                chunk_rec->offset,
8093                                                chunk_rec->length);
8094         if (block_group_item) {
8095                 block_group_rec = container_of(block_group_item,
8096                                                struct block_group_record,
8097                                                cache);
8098                 if (chunk_rec->length != block_group_rec->offset ||
8099                     chunk_rec->offset != block_group_rec->objectid ||
8100                     (!metadump_v2 &&
8101                      chunk_rec->type_flags != block_group_rec->flags)) {
8102                         if (!silent)
8103                                 fprintf(stderr,
8104                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8105                                         chunk_rec->objectid,
8106                                         chunk_rec->type,
8107                                         chunk_rec->offset,
8108                                         chunk_rec->length,
8109                                         chunk_rec->offset,
8110                                         chunk_rec->type_flags,
8111                                         block_group_rec->objectid,
8112                                         block_group_rec->type,
8113                                         block_group_rec->offset,
8114                                         block_group_rec->offset,
8115                                         block_group_rec->objectid,
8116                                         block_group_rec->flags);
8117                         ret = -1;
8118                 } else {
8119                         list_del_init(&block_group_rec->list);
8120                         chunk_rec->bg_rec = block_group_rec;
8121                 }
8122         } else {
8123                 if (!silent)
8124                         fprintf(stderr,
8125                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8126                                 chunk_rec->objectid,
8127                                 chunk_rec->type,
8128                                 chunk_rec->offset,
8129                                 chunk_rec->length,
8130                                 chunk_rec->offset,
8131                                 chunk_rec->type_flags);
8132                 ret = 1;
8133         }
8134
8135         if (metadump_v2)
8136                 return ret;
8137
8138         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8139                                     chunk_rec->num_stripes);
8140         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8141                 devid = chunk_rec->stripes[i].devid;
8142                 offset = chunk_rec->stripes[i].offset;
8143                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8144                                                        devid, offset, length);
8145                 if (dev_extent_item) {
8146                         dev_extent_rec = container_of(dev_extent_item,
8147                                                 struct device_extent_record,
8148                                                 cache);
8149                         if (dev_extent_rec->objectid != devid ||
8150                             dev_extent_rec->offset != offset ||
8151                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8152                             dev_extent_rec->length != length) {
8153                                 if (!silent)
8154                                         fprintf(stderr,
8155                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8156                                                 chunk_rec->objectid,
8157                                                 chunk_rec->type,
8158                                                 chunk_rec->offset,
8159                                                 chunk_rec->stripes[i].devid,
8160                                                 chunk_rec->stripes[i].offset,
8161                                                 dev_extent_rec->objectid,
8162                                                 dev_extent_rec->offset,
8163                                                 dev_extent_rec->length);
8164                                 ret = -1;
8165                         } else {
8166                                 list_move(&dev_extent_rec->chunk_list,
8167                                           &chunk_rec->dextents);
8168                         }
8169                 } else {
8170                         if (!silent)
8171                                 fprintf(stderr,
8172                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8173                                         chunk_rec->objectid,
8174                                         chunk_rec->type,
8175                                         chunk_rec->offset,
8176                                         chunk_rec->stripes[i].devid,
8177                                         chunk_rec->stripes[i].offset);
8178                         ret = -1;
8179                 }
8180         }
8181         return ret;
8182 }
8183
8184 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8185 int check_chunks(struct cache_tree *chunk_cache,
8186                  struct block_group_tree *block_group_cache,
8187                  struct device_extent_tree *dev_extent_cache,
8188                  struct list_head *good, struct list_head *bad,
8189                  struct list_head *rebuild, int silent)
8190 {
8191         struct cache_extent *chunk_item;
8192         struct chunk_record *chunk_rec;
8193         struct block_group_record *bg_rec;
8194         struct device_extent_record *dext_rec;
8195         int err;
8196         int ret = 0;
8197
8198         chunk_item = first_cache_extent(chunk_cache);
8199         while (chunk_item) {
8200                 chunk_rec = container_of(chunk_item, struct chunk_record,
8201                                          cache);
8202                 err = check_chunk_refs(chunk_rec, block_group_cache,
8203                                        dev_extent_cache, silent);
8204                 if (err < 0)
8205                         ret = err;
8206                 if (err == 0 && good)
8207                         list_add_tail(&chunk_rec->list, good);
8208                 if (err > 0 && rebuild)
8209                         list_add_tail(&chunk_rec->list, rebuild);
8210                 if (err < 0 && bad)
8211                         list_add_tail(&chunk_rec->list, bad);
8212                 chunk_item = next_cache_extent(chunk_item);
8213         }
8214
8215         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8216                 if (!silent)
8217                         fprintf(stderr,
8218                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8219                                 bg_rec->objectid,
8220                                 bg_rec->offset,
8221                                 bg_rec->flags);
8222                 if (!ret)
8223                         ret = 1;
8224         }
8225
8226         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8227                             chunk_list) {
8228                 if (!silent)
8229                         fprintf(stderr,
8230                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8231                                 dext_rec->objectid,
8232                                 dext_rec->offset,
8233                                 dext_rec->length);
8234                 if (!ret)
8235                         ret = 1;
8236         }
8237         return ret;
8238 }
8239
8240
8241 static int check_device_used(struct device_record *dev_rec,
8242                              struct device_extent_tree *dext_cache)
8243 {
8244         struct cache_extent *cache;
8245         struct device_extent_record *dev_extent_rec;
8246         u64 total_byte = 0;
8247
8248         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8249         while (cache) {
8250                 dev_extent_rec = container_of(cache,
8251                                               struct device_extent_record,
8252                                               cache);
8253                 if (dev_extent_rec->objectid != dev_rec->devid)
8254                         break;
8255
8256                 list_del_init(&dev_extent_rec->device_list);
8257                 total_byte += dev_extent_rec->length;
8258                 cache = next_cache_extent(cache);
8259         }
8260
8261         if (total_byte != dev_rec->byte_used) {
8262                 fprintf(stderr,
8263                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8264                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8265                         dev_rec->type, dev_rec->offset);
8266                 return -1;
8267         } else {
8268                 return 0;
8269         }
8270 }
8271
8272 /* check btrfs_dev_item -> btrfs_dev_extent */
8273 static int check_devices(struct rb_root *dev_cache,
8274                          struct device_extent_tree *dev_extent_cache)
8275 {
8276         struct rb_node *dev_node;
8277         struct device_record *dev_rec;
8278         struct device_extent_record *dext_rec;
8279         int err;
8280         int ret = 0;
8281
8282         dev_node = rb_first(dev_cache);
8283         while (dev_node) {
8284                 dev_rec = container_of(dev_node, struct device_record, node);
8285                 err = check_device_used(dev_rec, dev_extent_cache);
8286                 if (err)
8287                         ret = err;
8288
8289                 dev_node = rb_next(dev_node);
8290         }
8291         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8292                             device_list) {
8293                 fprintf(stderr,
8294                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8295                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8296                 if (!ret)
8297                         ret = 1;
8298         }
8299         return ret;
8300 }
8301
8302 static int add_root_item_to_list(struct list_head *head,
8303                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8304                                   u8 level, u8 drop_level,
8305                                   int level_size, struct btrfs_key *drop_key)
8306 {
8307
8308         struct root_item_record *ri_rec;
8309         ri_rec = malloc(sizeof(*ri_rec));
8310         if (!ri_rec)
8311                 return -ENOMEM;
8312         ri_rec->bytenr = bytenr;
8313         ri_rec->objectid = objectid;
8314         ri_rec->level = level;
8315         ri_rec->level_size = level_size;
8316         ri_rec->drop_level = drop_level;
8317         ri_rec->last_snapshot = last_snapshot;
8318         if (drop_key)
8319                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8320         list_add_tail(&ri_rec->list, head);
8321
8322         return 0;
8323 }
8324
8325 static void free_root_item_list(struct list_head *list)
8326 {
8327         struct root_item_record *ri_rec;
8328
8329         while (!list_empty(list)) {
8330                 ri_rec = list_first_entry(list, struct root_item_record,
8331                                           list);
8332                 list_del_init(&ri_rec->list);
8333                 free(ri_rec);
8334         }
8335 }
8336
8337 static int deal_root_from_list(struct list_head *list,
8338                                struct btrfs_root *root,
8339                                struct block_info *bits,
8340                                int bits_nr,
8341                                struct cache_tree *pending,
8342                                struct cache_tree *seen,
8343                                struct cache_tree *reada,
8344                                struct cache_tree *nodes,
8345                                struct cache_tree *extent_cache,
8346                                struct cache_tree *chunk_cache,
8347                                struct rb_root *dev_cache,
8348                                struct block_group_tree *block_group_cache,
8349                                struct device_extent_tree *dev_extent_cache)
8350 {
8351         int ret = 0;
8352         u64 last;
8353
8354         while (!list_empty(list)) {
8355                 struct root_item_record *rec;
8356                 struct extent_buffer *buf;
8357                 rec = list_entry(list->next,
8358                                  struct root_item_record, list);
8359                 last = 0;
8360                 buf = read_tree_block(root->fs_info->tree_root,
8361                                       rec->bytenr, rec->level_size, 0);
8362                 if (!extent_buffer_uptodate(buf)) {
8363                         free_extent_buffer(buf);
8364                         ret = -EIO;
8365                         break;
8366                 }
8367                 ret = add_root_to_pending(buf, extent_cache, pending,
8368                                     seen, nodes, rec->objectid);
8369                 if (ret < 0)
8370                         break;
8371                 /*
8372                  * To rebuild extent tree, we need deal with snapshot
8373                  * one by one, otherwise we deal with node firstly which
8374                  * can maximize readahead.
8375                  */
8376                 while (1) {
8377                         ret = run_next_block(root, bits, bits_nr, &last,
8378                                              pending, seen, reada, nodes,
8379                                              extent_cache, chunk_cache,
8380                                              dev_cache, block_group_cache,
8381                                              dev_extent_cache, rec);
8382                         if (ret != 0)
8383                                 break;
8384                 }
8385                 free_extent_buffer(buf);
8386                 list_del(&rec->list);
8387                 free(rec);
8388                 if (ret < 0)
8389                         break;
8390         }
8391         while (ret >= 0) {
8392                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8393                                      reada, nodes, extent_cache, chunk_cache,
8394                                      dev_cache, block_group_cache,
8395                                      dev_extent_cache, NULL);
8396                 if (ret != 0) {
8397                         if (ret > 0)
8398                                 ret = 0;
8399                         break;
8400                 }
8401         }
8402         return ret;
8403 }
8404
8405 static int check_chunks_and_extents(struct btrfs_root *root)
8406 {
8407         struct rb_root dev_cache;
8408         struct cache_tree chunk_cache;
8409         struct block_group_tree block_group_cache;
8410         struct device_extent_tree dev_extent_cache;
8411         struct cache_tree extent_cache;
8412         struct cache_tree seen;
8413         struct cache_tree pending;
8414         struct cache_tree reada;
8415         struct cache_tree nodes;
8416         struct extent_io_tree excluded_extents;
8417         struct cache_tree corrupt_blocks;
8418         struct btrfs_path path;
8419         struct btrfs_key key;
8420         struct btrfs_key found_key;
8421         int ret, err = 0;
8422         struct block_info *bits;
8423         int bits_nr;
8424         struct extent_buffer *leaf;
8425         int slot;
8426         struct btrfs_root_item ri;
8427         struct list_head dropping_trees;
8428         struct list_head normal_trees;
8429         struct btrfs_root *root1;
8430         u64 objectid;
8431         u32 level_size;
8432         u8 level;
8433
8434         dev_cache = RB_ROOT;
8435         cache_tree_init(&chunk_cache);
8436         block_group_tree_init(&block_group_cache);
8437         device_extent_tree_init(&dev_extent_cache);
8438
8439         cache_tree_init(&extent_cache);
8440         cache_tree_init(&seen);
8441         cache_tree_init(&pending);
8442         cache_tree_init(&nodes);
8443         cache_tree_init(&reada);
8444         cache_tree_init(&corrupt_blocks);
8445         extent_io_tree_init(&excluded_extents);
8446         INIT_LIST_HEAD(&dropping_trees);
8447         INIT_LIST_HEAD(&normal_trees);
8448
8449         if (repair) {
8450                 root->fs_info->excluded_extents = &excluded_extents;
8451                 root->fs_info->fsck_extent_cache = &extent_cache;
8452                 root->fs_info->free_extent_hook = free_extent_hook;
8453                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8454         }
8455
8456         bits_nr = 1024;
8457         bits = malloc(bits_nr * sizeof(struct block_info));
8458         if (!bits) {
8459                 perror("malloc");
8460                 exit(1);
8461         }
8462
8463         if (ctx.progress_enabled) {
8464                 ctx.tp = TASK_EXTENTS;
8465                 task_start(ctx.info);
8466         }
8467
8468 again:
8469         root1 = root->fs_info->tree_root;
8470         level = btrfs_header_level(root1->node);
8471         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8472                                     root1->node->start, 0, level, 0,
8473                                     root1->nodesize, NULL);
8474         if (ret < 0)
8475                 goto out;
8476         root1 = root->fs_info->chunk_root;
8477         level = btrfs_header_level(root1->node);
8478         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8479                                     root1->node->start, 0, level, 0,
8480                                     root1->nodesize, NULL);
8481         if (ret < 0)
8482                 goto out;
8483         btrfs_init_path(&path);
8484         key.offset = 0;
8485         key.objectid = 0;
8486         key.type = BTRFS_ROOT_ITEM_KEY;
8487         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8488                                         &key, &path, 0, 0);
8489         if (ret < 0)
8490                 goto out;
8491         while(1) {
8492                 leaf = path.nodes[0];
8493                 slot = path.slots[0];
8494                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8495                         ret = btrfs_next_leaf(root, &path);
8496                         if (ret != 0)
8497                                 break;
8498                         leaf = path.nodes[0];
8499                         slot = path.slots[0];
8500                 }
8501                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8502                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8503                         unsigned long offset;
8504                         u64 last_snapshot;
8505
8506                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8507                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8508                         last_snapshot = btrfs_root_last_snapshot(&ri);
8509                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8510                                 level = btrfs_root_level(&ri);
8511                                 level_size = root->nodesize;
8512                                 ret = add_root_item_to_list(&normal_trees,
8513                                                 found_key.objectid,
8514                                                 btrfs_root_bytenr(&ri),
8515                                                 last_snapshot, level,
8516                                                 0, level_size, NULL);
8517                                 if (ret < 0)
8518                                         goto out;
8519                         } else {
8520                                 level = btrfs_root_level(&ri);
8521                                 level_size = root->nodesize;
8522                                 objectid = found_key.objectid;
8523                                 btrfs_disk_key_to_cpu(&found_key,
8524                                                       &ri.drop_progress);
8525                                 ret = add_root_item_to_list(&dropping_trees,
8526                                                 objectid,
8527                                                 btrfs_root_bytenr(&ri),
8528                                                 last_snapshot, level,
8529                                                 ri.drop_level,
8530                                                 level_size, &found_key);
8531                                 if (ret < 0)
8532                                         goto out;
8533                         }
8534                 }
8535                 path.slots[0]++;
8536         }
8537         btrfs_release_path(&path);
8538
8539         /*
8540          * check_block can return -EAGAIN if it fixes something, please keep
8541          * this in mind when dealing with return values from these functions, if
8542          * we get -EAGAIN we want to fall through and restart the loop.
8543          */
8544         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8545                                   &seen, &reada, &nodes, &extent_cache,
8546                                   &chunk_cache, &dev_cache, &block_group_cache,
8547                                   &dev_extent_cache);
8548         if (ret < 0) {
8549                 if (ret == -EAGAIN)
8550                         goto loop;
8551                 goto out;
8552         }
8553         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8554                                   &pending, &seen, &reada, &nodes,
8555                                   &extent_cache, &chunk_cache, &dev_cache,
8556                                   &block_group_cache, &dev_extent_cache);
8557         if (ret < 0) {
8558                 if (ret == -EAGAIN)
8559                         goto loop;
8560                 goto out;
8561         }
8562
8563         ret = check_chunks(&chunk_cache, &block_group_cache,
8564                            &dev_extent_cache, NULL, NULL, NULL, 0);
8565         if (ret) {
8566                 if (ret == -EAGAIN)
8567                         goto loop;
8568                 err = ret;
8569         }
8570
8571         ret = check_extent_refs(root, &extent_cache);
8572         if (ret < 0) {
8573                 if (ret == -EAGAIN)
8574                         goto loop;
8575                 goto out;
8576         }
8577
8578         ret = check_devices(&dev_cache, &dev_extent_cache);
8579         if (ret && err)
8580                 ret = err;
8581
8582 out:
8583         task_stop(ctx.info);
8584         if (repair) {
8585                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8586                 extent_io_tree_cleanup(&excluded_extents);
8587                 root->fs_info->fsck_extent_cache = NULL;
8588                 root->fs_info->free_extent_hook = NULL;
8589                 root->fs_info->corrupt_blocks = NULL;
8590                 root->fs_info->excluded_extents = NULL;
8591         }
8592         free(bits);
8593         free_chunk_cache_tree(&chunk_cache);
8594         free_device_cache_tree(&dev_cache);
8595         free_block_group_tree(&block_group_cache);
8596         free_device_extent_tree(&dev_extent_cache);
8597         free_extent_cache_tree(&seen);
8598         free_extent_cache_tree(&pending);
8599         free_extent_cache_tree(&reada);
8600         free_extent_cache_tree(&nodes);
8601         return ret;
8602 loop:
8603         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8604         free_extent_cache_tree(&seen);
8605         free_extent_cache_tree(&pending);
8606         free_extent_cache_tree(&reada);
8607         free_extent_cache_tree(&nodes);
8608         free_chunk_cache_tree(&chunk_cache);
8609         free_block_group_tree(&block_group_cache);
8610         free_device_cache_tree(&dev_cache);
8611         free_device_extent_tree(&dev_extent_cache);
8612         free_extent_record_cache(root->fs_info, &extent_cache);
8613         free_root_item_list(&normal_trees);
8614         free_root_item_list(&dropping_trees);
8615         extent_io_tree_cleanup(&excluded_extents);
8616         goto again;
8617 }
8618
8619 /*
8620  * Check backrefs of a tree block given by @bytenr or @eb.
8621  *
8622  * @root:       the root containing the @bytenr or @eb
8623  * @eb:         tree block extent buffer, can be NULL
8624  * @bytenr:     bytenr of the tree block to search
8625  * @level:      tree level of the tree block
8626  * @owner:      owner of the tree block
8627  *
8628  * Return >0 for any error found and output error message
8629  * Return 0 for no error found
8630  */
8631 static int check_tree_block_ref(struct btrfs_root *root,
8632                                 struct extent_buffer *eb, u64 bytenr,
8633                                 int level, u64 owner)
8634 {
8635         struct btrfs_key key;
8636         struct btrfs_root *extent_root = root->fs_info->extent_root;
8637         struct btrfs_path path;
8638         struct btrfs_extent_item *ei;
8639         struct btrfs_extent_inline_ref *iref;
8640         struct extent_buffer *leaf;
8641         unsigned long end;
8642         unsigned long ptr;
8643         int slot;
8644         int skinny_level;
8645         int type;
8646         u32 nodesize = root->nodesize;
8647         u32 item_size;
8648         u64 offset;
8649         int found_ref = 0;
8650         int err = 0;
8651         int ret;
8652
8653         btrfs_init_path(&path);
8654         key.objectid = bytenr;
8655         if (btrfs_fs_incompat(root->fs_info,
8656                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8657                 key.type = BTRFS_METADATA_ITEM_KEY;
8658         else
8659                 key.type = BTRFS_EXTENT_ITEM_KEY;
8660         key.offset = (u64)-1;
8661
8662         /* Search for the backref in extent tree */
8663         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8664         if (ret < 0) {
8665                 err |= BACKREF_MISSING;
8666                 goto out;
8667         }
8668         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8669         if (ret) {
8670                 err |= BACKREF_MISSING;
8671                 goto out;
8672         }
8673
8674         leaf = path.nodes[0];
8675         slot = path.slots[0];
8676         btrfs_item_key_to_cpu(leaf, &key, slot);
8677
8678         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8679
8680         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8681                 skinny_level = (int)key.offset;
8682                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8683         } else {
8684                 struct btrfs_tree_block_info *info;
8685
8686                 info = (struct btrfs_tree_block_info *)(ei + 1);
8687                 skinny_level = btrfs_tree_block_level(leaf, info);
8688                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8689         }
8690
8691         if (eb) {
8692                 u64 header_gen;
8693                 u64 extent_gen;
8694
8695                 if (!(btrfs_extent_flags(leaf, ei) &
8696                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8697                         error(
8698                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8699                                 key.objectid, nodesize,
8700                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8701                         err = BACKREF_MISMATCH;
8702                 }
8703                 header_gen = btrfs_header_generation(eb);
8704                 extent_gen = btrfs_extent_generation(leaf, ei);
8705                 if (header_gen != extent_gen) {
8706                         error(
8707         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8708                                 key.objectid, nodesize, header_gen,
8709                                 extent_gen);
8710                         err = BACKREF_MISMATCH;
8711                 }
8712                 if (level != skinny_level) {
8713                         error(
8714                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8715                                 key.objectid, nodesize, level, skinny_level);
8716                         err = BACKREF_MISMATCH;
8717                 }
8718                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8719                         error(
8720                         "extent[%llu %u] is referred by other roots than %llu",
8721                                 key.objectid, nodesize, root->objectid);
8722                         err = BACKREF_MISMATCH;
8723                 }
8724         }
8725
8726         /*
8727          * Iterate the extent/metadata item to find the exact backref
8728          */
8729         item_size = btrfs_item_size_nr(leaf, slot);
8730         ptr = (unsigned long)iref;
8731         end = (unsigned long)ei + item_size;
8732         while (ptr < end) {
8733                 iref = (struct btrfs_extent_inline_ref *)ptr;
8734                 type = btrfs_extent_inline_ref_type(leaf, iref);
8735                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8736
8737                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8738                         (offset == root->objectid || offset == owner)) {
8739                         found_ref = 1;
8740                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8741                         /* Check if the backref points to valid referencer */
8742                         found_ref = !check_tree_block_ref(root, NULL, offset,
8743                                                           level + 1, owner);
8744                 }
8745
8746                 if (found_ref)
8747                         break;
8748                 ptr += btrfs_extent_inline_ref_size(type);
8749         }
8750
8751         /*
8752          * Inlined extent item doesn't have what we need, check
8753          * TREE_BLOCK_REF_KEY
8754          */
8755         if (!found_ref) {
8756                 btrfs_release_path(&path);
8757                 key.objectid = bytenr;
8758                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8759                 key.offset = root->objectid;
8760
8761                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8762                 if (!ret)
8763                         found_ref = 1;
8764         }
8765         if (!found_ref)
8766                 err |= BACKREF_MISSING;
8767 out:
8768         btrfs_release_path(&path);
8769         if (eb && (err & BACKREF_MISSING))
8770                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8771                         bytenr, nodesize, owner, level);
8772         return err;
8773 }
8774
8775 /*
8776  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8777  *
8778  * Return >0 any error found and output error message
8779  * Return 0 for no error found
8780  */
8781 static int check_extent_data_item(struct btrfs_root *root,
8782                                   struct extent_buffer *eb, int slot)
8783 {
8784         struct btrfs_file_extent_item *fi;
8785         struct btrfs_path path;
8786         struct btrfs_root *extent_root = root->fs_info->extent_root;
8787         struct btrfs_key fi_key;
8788         struct btrfs_key dbref_key;
8789         struct extent_buffer *leaf;
8790         struct btrfs_extent_item *ei;
8791         struct btrfs_extent_inline_ref *iref;
8792         struct btrfs_extent_data_ref *dref;
8793         u64 owner;
8794         u64 file_extent_gen;
8795         u64 disk_bytenr;
8796         u64 disk_num_bytes;
8797         u64 extent_num_bytes;
8798         u64 extent_flags;
8799         u64 extent_gen;
8800         u32 item_size;
8801         unsigned long end;
8802         unsigned long ptr;
8803         int type;
8804         u64 ref_root;
8805         int found_dbackref = 0;
8806         int err = 0;
8807         int ret;
8808
8809         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8810         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8811         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8812
8813         /* Nothing to check for hole and inline data extents */
8814         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8815             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8816                 return 0;
8817
8818         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8819         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8820         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8821
8822         /* Check unaligned disk_num_bytes and num_bytes */
8823         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8824                 error(
8825 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8826                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8827                         root->sectorsize);
8828                 err |= BYTES_UNALIGNED;
8829         } else {
8830                 data_bytes_allocated += disk_num_bytes;
8831         }
8832         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8833                 error(
8834 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8835                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8836                         root->sectorsize);
8837                 err |= BYTES_UNALIGNED;
8838         } else {
8839                 data_bytes_referenced += extent_num_bytes;
8840         }
8841         owner = btrfs_header_owner(eb);
8842
8843         /* Check the extent item of the file extent in extent tree */
8844         btrfs_init_path(&path);
8845         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8846         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8847         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8848
8849         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8850         if (ret) {
8851                 err |= BACKREF_MISSING;
8852                 goto error;
8853         }
8854
8855         leaf = path.nodes[0];
8856         slot = path.slots[0];
8857         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8858
8859         extent_flags = btrfs_extent_flags(leaf, ei);
8860         extent_gen = btrfs_extent_generation(leaf, ei);
8861
8862         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8863                 error(
8864                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8865                     disk_bytenr, disk_num_bytes,
8866                     BTRFS_EXTENT_FLAG_DATA);
8867                 err |= BACKREF_MISMATCH;
8868         }
8869
8870         if (file_extent_gen < extent_gen) {
8871                 error(
8872 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8873                         disk_bytenr, disk_num_bytes, file_extent_gen,
8874                         extent_gen);
8875                 err |= BACKREF_MISMATCH;
8876         }
8877
8878         /* Check data backref inside that extent item */
8879         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8880         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8881         ptr = (unsigned long)iref;
8882         end = (unsigned long)ei + item_size;
8883         while (ptr < end) {
8884                 iref = (struct btrfs_extent_inline_ref *)ptr;
8885                 type = btrfs_extent_inline_ref_type(leaf, iref);
8886                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8887
8888                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8889                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8890                         if (ref_root == owner || ref_root == root->objectid)
8891                                 found_dbackref = 1;
8892                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8893                         found_dbackref = !check_tree_block_ref(root, NULL,
8894                                 btrfs_extent_inline_ref_offset(leaf, iref),
8895                                 0, owner);
8896                 }
8897
8898                 if (found_dbackref)
8899                         break;
8900                 ptr += btrfs_extent_inline_ref_size(type);
8901         }
8902
8903         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8904         if (!found_dbackref) {
8905                 btrfs_release_path(&path);
8906
8907                 btrfs_init_path(&path);
8908                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8909                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8910                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8911                                 fi_key.objectid, fi_key.offset);
8912
8913                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8914                                         &dbref_key, &path, 0, 0);
8915                 if (!ret)
8916                         found_dbackref = 1;
8917         }
8918
8919         if (!found_dbackref)
8920                 err |= BACKREF_MISSING;
8921 error:
8922         btrfs_release_path(&path);
8923         if (err & BACKREF_MISSING) {
8924                 error("data extent[%llu %llu] backref lost",
8925                       disk_bytenr, disk_num_bytes);
8926         }
8927         return err;
8928 }
8929
8930 /*
8931  * Get real tree block level for the case like shared block
8932  * Return >= 0 as tree level
8933  * Return <0 for error
8934  */
8935 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8936 {
8937         struct extent_buffer *eb;
8938         struct btrfs_path path;
8939         struct btrfs_key key;
8940         struct btrfs_extent_item *ei;
8941         u64 flags;
8942         u64 transid;
8943         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8944         u8 backref_level;
8945         u8 header_level;
8946         int ret;
8947
8948         /* Search extent tree for extent generation and level */
8949         key.objectid = bytenr;
8950         key.type = BTRFS_METADATA_ITEM_KEY;
8951         key.offset = (u64)-1;
8952
8953         btrfs_init_path(&path);
8954         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8955         if (ret < 0)
8956                 goto release_out;
8957         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8958         if (ret < 0)
8959                 goto release_out;
8960         if (ret > 0) {
8961                 ret = -ENOENT;
8962                 goto release_out;
8963         }
8964
8965         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8966         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8967                             struct btrfs_extent_item);
8968         flags = btrfs_extent_flags(path.nodes[0], ei);
8969         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8970                 ret = -ENOENT;
8971                 goto release_out;
8972         }
8973
8974         /* Get transid for later read_tree_block() check */
8975         transid = btrfs_extent_generation(path.nodes[0], ei);
8976
8977         /* Get backref level as one source */
8978         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8979                 backref_level = key.offset;
8980         } else {
8981                 struct btrfs_tree_block_info *info;
8982
8983                 info = (struct btrfs_tree_block_info *)(ei + 1);
8984                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8985         }
8986         btrfs_release_path(&path);
8987
8988         /* Get level from tree block as an alternative source */
8989         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8990         if (!extent_buffer_uptodate(eb)) {
8991                 free_extent_buffer(eb);
8992                 return -EIO;
8993         }
8994         header_level = btrfs_header_level(eb);
8995         free_extent_buffer(eb);
8996
8997         if (header_level != backref_level)
8998                 return -EIO;
8999         return header_level;
9000
9001 release_out:
9002         btrfs_release_path(&path);
9003         return ret;
9004 }
9005
9006 /*
9007  * Check if a tree block backref is valid (points to a valid tree block)
9008  * if level == -1, level will be resolved
9009  * Return >0 for any error found and print error message
9010  */
9011 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9012                                     u64 bytenr, int level)
9013 {
9014         struct btrfs_root *root;
9015         struct btrfs_key key;
9016         struct btrfs_path path;
9017         struct extent_buffer *eb;
9018         struct extent_buffer *node;
9019         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9020         int err = 0;
9021         int ret;
9022
9023         /* Query level for level == -1 special case */
9024         if (level == -1)
9025                 level = query_tree_block_level(fs_info, bytenr);
9026         if (level < 0) {
9027                 err |= REFERENCER_MISSING;
9028                 goto out;
9029         }
9030
9031         key.objectid = root_id;
9032         key.type = BTRFS_ROOT_ITEM_KEY;
9033         key.offset = (u64)-1;
9034
9035         root = btrfs_read_fs_root(fs_info, &key);
9036         if (IS_ERR(root)) {
9037                 err |= REFERENCER_MISSING;
9038                 goto out;
9039         }
9040
9041         /* Read out the tree block to get item/node key */
9042         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9043         if (!extent_buffer_uptodate(eb)) {
9044                 err |= REFERENCER_MISSING;
9045                 free_extent_buffer(eb);
9046                 goto out;
9047         }
9048
9049         /* Empty tree, no need to check key */
9050         if (!btrfs_header_nritems(eb) && !level) {
9051                 free_extent_buffer(eb);
9052                 goto out;
9053         }
9054
9055         if (level)
9056                 btrfs_node_key_to_cpu(eb, &key, 0);
9057         else
9058                 btrfs_item_key_to_cpu(eb, &key, 0);
9059
9060         free_extent_buffer(eb);
9061
9062         btrfs_init_path(&path);
9063         path.lowest_level = level;
9064         /* Search with the first key, to ensure we can reach it */
9065         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9066         if (ret < 0) {
9067                 err |= REFERENCER_MISSING;
9068                 goto release_out;
9069         }
9070
9071         node = path.nodes[level];
9072         if (btrfs_header_bytenr(node) != bytenr) {
9073                 error(
9074         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9075                         bytenr, nodesize, bytenr,
9076                         btrfs_header_bytenr(node));
9077                 err |= REFERENCER_MISMATCH;
9078         }
9079         if (btrfs_header_level(node) != level) {
9080                 error(
9081         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9082                         bytenr, nodesize, level,
9083                         btrfs_header_level(node));
9084                 err |= REFERENCER_MISMATCH;
9085         }
9086
9087 release_out:
9088         btrfs_release_path(&path);
9089 out:
9090         if (err & REFERENCER_MISSING) {
9091                 if (level < 0)
9092                         error("extent [%llu %d] lost referencer (owner: %llu)",
9093                                 bytenr, nodesize, root_id);
9094                 else
9095                         error(
9096                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9097                                 bytenr, nodesize, root_id, level);
9098         }
9099
9100         return err;
9101 }
9102
9103 /*
9104  * Check referencer for shared block backref
9105  * If level == -1, this function will resolve the level.
9106  */
9107 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9108                                      u64 parent, u64 bytenr, int level)
9109 {
9110         struct extent_buffer *eb;
9111         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9112         u32 nr;
9113         int found_parent = 0;
9114         int i;
9115
9116         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9117         if (!extent_buffer_uptodate(eb))
9118                 goto out;
9119
9120         if (level == -1)
9121                 level = query_tree_block_level(fs_info, bytenr);
9122         if (level < 0)
9123                 goto out;
9124
9125         if (level + 1 != btrfs_header_level(eb))
9126                 goto out;
9127
9128         nr = btrfs_header_nritems(eb);
9129         for (i = 0; i < nr; i++) {
9130                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9131                         found_parent = 1;
9132                         break;
9133                 }
9134         }
9135 out:
9136         free_extent_buffer(eb);
9137         if (!found_parent) {
9138                 error(
9139         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9140                         bytenr, nodesize, parent, level);
9141                 return REFERENCER_MISSING;
9142         }
9143         return 0;
9144 }
9145
9146 /*
9147  * Check referencer for normal (inlined) data ref
9148  * If len == 0, it will be resolved by searching in extent tree
9149  */
9150 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9151                                      u64 root_id, u64 objectid, u64 offset,
9152                                      u64 bytenr, u64 len, u32 count)
9153 {
9154         struct btrfs_root *root;
9155         struct btrfs_root *extent_root = fs_info->extent_root;
9156         struct btrfs_key key;
9157         struct btrfs_path path;
9158         struct extent_buffer *leaf;
9159         struct btrfs_file_extent_item *fi;
9160         u32 found_count = 0;
9161         int slot;
9162         int ret = 0;
9163
9164         if (!len) {
9165                 key.objectid = bytenr;
9166                 key.type = BTRFS_EXTENT_ITEM_KEY;
9167                 key.offset = (u64)-1;
9168
9169                 btrfs_init_path(&path);
9170                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9171                 if (ret < 0)
9172                         goto out;
9173                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9174                 if (ret)
9175                         goto out;
9176                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9177                 if (key.objectid != bytenr ||
9178                     key.type != BTRFS_EXTENT_ITEM_KEY)
9179                         goto out;
9180                 len = key.offset;
9181                 btrfs_release_path(&path);
9182         }
9183         key.objectid = root_id;
9184         key.type = BTRFS_ROOT_ITEM_KEY;
9185         key.offset = (u64)-1;
9186         btrfs_init_path(&path);
9187
9188         root = btrfs_read_fs_root(fs_info, &key);
9189         if (IS_ERR(root))
9190                 goto out;
9191
9192         key.objectid = objectid;
9193         key.type = BTRFS_EXTENT_DATA_KEY;
9194         /*
9195          * It can be nasty as data backref offset is
9196          * file offset - file extent offset, which is smaller or
9197          * equal to original backref offset.  The only special case is
9198          * overflow.  So we need to special check and do further search.
9199          */
9200         key.offset = offset & (1ULL << 63) ? 0 : offset;
9201
9202         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9203         if (ret < 0)
9204                 goto out;
9205
9206         /*
9207          * Search afterwards to get correct one
9208          * NOTE: As we must do a comprehensive check on the data backref to
9209          * make sure the dref count also matches, we must iterate all file
9210          * extents for that inode.
9211          */
9212         while (1) {
9213                 leaf = path.nodes[0];
9214                 slot = path.slots[0];
9215
9216                 btrfs_item_key_to_cpu(leaf, &key, slot);
9217                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9218                         break;
9219                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9220                 /*
9221                  * Except normal disk bytenr and disk num bytes, we still
9222                  * need to do extra check on dbackref offset as
9223                  * dbackref offset = file_offset - file_extent_offset
9224                  */
9225                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9226                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9227                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9228                     offset)
9229                         found_count++;
9230
9231                 ret = btrfs_next_item(root, &path);
9232                 if (ret)
9233                         break;
9234         }
9235 out:
9236         btrfs_release_path(&path);
9237         if (found_count != count) {
9238                 error(
9239 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9240                         bytenr, len, root_id, objectid, offset, count, found_count);
9241                 return REFERENCER_MISSING;
9242         }
9243         return 0;
9244 }
9245
9246 /*
9247  * Check if the referencer of a shared data backref exists
9248  */
9249 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9250                                      u64 parent, u64 bytenr)
9251 {
9252         struct extent_buffer *eb;
9253         struct btrfs_key key;
9254         struct btrfs_file_extent_item *fi;
9255         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9256         u32 nr;
9257         int found_parent = 0;
9258         int i;
9259
9260         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9261         if (!extent_buffer_uptodate(eb))
9262                 goto out;
9263
9264         nr = btrfs_header_nritems(eb);
9265         for (i = 0; i < nr; i++) {
9266                 btrfs_item_key_to_cpu(eb, &key, i);
9267                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9268                         continue;
9269
9270                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9271                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9272                         continue;
9273
9274                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9275                         found_parent = 1;
9276                         break;
9277                 }
9278         }
9279
9280 out:
9281         free_extent_buffer(eb);
9282         if (!found_parent) {
9283                 error("shared extent %llu referencer lost (parent: %llu)",
9284                         bytenr, parent);
9285                 return REFERENCER_MISSING;
9286         }
9287         return 0;
9288 }
9289
9290 /*
9291  * This function will check a given extent item, including its backref and
9292  * itself (like crossing stripe boundary and type)
9293  *
9294  * Since we don't use extent_record anymore, introduce new error bit
9295  */
9296 static int check_extent_item(struct btrfs_fs_info *fs_info,
9297                              struct extent_buffer *eb, int slot)
9298 {
9299         struct btrfs_extent_item *ei;
9300         struct btrfs_extent_inline_ref *iref;
9301         struct btrfs_extent_data_ref *dref;
9302         unsigned long end;
9303         unsigned long ptr;
9304         int type;
9305         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9306         u32 item_size = btrfs_item_size_nr(eb, slot);
9307         u64 flags;
9308         u64 offset;
9309         int metadata = 0;
9310         int level;
9311         struct btrfs_key key;
9312         int ret;
9313         int err = 0;
9314
9315         btrfs_item_key_to_cpu(eb, &key, slot);
9316         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9317                 bytes_used += key.offset;
9318         else
9319                 bytes_used += nodesize;
9320
9321         if (item_size < sizeof(*ei)) {
9322                 /*
9323                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9324                  * old thing when on disk format is still un-determined.
9325                  * No need to care about it anymore
9326                  */
9327                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9328                 return -ENOTTY;
9329         }
9330
9331         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9332         flags = btrfs_extent_flags(eb, ei);
9333
9334         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9335                 metadata = 1;
9336         if (metadata && check_crossing_stripes(global_info, key.objectid,
9337                                                eb->len)) {
9338                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9339                       key.objectid, key.objectid + nodesize);
9340                 err |= CROSSING_STRIPE_BOUNDARY;
9341         }
9342
9343         ptr = (unsigned long)(ei + 1);
9344
9345         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9346                 /* Old EXTENT_ITEM metadata */
9347                 struct btrfs_tree_block_info *info;
9348
9349                 info = (struct btrfs_tree_block_info *)ptr;
9350                 level = btrfs_tree_block_level(eb, info);
9351                 ptr += sizeof(struct btrfs_tree_block_info);
9352         } else {
9353                 /* New METADATA_ITEM */
9354                 level = key.offset;
9355         }
9356         end = (unsigned long)ei + item_size;
9357
9358         if (ptr >= end) {
9359                 err |= ITEM_SIZE_MISMATCH;
9360                 goto out;
9361         }
9362
9363         /* Now check every backref in this extent item */
9364 next:
9365         iref = (struct btrfs_extent_inline_ref *)ptr;
9366         type = btrfs_extent_inline_ref_type(eb, iref);
9367         offset = btrfs_extent_inline_ref_offset(eb, iref);
9368         switch (type) {
9369         case BTRFS_TREE_BLOCK_REF_KEY:
9370                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9371                                                level);
9372                 err |= ret;
9373                 break;
9374         case BTRFS_SHARED_BLOCK_REF_KEY:
9375                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9376                                                  level);
9377                 err |= ret;
9378                 break;
9379         case BTRFS_EXTENT_DATA_REF_KEY:
9380                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9381                 ret = check_extent_data_backref(fs_info,
9382                                 btrfs_extent_data_ref_root(eb, dref),
9383                                 btrfs_extent_data_ref_objectid(eb, dref),
9384                                 btrfs_extent_data_ref_offset(eb, dref),
9385                                 key.objectid, key.offset,
9386                                 btrfs_extent_data_ref_count(eb, dref));
9387                 err |= ret;
9388                 break;
9389         case BTRFS_SHARED_DATA_REF_KEY:
9390                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9391                 err |= ret;
9392                 break;
9393         default:
9394                 error("extent[%llu %d %llu] has unknown ref type: %d",
9395                         key.objectid, key.type, key.offset, type);
9396                 err |= UNKNOWN_TYPE;
9397                 goto out;
9398         }
9399
9400         ptr += btrfs_extent_inline_ref_size(type);
9401         if (ptr < end)
9402                 goto next;
9403
9404 out:
9405         return err;
9406 }
9407
9408 /*
9409  * Check if a dev extent item is referred correctly by its chunk
9410  */
9411 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9412                                  struct extent_buffer *eb, int slot)
9413 {
9414         struct btrfs_root *chunk_root = fs_info->chunk_root;
9415         struct btrfs_dev_extent *ptr;
9416         struct btrfs_path path;
9417         struct btrfs_key chunk_key;
9418         struct btrfs_key devext_key;
9419         struct btrfs_chunk *chunk;
9420         struct extent_buffer *l;
9421         int num_stripes;
9422         u64 length;
9423         int i;
9424         int found_chunk = 0;
9425         int ret;
9426
9427         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9428         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9429         length = btrfs_dev_extent_length(eb, ptr);
9430
9431         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9432         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9433         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9434
9435         btrfs_init_path(&path);
9436         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9437         if (ret)
9438                 goto out;
9439
9440         l = path.nodes[0];
9441         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9442         if (btrfs_chunk_length(l, chunk) != length)
9443                 goto out;
9444
9445         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9446         for (i = 0; i < num_stripes; i++) {
9447                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9448                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9449
9450                 if (devid == devext_key.objectid &&
9451                     offset == devext_key.offset) {
9452                         found_chunk = 1;
9453                         break;
9454                 }
9455         }
9456 out:
9457         btrfs_release_path(&path);
9458         if (!found_chunk) {
9459                 error(
9460                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9461                         devext_key.objectid, devext_key.offset, length);
9462                 return REFERENCER_MISSING;
9463         }
9464         return 0;
9465 }
9466
9467 /*
9468  * Check if the used space is correct with the dev item
9469  */
9470 static int check_dev_item(struct btrfs_fs_info *fs_info,
9471                           struct extent_buffer *eb, int slot)
9472 {
9473         struct btrfs_root *dev_root = fs_info->dev_root;
9474         struct btrfs_dev_item *dev_item;
9475         struct btrfs_path path;
9476         struct btrfs_key key;
9477         struct btrfs_dev_extent *ptr;
9478         u64 dev_id;
9479         u64 used;
9480         u64 total = 0;
9481         int ret;
9482
9483         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9484         dev_id = btrfs_device_id(eb, dev_item);
9485         used = btrfs_device_bytes_used(eb, dev_item);
9486
9487         key.objectid = dev_id;
9488         key.type = BTRFS_DEV_EXTENT_KEY;
9489         key.offset = 0;
9490
9491         btrfs_init_path(&path);
9492         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9493         if (ret < 0) {
9494                 btrfs_item_key_to_cpu(eb, &key, slot);
9495                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9496                         key.objectid, key.type, key.offset);
9497                 btrfs_release_path(&path);
9498                 return REFERENCER_MISSING;
9499         }
9500
9501         /* Iterate dev_extents to calculate the used space of a device */
9502         while (1) {
9503                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9504
9505                 if (key.objectid > dev_id)
9506                         break;
9507                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9508                         goto next;
9509
9510                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9511                                      struct btrfs_dev_extent);
9512                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9513 next:
9514                 ret = btrfs_next_item(dev_root, &path);
9515                 if (ret)
9516                         break;
9517         }
9518         btrfs_release_path(&path);
9519
9520         if (used != total) {
9521                 btrfs_item_key_to_cpu(eb, &key, slot);
9522                 error(
9523 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9524                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9525                         BTRFS_DEV_EXTENT_KEY, dev_id);
9526                 return ACCOUNTING_MISMATCH;
9527         }
9528         return 0;
9529 }
9530
9531 /*
9532  * Check a block group item with its referener (chunk) and its used space
9533  * with extent/metadata item
9534  */
9535 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9536                                   struct extent_buffer *eb, int slot)
9537 {
9538         struct btrfs_root *extent_root = fs_info->extent_root;
9539         struct btrfs_root *chunk_root = fs_info->chunk_root;
9540         struct btrfs_block_group_item *bi;
9541         struct btrfs_block_group_item bg_item;
9542         struct btrfs_path path;
9543         struct btrfs_key bg_key;
9544         struct btrfs_key chunk_key;
9545         struct btrfs_key extent_key;
9546         struct btrfs_chunk *chunk;
9547         struct extent_buffer *leaf;
9548         struct btrfs_extent_item *ei;
9549         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9550         u64 flags;
9551         u64 bg_flags;
9552         u64 used;
9553         u64 total = 0;
9554         int ret;
9555         int err = 0;
9556
9557         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9558         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9559         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9560         used = btrfs_block_group_used(&bg_item);
9561         bg_flags = btrfs_block_group_flags(&bg_item);
9562
9563         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9564         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9565         chunk_key.offset = bg_key.objectid;
9566
9567         btrfs_init_path(&path);
9568         /* Search for the referencer chunk */
9569         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9570         if (ret) {
9571                 error(
9572                 "block group[%llu %llu] did not find the related chunk item",
9573                         bg_key.objectid, bg_key.offset);
9574                 err |= REFERENCER_MISSING;
9575         } else {
9576                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9577                                         struct btrfs_chunk);
9578                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9579                                                 bg_key.offset) {
9580                         error(
9581         "block group[%llu %llu] related chunk item length does not match",
9582                                 bg_key.objectid, bg_key.offset);
9583                         err |= REFERENCER_MISMATCH;
9584                 }
9585         }
9586         btrfs_release_path(&path);
9587
9588         /* Search from the block group bytenr */
9589         extent_key.objectid = bg_key.objectid;
9590         extent_key.type = 0;
9591         extent_key.offset = 0;
9592
9593         btrfs_init_path(&path);
9594         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9595         if (ret < 0)
9596                 goto out;
9597
9598         /* Iterate extent tree to account used space */
9599         while (1) {
9600                 leaf = path.nodes[0];
9601                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9602                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9603                         break;
9604
9605                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9606                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9607                         goto next;
9608                 if (extent_key.objectid < bg_key.objectid)
9609                         goto next;
9610
9611                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9612                         total += nodesize;
9613                 else
9614                         total += extent_key.offset;
9615
9616                 ei = btrfs_item_ptr(leaf, path.slots[0],
9617                                     struct btrfs_extent_item);
9618                 flags = btrfs_extent_flags(leaf, ei);
9619                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9620                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9621                                 error(
9622                         "bad extent[%llu, %llu) type mismatch with chunk",
9623                                         extent_key.objectid,
9624                                         extent_key.objectid + extent_key.offset);
9625                                 err |= CHUNK_TYPE_MISMATCH;
9626                         }
9627                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9628                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9629                                     BTRFS_BLOCK_GROUP_METADATA))) {
9630                                 error(
9631                         "bad extent[%llu, %llu) type mismatch with chunk",
9632                                         extent_key.objectid,
9633                                         extent_key.objectid + nodesize);
9634                                 err |= CHUNK_TYPE_MISMATCH;
9635                         }
9636                 }
9637 next:
9638                 ret = btrfs_next_item(extent_root, &path);
9639                 if (ret)
9640                         break;
9641         }
9642
9643 out:
9644         btrfs_release_path(&path);
9645
9646         if (total != used) {
9647                 error(
9648                 "block group[%llu %llu] used %llu but extent items used %llu",
9649                         bg_key.objectid, bg_key.offset, used, total);
9650                 err |= ACCOUNTING_MISMATCH;
9651         }
9652         return err;
9653 }
9654
9655 /*
9656  * Check a chunk item.
9657  * Including checking all referred dev_extents and block group
9658  */
9659 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9660                             struct extent_buffer *eb, int slot)
9661 {
9662         struct btrfs_root *extent_root = fs_info->extent_root;
9663         struct btrfs_root *dev_root = fs_info->dev_root;
9664         struct btrfs_path path;
9665         struct btrfs_key chunk_key;
9666         struct btrfs_key bg_key;
9667         struct btrfs_key devext_key;
9668         struct btrfs_chunk *chunk;
9669         struct extent_buffer *leaf;
9670         struct btrfs_block_group_item *bi;
9671         struct btrfs_block_group_item bg_item;
9672         struct btrfs_dev_extent *ptr;
9673         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9674         u64 length;
9675         u64 chunk_end;
9676         u64 type;
9677         u64 profile;
9678         int num_stripes;
9679         u64 offset;
9680         u64 objectid;
9681         int i;
9682         int ret;
9683         int err = 0;
9684
9685         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9686         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9687         length = btrfs_chunk_length(eb, chunk);
9688         chunk_end = chunk_key.offset + length;
9689         if (!IS_ALIGNED(length, sectorsize)) {
9690                 error("chunk[%llu %llu) not aligned to %u",
9691                         chunk_key.offset, chunk_end, sectorsize);
9692                 err |= BYTES_UNALIGNED;
9693                 goto out;
9694         }
9695
9696         type = btrfs_chunk_type(eb, chunk);
9697         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9698         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9699                 error("chunk[%llu %llu) has no chunk type",
9700                         chunk_key.offset, chunk_end);
9701                 err |= UNKNOWN_TYPE;
9702         }
9703         if (profile && (profile & (profile - 1))) {
9704                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9705                         chunk_key.offset, chunk_end, profile);
9706                 err |= UNKNOWN_TYPE;
9707         }
9708
9709         bg_key.objectid = chunk_key.offset;
9710         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9711         bg_key.offset = length;
9712
9713         btrfs_init_path(&path);
9714         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9715         if (ret) {
9716                 error(
9717                 "chunk[%llu %llu) did not find the related block group item",
9718                         chunk_key.offset, chunk_end);
9719                 err |= REFERENCER_MISSING;
9720         } else{
9721                 leaf = path.nodes[0];
9722                 bi = btrfs_item_ptr(leaf, path.slots[0],
9723                                     struct btrfs_block_group_item);
9724                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9725                                    sizeof(bg_item));
9726                 if (btrfs_block_group_flags(&bg_item) != type) {
9727                         error(
9728 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9729                                 chunk_key.offset, chunk_end, type,
9730                                 btrfs_block_group_flags(&bg_item));
9731                         err |= REFERENCER_MISSING;
9732                 }
9733         }
9734
9735         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9736         for (i = 0; i < num_stripes; i++) {
9737                 btrfs_release_path(&path);
9738                 btrfs_init_path(&path);
9739                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9740                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9741                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9742
9743                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9744                                         0, 0);
9745                 if (ret)
9746                         goto not_match_dev;
9747
9748                 leaf = path.nodes[0];
9749                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9750                                      struct btrfs_dev_extent);
9751                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9752                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9753                 if (objectid != chunk_key.objectid ||
9754                     offset != chunk_key.offset ||
9755                     btrfs_dev_extent_length(leaf, ptr) != length)
9756                         goto not_match_dev;
9757                 continue;
9758 not_match_dev:
9759                 err |= BACKREF_MISSING;
9760                 error(
9761                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9762                         chunk_key.objectid, chunk_end, i);
9763                 continue;
9764         }
9765         btrfs_release_path(&path);
9766 out:
9767         return err;
9768 }
9769
9770 /*
9771  * Main entry function to check known items and update related accounting info
9772  */
9773 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9774 {
9775         struct btrfs_fs_info *fs_info = root->fs_info;
9776         struct btrfs_key key;
9777         int slot = 0;
9778         int type;
9779         struct btrfs_extent_data_ref *dref;
9780         int ret;
9781         int err = 0;
9782
9783 next:
9784         btrfs_item_key_to_cpu(eb, &key, slot);
9785         type = key.type;
9786
9787         switch (type) {
9788         case BTRFS_EXTENT_DATA_KEY:
9789                 ret = check_extent_data_item(root, eb, slot);
9790                 err |= ret;
9791                 break;
9792         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9793                 ret = check_block_group_item(fs_info, eb, slot);
9794                 err |= ret;
9795                 break;
9796         case BTRFS_DEV_ITEM_KEY:
9797                 ret = check_dev_item(fs_info, eb, slot);
9798                 err |= ret;
9799                 break;
9800         case BTRFS_CHUNK_ITEM_KEY:
9801                 ret = check_chunk_item(fs_info, eb, slot);
9802                 err |= ret;
9803                 break;
9804         case BTRFS_DEV_EXTENT_KEY:
9805                 ret = check_dev_extent_item(fs_info, eb, slot);
9806                 err |= ret;
9807                 break;
9808         case BTRFS_EXTENT_ITEM_KEY:
9809         case BTRFS_METADATA_ITEM_KEY:
9810                 ret = check_extent_item(fs_info, eb, slot);
9811                 err |= ret;
9812                 break;
9813         case BTRFS_EXTENT_CSUM_KEY:
9814                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9815                 break;
9816         case BTRFS_TREE_BLOCK_REF_KEY:
9817                 ret = check_tree_block_backref(fs_info, key.offset,
9818                                                key.objectid, -1);
9819                 err |= ret;
9820                 break;
9821         case BTRFS_EXTENT_DATA_REF_KEY:
9822                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9823                 ret = check_extent_data_backref(fs_info,
9824                                 btrfs_extent_data_ref_root(eb, dref),
9825                                 btrfs_extent_data_ref_objectid(eb, dref),
9826                                 btrfs_extent_data_ref_offset(eb, dref),
9827                                 key.objectid, 0,
9828                                 btrfs_extent_data_ref_count(eb, dref));
9829                 err |= ret;
9830                 break;
9831         case BTRFS_SHARED_BLOCK_REF_KEY:
9832                 ret = check_shared_block_backref(fs_info, key.offset,
9833                                                  key.objectid, -1);
9834                 err |= ret;
9835                 break;
9836         case BTRFS_SHARED_DATA_REF_KEY:
9837                 ret = check_shared_data_backref(fs_info, key.offset,
9838                                                 key.objectid);
9839                 err |= ret;
9840                 break;
9841         default:
9842                 break;
9843         }
9844
9845         if (++slot < btrfs_header_nritems(eb))
9846                 goto next;
9847
9848         return err;
9849 }
9850
9851 /*
9852  * Helper function for later fs/subvol tree check.  To determine if a tree
9853  * block should be checked.
9854  * This function will ensure only the direct referencer with lowest rootid to
9855  * check a fs/subvolume tree block.
9856  *
9857  * Backref check at extent tree would detect errors like missing subvolume
9858  * tree, so we can do aggressive check to reduce duplicated checks.
9859  */
9860 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9861 {
9862         struct btrfs_root *extent_root = root->fs_info->extent_root;
9863         struct btrfs_key key;
9864         struct btrfs_path path;
9865         struct extent_buffer *leaf;
9866         int slot;
9867         struct btrfs_extent_item *ei;
9868         unsigned long ptr;
9869         unsigned long end;
9870         int type;
9871         u32 item_size;
9872         u64 offset;
9873         struct btrfs_extent_inline_ref *iref;
9874         int ret;
9875
9876         btrfs_init_path(&path);
9877         key.objectid = btrfs_header_bytenr(eb);
9878         key.type = BTRFS_METADATA_ITEM_KEY;
9879         key.offset = (u64)-1;
9880
9881         /*
9882          * Any failure in backref resolving means we can't determine
9883          * whom the tree block belongs to.
9884          * So in that case, we need to check that tree block
9885          */
9886         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9887         if (ret < 0)
9888                 goto need_check;
9889
9890         ret = btrfs_previous_extent_item(extent_root, &path,
9891                                          btrfs_header_bytenr(eb));
9892         if (ret)
9893                 goto need_check;
9894
9895         leaf = path.nodes[0];
9896         slot = path.slots[0];
9897         btrfs_item_key_to_cpu(leaf, &key, slot);
9898         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9899
9900         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9901                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9902         } else {
9903                 struct btrfs_tree_block_info *info;
9904
9905                 info = (struct btrfs_tree_block_info *)(ei + 1);
9906                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9907         }
9908
9909         item_size = btrfs_item_size_nr(leaf, slot);
9910         ptr = (unsigned long)iref;
9911         end = (unsigned long)ei + item_size;
9912         while (ptr < end) {
9913                 iref = (struct btrfs_extent_inline_ref *)ptr;
9914                 type = btrfs_extent_inline_ref_type(leaf, iref);
9915                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9916
9917                 /*
9918                  * We only check the tree block if current root is
9919                  * the lowest referencer of it.
9920                  */
9921                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9922                     offset < root->objectid) {
9923                         btrfs_release_path(&path);
9924                         return 0;
9925                 }
9926
9927                 ptr += btrfs_extent_inline_ref_size(type);
9928         }
9929         /*
9930          * Normally we should also check keyed tree block ref, but that may be
9931          * very time consuming.  Inlined ref should already make us skip a lot
9932          * of refs now.  So skip search keyed tree block ref.
9933          */
9934
9935 need_check:
9936         btrfs_release_path(&path);
9937         return 1;
9938 }
9939
9940 /*
9941  * Traversal function for tree block. We will do:
9942  * 1) Skip shared fs/subvolume tree blocks
9943  * 2) Update related bytes accounting
9944  * 3) Pre-order traversal
9945  */
9946 static int traverse_tree_block(struct btrfs_root *root,
9947                                 struct extent_buffer *node)
9948 {
9949         struct extent_buffer *eb;
9950         struct btrfs_key key;
9951         struct btrfs_key drop_key;
9952         int level;
9953         u64 nr;
9954         int i;
9955         int err = 0;
9956         int ret;
9957
9958         /*
9959          * Skip shared fs/subvolume tree block, in that case they will
9960          * be checked by referencer with lowest rootid
9961          */
9962         if (is_fstree(root->objectid) && !should_check(root, node))
9963                 return 0;
9964
9965         /* Update bytes accounting */
9966         total_btree_bytes += node->len;
9967         if (fs_root_objectid(btrfs_header_owner(node)))
9968                 total_fs_tree_bytes += node->len;
9969         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9970                 total_extent_tree_bytes += node->len;
9971         if (!found_old_backref &&
9972             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9973             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9974             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9975                 found_old_backref = 1;
9976
9977         /* pre-order tranversal, check itself first */
9978         level = btrfs_header_level(node);
9979         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9980                                    btrfs_header_level(node),
9981                                    btrfs_header_owner(node));
9982         err |= ret;
9983         if (err)
9984                 error(
9985         "check %s failed root %llu bytenr %llu level %d, force continue check",
9986                         level ? "node":"leaf", root->objectid,
9987                         btrfs_header_bytenr(node), btrfs_header_level(node));
9988
9989         if (!level) {
9990                 btree_space_waste += btrfs_leaf_free_space(root, node);
9991                 ret = check_leaf_items(root, node);
9992                 err |= ret;
9993                 return err;
9994         }
9995
9996         nr = btrfs_header_nritems(node);
9997         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
9998         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
9999                 sizeof(struct btrfs_key_ptr);
10000
10001         /* Then check all its children */
10002         for (i = 0; i < nr; i++) {
10003                 u64 blocknr = btrfs_node_blockptr(node, i);
10004
10005                 btrfs_node_key_to_cpu(node, &key, i);
10006                 if (level == root->root_item.drop_level &&
10007                     is_dropped_key(&key, &drop_key))
10008                         continue;
10009
10010                 /*
10011                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10012                  * to call the function itself.
10013                  */
10014                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10015                 if (extent_buffer_uptodate(eb)) {
10016                         ret = traverse_tree_block(root, eb);
10017                         err |= ret;
10018                 }
10019                 free_extent_buffer(eb);
10020         }
10021
10022         return err;
10023 }
10024
10025 /*
10026  * Low memory usage version check_chunks_and_extents.
10027  */
10028 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10029 {
10030         struct btrfs_path path;
10031         struct btrfs_key key;
10032         struct btrfs_root *root1;
10033         struct btrfs_root *cur_root;
10034         int err = 0;
10035         int ret;
10036
10037         root1 = root->fs_info->chunk_root;
10038         ret = traverse_tree_block(root1, root1->node);
10039         err |= ret;
10040
10041         root1 = root->fs_info->tree_root;
10042         ret = traverse_tree_block(root1, root1->node);
10043         err |= ret;
10044
10045         btrfs_init_path(&path);
10046         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10047         key.offset = 0;
10048         key.type = BTRFS_ROOT_ITEM_KEY;
10049
10050         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10051         if (ret) {
10052                 error("cannot find extent treet in tree_root");
10053                 goto out;
10054         }
10055
10056         while (1) {
10057                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10058                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10059                         goto next;
10060                 key.offset = (u64)-1;
10061
10062                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10063                 if (IS_ERR(cur_root) || !cur_root) {
10064                         error("failed to read tree: %lld", key.objectid);
10065                         goto next;
10066                 }
10067
10068                 ret = traverse_tree_block(cur_root, cur_root->node);
10069                 err |= ret;
10070
10071 next:
10072                 ret = btrfs_next_item(root1, &path);
10073                 if (ret)
10074                         goto out;
10075         }
10076
10077 out:
10078         btrfs_release_path(&path);
10079         return err;
10080 }
10081
10082 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10083                            struct btrfs_root *root, int overwrite)
10084 {
10085         struct extent_buffer *c;
10086         struct extent_buffer *old = root->node;
10087         int level;
10088         int ret;
10089         struct btrfs_disk_key disk_key = {0,0,0};
10090
10091         level = 0;
10092
10093         if (overwrite) {
10094                 c = old;
10095                 extent_buffer_get(c);
10096                 goto init;
10097         }
10098         c = btrfs_alloc_free_block(trans, root,
10099                                    root->nodesize,
10100                                    root->root_key.objectid,
10101                                    &disk_key, level, 0, 0);
10102         if (IS_ERR(c)) {
10103                 c = old;
10104                 extent_buffer_get(c);
10105                 overwrite = 1;
10106         }
10107 init:
10108         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10109         btrfs_set_header_level(c, level);
10110         btrfs_set_header_bytenr(c, c->start);
10111         btrfs_set_header_generation(c, trans->transid);
10112         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10113         btrfs_set_header_owner(c, root->root_key.objectid);
10114
10115         write_extent_buffer(c, root->fs_info->fsid,
10116                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10117
10118         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10119                             btrfs_header_chunk_tree_uuid(c),
10120                             BTRFS_UUID_SIZE);
10121
10122         btrfs_mark_buffer_dirty(c);
10123         /*
10124          * this case can happen in the following case:
10125          *
10126          * 1.overwrite previous root.
10127          *
10128          * 2.reinit reloc data root, this is because we skip pin
10129          * down reloc data tree before which means we can allocate
10130          * same block bytenr here.
10131          */
10132         if (old->start == c->start) {
10133                 btrfs_set_root_generation(&root->root_item,
10134                                           trans->transid);
10135                 root->root_item.level = btrfs_header_level(root->node);
10136                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10137                                         &root->root_key, &root->root_item);
10138                 if (ret) {
10139                         free_extent_buffer(c);
10140                         return ret;
10141                 }
10142         }
10143         free_extent_buffer(old);
10144         root->node = c;
10145         add_root_to_dirty_list(root);
10146         return 0;
10147 }
10148
10149 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10150                                 struct extent_buffer *eb, int tree_root)
10151 {
10152         struct extent_buffer *tmp;
10153         struct btrfs_root_item *ri;
10154         struct btrfs_key key;
10155         u64 bytenr;
10156         u32 nodesize;
10157         int level = btrfs_header_level(eb);
10158         int nritems;
10159         int ret;
10160         int i;
10161
10162         /*
10163          * If we have pinned this block before, don't pin it again.
10164          * This can not only avoid forever loop with broken filesystem
10165          * but also give us some speedups.
10166          */
10167         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10168                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10169                 return 0;
10170
10171         btrfs_pin_extent(fs_info, eb->start, eb->len);
10172
10173         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10174         nritems = btrfs_header_nritems(eb);
10175         for (i = 0; i < nritems; i++) {
10176                 if (level == 0) {
10177                         btrfs_item_key_to_cpu(eb, &key, i);
10178                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10179                                 continue;
10180                         /* Skip the extent root and reloc roots */
10181                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10182                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10183                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10184                                 continue;
10185                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10186                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10187
10188                         /*
10189                          * If at any point we start needing the real root we
10190                          * will have to build a stump root for the root we are
10191                          * in, but for now this doesn't actually use the root so
10192                          * just pass in extent_root.
10193                          */
10194                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10195                                               nodesize, 0);
10196                         if (!extent_buffer_uptodate(tmp)) {
10197                                 fprintf(stderr, "Error reading root block\n");
10198                                 return -EIO;
10199                         }
10200                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10201                         free_extent_buffer(tmp);
10202                         if (ret)
10203                                 return ret;
10204                 } else {
10205                         bytenr = btrfs_node_blockptr(eb, i);
10206
10207                         /* If we aren't the tree root don't read the block */
10208                         if (level == 1 && !tree_root) {
10209                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10210                                 continue;
10211                         }
10212
10213                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10214                                               nodesize, 0);
10215                         if (!extent_buffer_uptodate(tmp)) {
10216                                 fprintf(stderr, "Error reading tree block\n");
10217                                 return -EIO;
10218                         }
10219                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10220                         free_extent_buffer(tmp);
10221                         if (ret)
10222                                 return ret;
10223                 }
10224         }
10225
10226         return 0;
10227 }
10228
10229 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10230 {
10231         int ret;
10232
10233         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10234         if (ret)
10235                 return ret;
10236
10237         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10238 }
10239
10240 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10241 {
10242         struct btrfs_block_group_cache *cache;
10243         struct btrfs_path *path;
10244         struct extent_buffer *leaf;
10245         struct btrfs_chunk *chunk;
10246         struct btrfs_key key;
10247         int ret;
10248         u64 start;
10249
10250         path = btrfs_alloc_path();
10251         if (!path)
10252                 return -ENOMEM;
10253
10254         key.objectid = 0;
10255         key.type = BTRFS_CHUNK_ITEM_KEY;
10256         key.offset = 0;
10257
10258         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10259         if (ret < 0) {
10260                 btrfs_free_path(path);
10261                 return ret;
10262         }
10263
10264         /*
10265          * We do this in case the block groups were screwed up and had alloc
10266          * bits that aren't actually set on the chunks.  This happens with
10267          * restored images every time and could happen in real life I guess.
10268          */
10269         fs_info->avail_data_alloc_bits = 0;
10270         fs_info->avail_metadata_alloc_bits = 0;
10271         fs_info->avail_system_alloc_bits = 0;
10272
10273         /* First we need to create the in-memory block groups */
10274         while (1) {
10275                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10276                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10277                         if (ret < 0) {
10278                                 btrfs_free_path(path);
10279                                 return ret;
10280                         }
10281                         if (ret) {
10282                                 ret = 0;
10283                                 break;
10284                         }
10285                 }
10286                 leaf = path->nodes[0];
10287                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10288                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10289                         path->slots[0]++;
10290                         continue;
10291                 }
10292
10293                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10294                                        struct btrfs_chunk);
10295                 btrfs_add_block_group(fs_info, 0,
10296                                       btrfs_chunk_type(leaf, chunk),
10297                                       key.objectid, key.offset,
10298                                       btrfs_chunk_length(leaf, chunk));
10299                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10300                                  key.offset + btrfs_chunk_length(leaf, chunk),
10301                                  GFP_NOFS);
10302                 path->slots[0]++;
10303         }
10304         start = 0;
10305         while (1) {
10306                 cache = btrfs_lookup_first_block_group(fs_info, start);
10307                 if (!cache)
10308                         break;
10309                 cache->cached = 1;
10310                 start = cache->key.objectid + cache->key.offset;
10311         }
10312
10313         btrfs_free_path(path);
10314         return 0;
10315 }
10316
10317 static int reset_balance(struct btrfs_trans_handle *trans,
10318                          struct btrfs_fs_info *fs_info)
10319 {
10320         struct btrfs_root *root = fs_info->tree_root;
10321         struct btrfs_path *path;
10322         struct extent_buffer *leaf;
10323         struct btrfs_key key;
10324         int del_slot, del_nr = 0;
10325         int ret;
10326         int found = 0;
10327
10328         path = btrfs_alloc_path();
10329         if (!path)
10330                 return -ENOMEM;
10331
10332         key.objectid = BTRFS_BALANCE_OBJECTID;
10333         key.type = BTRFS_BALANCE_ITEM_KEY;
10334         key.offset = 0;
10335
10336         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10337         if (ret) {
10338                 if (ret > 0)
10339                         ret = 0;
10340                 if (!ret)
10341                         goto reinit_data_reloc;
10342                 else
10343                         goto out;
10344         }
10345
10346         ret = btrfs_del_item(trans, root, path);
10347         if (ret)
10348                 goto out;
10349         btrfs_release_path(path);
10350
10351         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10352         key.type = BTRFS_ROOT_ITEM_KEY;
10353         key.offset = 0;
10354
10355         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10356         if (ret < 0)
10357                 goto out;
10358         while (1) {
10359                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10360                         if (!found)
10361                                 break;
10362
10363                         if (del_nr) {
10364                                 ret = btrfs_del_items(trans, root, path,
10365                                                       del_slot, del_nr);
10366                                 del_nr = 0;
10367                                 if (ret)
10368                                         goto out;
10369                         }
10370                         key.offset++;
10371                         btrfs_release_path(path);
10372
10373                         found = 0;
10374                         ret = btrfs_search_slot(trans, root, &key, path,
10375                                                 -1, 1);
10376                         if (ret < 0)
10377                                 goto out;
10378                         continue;
10379                 }
10380                 found = 1;
10381                 leaf = path->nodes[0];
10382                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10383                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10384                         break;
10385                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10386                         path->slots[0]++;
10387                         continue;
10388                 }
10389                 if (!del_nr) {
10390                         del_slot = path->slots[0];
10391                         del_nr = 1;
10392                 } else {
10393                         del_nr++;
10394                 }
10395                 path->slots[0]++;
10396         }
10397
10398         if (del_nr) {
10399                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10400                 if (ret)
10401                         goto out;
10402         }
10403         btrfs_release_path(path);
10404
10405 reinit_data_reloc:
10406         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10407         key.type = BTRFS_ROOT_ITEM_KEY;
10408         key.offset = (u64)-1;
10409         root = btrfs_read_fs_root(fs_info, &key);
10410         if (IS_ERR(root)) {
10411                 fprintf(stderr, "Error reading data reloc tree\n");
10412                 ret = PTR_ERR(root);
10413                 goto out;
10414         }
10415         record_root_in_trans(trans, root);
10416         ret = btrfs_fsck_reinit_root(trans, root, 0);
10417         if (ret)
10418                 goto out;
10419         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10420 out:
10421         btrfs_free_path(path);
10422         return ret;
10423 }
10424
10425 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10426                               struct btrfs_fs_info *fs_info)
10427 {
10428         u64 start = 0;
10429         int ret;
10430
10431         /*
10432          * The only reason we don't do this is because right now we're just
10433          * walking the trees we find and pinning down their bytes, we don't look
10434          * at any of the leaves.  In order to do mixed groups we'd have to check
10435          * the leaves of any fs roots and pin down the bytes for any file
10436          * extents we find.  Not hard but why do it if we don't have to?
10437          */
10438         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10439                 fprintf(stderr, "We don't support re-initing the extent tree "
10440                         "for mixed block groups yet, please notify a btrfs "
10441                         "developer you want to do this so they can add this "
10442                         "functionality.\n");
10443                 return -EINVAL;
10444         }
10445
10446         /*
10447          * first we need to walk all of the trees except the extent tree and pin
10448          * down the bytes that are in use so we don't overwrite any existing
10449          * metadata.
10450          */
10451         ret = pin_metadata_blocks(fs_info);
10452         if (ret) {
10453                 fprintf(stderr, "error pinning down used bytes\n");
10454                 return ret;
10455         }
10456
10457         /*
10458          * Need to drop all the block groups since we're going to recreate all
10459          * of them again.
10460          */
10461         btrfs_free_block_groups(fs_info);
10462         ret = reset_block_groups(fs_info);
10463         if (ret) {
10464                 fprintf(stderr, "error resetting the block groups\n");
10465                 return ret;
10466         }
10467
10468         /* Ok we can allocate now, reinit the extent root */
10469         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10470         if (ret) {
10471                 fprintf(stderr, "extent root initialization failed\n");
10472                 /*
10473                  * When the transaction code is updated we should end the
10474                  * transaction, but for now progs only knows about commit so
10475                  * just return an error.
10476                  */
10477                 return ret;
10478         }
10479
10480         /*
10481          * Now we have all the in-memory block groups setup so we can make
10482          * allocations properly, and the metadata we care about is safe since we
10483          * pinned all of it above.
10484          */
10485         while (1) {
10486                 struct btrfs_block_group_cache *cache;
10487
10488                 cache = btrfs_lookup_first_block_group(fs_info, start);
10489                 if (!cache)
10490                         break;
10491                 start = cache->key.objectid + cache->key.offset;
10492                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10493                                         &cache->key, &cache->item,
10494                                         sizeof(cache->item));
10495                 if (ret) {
10496                         fprintf(stderr, "Error adding block group\n");
10497                         return ret;
10498                 }
10499                 btrfs_extent_post_op(trans, fs_info->extent_root);
10500         }
10501
10502         ret = reset_balance(trans, fs_info);
10503         if (ret)
10504                 fprintf(stderr, "error resetting the pending balance\n");
10505
10506         return ret;
10507 }
10508
10509 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10510 {
10511         struct btrfs_path *path;
10512         struct btrfs_trans_handle *trans;
10513         struct btrfs_key key;
10514         int ret;
10515
10516         printf("Recowing metadata block %llu\n", eb->start);
10517         key.objectid = btrfs_header_owner(eb);
10518         key.type = BTRFS_ROOT_ITEM_KEY;
10519         key.offset = (u64)-1;
10520
10521         root = btrfs_read_fs_root(root->fs_info, &key);
10522         if (IS_ERR(root)) {
10523                 fprintf(stderr, "Couldn't find owner root %llu\n",
10524                         key.objectid);
10525                 return PTR_ERR(root);
10526         }
10527
10528         path = btrfs_alloc_path();
10529         if (!path)
10530                 return -ENOMEM;
10531
10532         trans = btrfs_start_transaction(root, 1);
10533         if (IS_ERR(trans)) {
10534                 btrfs_free_path(path);
10535                 return PTR_ERR(trans);
10536         }
10537
10538         path->lowest_level = btrfs_header_level(eb);
10539         if (path->lowest_level)
10540                 btrfs_node_key_to_cpu(eb, &key, 0);
10541         else
10542                 btrfs_item_key_to_cpu(eb, &key, 0);
10543
10544         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10545         btrfs_commit_transaction(trans, root);
10546         btrfs_free_path(path);
10547         return ret;
10548 }
10549
10550 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10551 {
10552         struct btrfs_path *path;
10553         struct btrfs_trans_handle *trans;
10554         struct btrfs_key key;
10555         int ret;
10556
10557         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10558                bad->key.type, bad->key.offset);
10559         key.objectid = bad->root_id;
10560         key.type = BTRFS_ROOT_ITEM_KEY;
10561         key.offset = (u64)-1;
10562
10563         root = btrfs_read_fs_root(root->fs_info, &key);
10564         if (IS_ERR(root)) {
10565                 fprintf(stderr, "Couldn't find owner root %llu\n",
10566                         key.objectid);
10567                 return PTR_ERR(root);
10568         }
10569
10570         path = btrfs_alloc_path();
10571         if (!path)
10572                 return -ENOMEM;
10573
10574         trans = btrfs_start_transaction(root, 1);
10575         if (IS_ERR(trans)) {
10576                 btrfs_free_path(path);
10577                 return PTR_ERR(trans);
10578         }
10579
10580         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10581         if (ret) {
10582                 if (ret > 0)
10583                         ret = 0;
10584                 goto out;
10585         }
10586         ret = btrfs_del_item(trans, root, path);
10587 out:
10588         btrfs_commit_transaction(trans, root);
10589         btrfs_free_path(path);
10590         return ret;
10591 }
10592
10593 static int zero_log_tree(struct btrfs_root *root)
10594 {
10595         struct btrfs_trans_handle *trans;
10596         int ret;
10597
10598         trans = btrfs_start_transaction(root, 1);
10599         if (IS_ERR(trans)) {
10600                 ret = PTR_ERR(trans);
10601                 return ret;
10602         }
10603         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10604         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10605         ret = btrfs_commit_transaction(trans, root);
10606         return ret;
10607 }
10608
10609 static int populate_csum(struct btrfs_trans_handle *trans,
10610                          struct btrfs_root *csum_root, char *buf, u64 start,
10611                          u64 len)
10612 {
10613         u64 offset = 0;
10614         u64 sectorsize;
10615         int ret = 0;
10616
10617         while (offset < len) {
10618                 sectorsize = csum_root->sectorsize;
10619                 ret = read_extent_data(csum_root, buf, start + offset,
10620                                        &sectorsize, 0);
10621                 if (ret)
10622                         break;
10623                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10624                                             start + offset, buf, sectorsize);
10625                 if (ret)
10626                         break;
10627                 offset += sectorsize;
10628         }
10629         return ret;
10630 }
10631
10632 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10633                                       struct btrfs_root *csum_root,
10634                                       struct btrfs_root *cur_root)
10635 {
10636         struct btrfs_path *path;
10637         struct btrfs_key key;
10638         struct extent_buffer *node;
10639         struct btrfs_file_extent_item *fi;
10640         char *buf = NULL;
10641         u64 start = 0;
10642         u64 len = 0;
10643         int slot = 0;
10644         int ret = 0;
10645
10646         path = btrfs_alloc_path();
10647         if (!path)
10648                 return -ENOMEM;
10649         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10650         if (!buf) {
10651                 ret = -ENOMEM;
10652                 goto out;
10653         }
10654
10655         key.objectid = 0;
10656         key.offset = 0;
10657         key.type = 0;
10658
10659         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10660         if (ret < 0)
10661                 goto out;
10662         /* Iterate all regular file extents and fill its csum */
10663         while (1) {
10664                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10665
10666                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10667                         goto next;
10668                 node = path->nodes[0];
10669                 slot = path->slots[0];
10670                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10671                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10672                         goto next;
10673                 start = btrfs_file_extent_disk_bytenr(node, fi);
10674                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10675
10676                 ret = populate_csum(trans, csum_root, buf, start, len);
10677                 if (ret == -EEXIST)
10678                         ret = 0;
10679                 if (ret < 0)
10680                         goto out;
10681 next:
10682                 /*
10683                  * TODO: if next leaf is corrupted, jump to nearest next valid
10684                  * leaf.
10685                  */
10686                 ret = btrfs_next_item(cur_root, path);
10687                 if (ret < 0)
10688                         goto out;
10689                 if (ret > 0) {
10690                         ret = 0;
10691                         goto out;
10692                 }
10693         }
10694
10695 out:
10696         btrfs_free_path(path);
10697         free(buf);
10698         return ret;
10699 }
10700
10701 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10702                                   struct btrfs_root *csum_root)
10703 {
10704         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10705         struct btrfs_path *path;
10706         struct btrfs_root *tree_root = fs_info->tree_root;
10707         struct btrfs_root *cur_root;
10708         struct extent_buffer *node;
10709         struct btrfs_key key;
10710         int slot = 0;
10711         int ret = 0;
10712
10713         path = btrfs_alloc_path();
10714         if (!path)
10715                 return -ENOMEM;
10716
10717         key.objectid = BTRFS_FS_TREE_OBJECTID;
10718         key.offset = 0;
10719         key.type = BTRFS_ROOT_ITEM_KEY;
10720
10721         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10722         if (ret < 0)
10723                 goto out;
10724         if (ret > 0) {
10725                 ret = -ENOENT;
10726                 goto out;
10727         }
10728
10729         while (1) {
10730                 node = path->nodes[0];
10731                 slot = path->slots[0];
10732                 btrfs_item_key_to_cpu(node, &key, slot);
10733                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10734                         goto out;
10735                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10736                         goto next;
10737                 if (!is_fstree(key.objectid))
10738                         goto next;
10739                 key.offset = (u64)-1;
10740
10741                 cur_root = btrfs_read_fs_root(fs_info, &key);
10742                 if (IS_ERR(cur_root) || !cur_root) {
10743                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10744                                 key.objectid);
10745                         goto out;
10746                 }
10747                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10748                                 cur_root);
10749                 if (ret < 0)
10750                         goto out;
10751 next:
10752                 ret = btrfs_next_item(tree_root, path);
10753                 if (ret > 0) {
10754                         ret = 0;
10755                         goto out;
10756                 }
10757                 if (ret < 0)
10758                         goto out;
10759         }
10760
10761 out:
10762         btrfs_free_path(path);
10763         return ret;
10764 }
10765
10766 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10767                                       struct btrfs_root *csum_root)
10768 {
10769         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10770         struct btrfs_path *path;
10771         struct btrfs_extent_item *ei;
10772         struct extent_buffer *leaf;
10773         char *buf;
10774         struct btrfs_key key;
10775         int ret;
10776
10777         path = btrfs_alloc_path();
10778         if (!path)
10779                 return -ENOMEM;
10780
10781         key.objectid = 0;
10782         key.type = BTRFS_EXTENT_ITEM_KEY;
10783         key.offset = 0;
10784
10785         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10786         if (ret < 0) {
10787                 btrfs_free_path(path);
10788                 return ret;
10789         }
10790
10791         buf = malloc(csum_root->sectorsize);
10792         if (!buf) {
10793                 btrfs_free_path(path);
10794                 return -ENOMEM;
10795         }
10796
10797         while (1) {
10798                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10799                         ret = btrfs_next_leaf(extent_root, path);
10800                         if (ret < 0)
10801                                 break;
10802                         if (ret) {
10803                                 ret = 0;
10804                                 break;
10805                         }
10806                 }
10807                 leaf = path->nodes[0];
10808
10809                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10810                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10811                         path->slots[0]++;
10812                         continue;
10813                 }
10814
10815                 ei = btrfs_item_ptr(leaf, path->slots[0],
10816                                     struct btrfs_extent_item);
10817                 if (!(btrfs_extent_flags(leaf, ei) &
10818                       BTRFS_EXTENT_FLAG_DATA)) {
10819                         path->slots[0]++;
10820                         continue;
10821                 }
10822
10823                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10824                                     key.offset);
10825                 if (ret)
10826                         break;
10827                 path->slots[0]++;
10828         }
10829
10830         btrfs_free_path(path);
10831         free(buf);
10832         return ret;
10833 }
10834
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * Re-initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source of data extents.  When
 * @search_fs_tree is non-zero the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
10851
10852 static void free_roots_info_cache(void)
10853 {
10854         if (!roots_info_cache)
10855                 return;
10856
10857         while (!cache_tree_empty(roots_info_cache)) {
10858                 struct cache_extent *entry;
10859                 struct root_item_info *rii;
10860
10861                 entry = first_cache_extent(roots_info_cache);
10862                 if (!entry)
10863                         break;
10864                 remove_cache_extent(roots_info_cache, entry);
10865                 rii = container_of(entry, struct root_item_info, cache_extent);
10866                 free(rii);
10867         }
10868
10869         free(roots_info_cache);
10870         roots_info_cache = NULL;
10871 }
10872
10873 static int build_roots_info_cache(struct btrfs_fs_info *info)
10874 {
10875         int ret = 0;
10876         struct btrfs_key key;
10877         struct extent_buffer *leaf;
10878         struct btrfs_path *path;
10879
10880         if (!roots_info_cache) {
10881                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10882                 if (!roots_info_cache)
10883                         return -ENOMEM;
10884                 cache_tree_init(roots_info_cache);
10885         }
10886
10887         path = btrfs_alloc_path();
10888         if (!path)
10889                 return -ENOMEM;
10890
10891         key.objectid = 0;
10892         key.type = BTRFS_EXTENT_ITEM_KEY;
10893         key.offset = 0;
10894
10895         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10896         if (ret < 0)
10897                 goto out;
10898         leaf = path->nodes[0];
10899
10900         while (1) {
10901                 struct btrfs_key found_key;
10902                 struct btrfs_extent_item *ei;
10903                 struct btrfs_extent_inline_ref *iref;
10904                 int slot = path->slots[0];
10905                 int type;
10906                 u64 flags;
10907                 u64 root_id;
10908                 u8 level;
10909                 struct cache_extent *entry;
10910                 struct root_item_info *rii;
10911
10912                 if (slot >= btrfs_header_nritems(leaf)) {
10913                         ret = btrfs_next_leaf(info->extent_root, path);
10914                         if (ret < 0) {
10915                                 break;
10916                         } else if (ret) {
10917                                 ret = 0;
10918                                 break;
10919                         }
10920                         leaf = path->nodes[0];
10921                         slot = path->slots[0];
10922                 }
10923
10924                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10925
10926                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10927                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10928                         goto next;
10929
10930                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10931                 flags = btrfs_extent_flags(leaf, ei);
10932
10933                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10934                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10935                         goto next;
10936
10937                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10938                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10939                         level = found_key.offset;
10940                 } else {
10941                         struct btrfs_tree_block_info *binfo;
10942
10943                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10944                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10945                         level = btrfs_tree_block_level(leaf, binfo);
10946                 }
10947
10948                 /*
10949                  * For a root extent, it must be of the following type and the
10950                  * first (and only one) iref in the item.
10951                  */
10952                 type = btrfs_extent_inline_ref_type(leaf, iref);
10953                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10954                         goto next;
10955
10956                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10957                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10958                 if (!entry) {
10959                         rii = malloc(sizeof(struct root_item_info));
10960                         if (!rii) {
10961                                 ret = -ENOMEM;
10962                                 goto out;
10963                         }
10964                         rii->cache_extent.start = root_id;
10965                         rii->cache_extent.size = 1;
10966                         rii->level = (u8)-1;
10967                         entry = &rii->cache_extent;
10968                         ret = insert_cache_extent(roots_info_cache, entry);
10969                         ASSERT(ret == 0);
10970                 } else {
10971                         rii = container_of(entry, struct root_item_info,
10972                                            cache_extent);
10973                 }
10974
10975                 ASSERT(rii->cache_extent.start == root_id);
10976                 ASSERT(rii->cache_extent.size == 1);
10977
10978                 if (level > rii->level || rii->level == (u8)-1) {
10979                         rii->level = level;
10980                         rii->bytenr = found_key.objectid;
10981                         rii->gen = btrfs_extent_generation(leaf, ei);
10982                         rii->node_count = 1;
10983                 } else if (level == rii->level) {
10984                         rii->node_count++;
10985                 }
10986 next:
10987                 path->slots[0]++;
10988         }
10989
10990 out:
10991         btrfs_free_path(path);
10992
10993         return ret;
10994 }
10995
10996 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10997                                   struct btrfs_path *path,
10998                                   const struct btrfs_key *root_key,
10999                                   const int read_only_mode)
11000 {
11001         const u64 root_id = root_key->objectid;
11002         struct cache_extent *entry;
11003         struct root_item_info *rii;
11004         struct btrfs_root_item ri;
11005         unsigned long offset;
11006
11007         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11008         if (!entry) {
11009                 fprintf(stderr,
11010                         "Error: could not find extent items for root %llu\n",
11011                         root_key->objectid);
11012                 return -ENOENT;
11013         }
11014
11015         rii = container_of(entry, struct root_item_info, cache_extent);
11016         ASSERT(rii->cache_extent.start == root_id);
11017         ASSERT(rii->cache_extent.size == 1);
11018
11019         if (rii->node_count != 1) {
11020                 fprintf(stderr,
11021                         "Error: could not find btree root extent for root %llu\n",
11022                         root_id);
11023                 return -ENOENT;
11024         }
11025
11026         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11027         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11028
11029         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11030             btrfs_root_level(&ri) != rii->level ||
11031             btrfs_root_generation(&ri) != rii->gen) {
11032
11033                 /*
11034                  * If we're in repair mode but our caller told us to not update
11035                  * the root item, i.e. just check if it needs to be updated, don't
11036                  * print this message, since the caller will call us again shortly
11037                  * for the same root item without read only mode (the caller will
11038                  * open a transaction first).
11039                  */
11040                 if (!(read_only_mode && repair))
11041                         fprintf(stderr,
11042                                 "%sroot item for root %llu,"
11043                                 " current bytenr %llu, current gen %llu, current level %u,"
11044                                 " new bytenr %llu, new gen %llu, new level %u\n",
11045                                 (read_only_mode ? "" : "fixing "),
11046                                 root_id,
11047                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11048                                 btrfs_root_level(&ri),
11049                                 rii->bytenr, rii->gen, rii->level);
11050
11051                 if (btrfs_root_generation(&ri) > rii->gen) {
11052                         fprintf(stderr,
11053                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11054                                 root_id, btrfs_root_generation(&ri), rii->gen);
11055                         return -EINVAL;
11056                 }
11057
11058                 if (!read_only_mode) {
11059                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11060                         btrfs_set_root_level(&ri, rii->level);
11061                         btrfs_set_root_generation(&ri, rii->gen);
11062                         write_extent_buffer(path->nodes[0], &ri,
11063                                             offset, sizeof(ri));
11064                 }
11065
11066                 return 1;
11067         }
11068
11069         return 0;
11070 }
11071
11072 /*
11073  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11074  * caused read-only snapshots to be corrupted if they were created at a moment
11075  * when the source subvolume/snapshot had orphan items. The issue was that the
11076  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11077  * node instead of the post orphan cleanup root node.
11078  * So this function, and its callees, just detects and fixes those cases. Even
11079  * though the regression was for read-only snapshots, this function applies to
11080  * any snapshot/subvolume root.
11081  * This must be run before any other repair code - not doing it so, makes other
11082  * repair code delete or modify backrefs in the extent tree for example, which
11083  * will result in an inconsistent fs after repairing the root items.
11084  */
11085 static int repair_root_items(struct btrfs_fs_info *info)
11086 {
11087         struct btrfs_path *path = NULL;
11088         struct btrfs_key key;
11089         struct extent_buffer *leaf;
11090         struct btrfs_trans_handle *trans = NULL;
11091         int ret = 0;
11092         int bad_roots = 0;
11093         int need_trans = 0;
11094
11095         ret = build_roots_info_cache(info);
11096         if (ret)
11097                 goto out;
11098
11099         path = btrfs_alloc_path();
11100         if (!path) {
11101                 ret = -ENOMEM;
11102                 goto out;
11103         }
11104
11105         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11106         key.type = BTRFS_ROOT_ITEM_KEY;
11107         key.offset = 0;
11108
11109 again:
11110         /*
11111          * Avoid opening and committing transactions if a leaf doesn't have
11112          * any root items that need to be fixed, so that we avoid rotating
11113          * backup roots unnecessarily.
11114          */
11115         if (need_trans) {
11116                 trans = btrfs_start_transaction(info->tree_root, 1);
11117                 if (IS_ERR(trans)) {
11118                         ret = PTR_ERR(trans);
11119                         goto out;
11120                 }
11121         }
11122
11123         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11124                                 0, trans ? 1 : 0);
11125         if (ret < 0)
11126                 goto out;
11127         leaf = path->nodes[0];
11128
11129         while (1) {
11130                 struct btrfs_key found_key;
11131
11132                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11133                         int no_more_keys = find_next_key(path, &key);
11134
11135                         btrfs_release_path(path);
11136                         if (trans) {
11137                                 ret = btrfs_commit_transaction(trans,
11138                                                                info->tree_root);
11139                                 trans = NULL;
11140                                 if (ret < 0)
11141                                         goto out;
11142                         }
11143                         need_trans = 0;
11144                         if (no_more_keys)
11145                                 break;
11146                         goto again;
11147                 }
11148
11149                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11150
11151                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11152                         goto next;
11153                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11154                         goto next;
11155
11156                 ret = maybe_repair_root_item(info, path, &found_key,
11157                                              trans ? 0 : 1);
11158                 if (ret < 0)
11159                         goto out;
11160                 if (ret) {
11161                         if (!trans && repair) {
11162                                 need_trans = 1;
11163                                 key = found_key;
11164                                 btrfs_release_path(path);
11165                                 goto again;
11166                         }
11167                         bad_roots++;
11168                 }
11169 next:
11170                 path->slots[0]++;
11171         }
11172         ret = 0;
11173 out:
11174         free_roots_info_cache();
11175         btrfs_free_path(path);
11176         if (trans)
11177                 btrfs_commit_transaction(trans, info->tree_root);
11178         if (ret < 0)
11179                 return ret;
11180
11181         return bad_roots;
11182 }
11183
11184 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11185 {
11186         struct btrfs_trans_handle *trans;
11187         struct btrfs_block_group_cache *bg_cache;
11188         u64 current = 0;
11189         int ret = 0;
11190
11191         /* Clear all free space cache inodes and its extent data */
11192         while (1) {
11193                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11194                 if (!bg_cache)
11195                         break;
11196                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11197                 if (ret < 0)
11198                         return ret;
11199                 current = bg_cache->key.objectid + bg_cache->key.offset;
11200         }
11201
11202         /* Don't forget to set cache_generation to -1 */
11203         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11204         if (IS_ERR(trans)) {
11205                 error("failed to update super block cache generation");
11206                 return PTR_ERR(trans);
11207         }
11208         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11209         btrfs_commit_transaction(trans, fs_info->tree_root);
11210
11211         return ret;
11212 }
11213
/*
 * Help text for "btrfs check", as a NULL-terminated array of strings.
 *
 * The entries are, in order: the synopsis line, a one-line summary, the long
 * description, an empty string, and then one or more lines per option.
 * NOTE(review): presumably consumed by the generic command usage printer
 * declared in commands.h; confirm how it treats the empty-string separator.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        "                            NOTE: v1 support implemented",
        NULL
};
11245
11246 int cmd_check(int argc, char **argv)
11247 {
11248         struct cache_tree root_cache;
11249         struct btrfs_root *root;
11250         struct btrfs_fs_info *info;
11251         u64 bytenr = 0;
11252         u64 subvolid = 0;
11253         u64 tree_root_bytenr = 0;
11254         u64 chunk_root_bytenr = 0;
11255         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11256         int ret;
11257         u64 num;
11258         int init_csum_tree = 0;
11259         int readonly = 0;
11260         int clear_space_cache = 0;
11261         int qgroup_report = 0;
11262         int qgroups_repaired = 0;
11263         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11264
11265         while(1) {
11266                 int c;
11267                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11268                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11269                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11270                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11271                 static const struct option long_options[] = {
11272                         { "super", required_argument, NULL, 's' },
11273                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11274                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11275                         { "init-csum-tree", no_argument, NULL,
11276                                 GETOPT_VAL_INIT_CSUM },
11277                         { "init-extent-tree", no_argument, NULL,
11278                                 GETOPT_VAL_INIT_EXTENT },
11279                         { "check-data-csum", no_argument, NULL,
11280                                 GETOPT_VAL_CHECK_CSUM },
11281                         { "backup", no_argument, NULL, 'b' },
11282                         { "subvol-extents", required_argument, NULL, 'E' },
11283                         { "qgroup-report", no_argument, NULL, 'Q' },
11284                         { "tree-root", required_argument, NULL, 'r' },
11285                         { "chunk-root", required_argument, NULL,
11286                                 GETOPT_VAL_CHUNK_TREE },
11287                         { "progress", no_argument, NULL, 'p' },
11288                         { "mode", required_argument, NULL,
11289                                 GETOPT_VAL_MODE },
11290                         { "clear-space-cache", required_argument, NULL,
11291                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11292                         { NULL, 0, NULL, 0}
11293                 };
11294
11295                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11296                 if (c < 0)
11297                         break;
11298                 switch(c) {
11299                         case 'a': /* ignored */ break;
11300                         case 'b':
11301                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11302                                 break;
11303                         case 's':
11304                                 num = arg_strtou64(optarg);
11305                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11306                                         error(
11307                                         "super mirror should be less than %d",
11308                                                 BTRFS_SUPER_MIRROR_MAX);
11309                                         exit(1);
11310                                 }
11311                                 bytenr = btrfs_sb_offset(((int)num));
11312                                 printf("using SB copy %llu, bytenr %llu\n", num,
11313                                        (unsigned long long)bytenr);
11314                                 break;
11315                         case 'Q':
11316                                 qgroup_report = 1;
11317                                 break;
11318                         case 'E':
11319                                 subvolid = arg_strtou64(optarg);
11320                                 break;
11321                         case 'r':
11322                                 tree_root_bytenr = arg_strtou64(optarg);
11323                                 break;
11324                         case GETOPT_VAL_CHUNK_TREE:
11325                                 chunk_root_bytenr = arg_strtou64(optarg);
11326                                 break;
11327                         case 'p':
11328                                 ctx.progress_enabled = true;
11329                                 break;
11330                         case '?':
11331                         case 'h':
11332                                 usage(cmd_check_usage);
11333                         case GETOPT_VAL_REPAIR:
11334                                 printf("enabling repair mode\n");
11335                                 repair = 1;
11336                                 ctree_flags |= OPEN_CTREE_WRITES;
11337                                 break;
11338                         case GETOPT_VAL_READONLY:
11339                                 readonly = 1;
11340                                 break;
11341                         case GETOPT_VAL_INIT_CSUM:
11342                                 printf("Creating a new CRC tree\n");
11343                                 init_csum_tree = 1;
11344                                 repair = 1;
11345                                 ctree_flags |= OPEN_CTREE_WRITES;
11346                                 break;
11347                         case GETOPT_VAL_INIT_EXTENT:
11348                                 init_extent_tree = 1;
11349                                 ctree_flags |= (OPEN_CTREE_WRITES |
11350                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11351                                 repair = 1;
11352                                 break;
11353                         case GETOPT_VAL_CHECK_CSUM:
11354                                 check_data_csum = 1;
11355                                 break;
11356                         case GETOPT_VAL_MODE:
11357                                 check_mode = parse_check_mode(optarg);
11358                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11359                                         error("unknown mode: %s", optarg);
11360                                         exit(1);
11361                                 }
11362                                 break;
11363                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11364                                 if (strcmp(optarg, "v1") != 0) {
11365                                         error(
11366                         "only v1 support implmented, unrecognized value %s",
11367                         optarg);
11368                                         exit(1);
11369                                 }
11370                                 clear_space_cache = 1;
11371                                 ctree_flags |= OPEN_CTREE_WRITES;
11372                                 break;
11373                 }
11374         }
11375
11376         if (check_argc_exact(argc - optind, 1))
11377                 usage(cmd_check_usage);
11378
11379         if (ctx.progress_enabled) {
11380                 ctx.tp = TASK_NOTHING;
11381                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11382         }
11383
11384         /* This check is the only reason for --readonly to exist */
11385         if (readonly && repair) {
11386                 error("repair options are not compatible with --readonly");
11387                 exit(1);
11388         }
11389
11390         /*
11391          * Not supported yet
11392          */
11393         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11394                 error("low memory mode doesn't support repair yet");
11395                 exit(1);
11396         }
11397
11398         radix_tree_init();
11399         cache_tree_init(&root_cache);
11400
11401         if((ret = check_mounted(argv[optind])) < 0) {
11402                 error("could not check mount status: %s", strerror(-ret));
11403                 goto err_out;
11404         } else if(ret) {
11405                 error("%s is currently mounted, aborting", argv[optind]);
11406                 ret = -EBUSY;
11407                 goto err_out;
11408         }
11409
11410         /* only allow partial opening under repair mode */
11411         if (repair)
11412                 ctree_flags |= OPEN_CTREE_PARTIAL;
11413
11414         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11415                                   chunk_root_bytenr, ctree_flags);
11416         if (!info) {
11417                 error("cannot open file system");
11418                 ret = -EIO;
11419                 goto err_out;
11420         }
11421
11422         global_info = info;
11423         root = info->fs_root;
11424         if (clear_space_cache) {
11425                 if (btrfs_fs_compat_ro(info,
11426                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11427                         error(
11428                         "free space cache v2 detected, clearing not implemented");
11429                         ret = 1;
11430                         goto close_out;
11431                 }
11432                 printf("Clearing free space cache\n");
11433                 ret = clear_free_space_cache(info);
11434                 if (ret) {
11435                         error("failed to clear free space cache");
11436                         ret = 1;
11437                 } else {
11438                         printf("Free space cache cleared\n");
11439                 }
11440                 goto close_out;
11441         }
11442
11443         /*
11444          * repair mode will force us to commit transaction which
11445          * will make us fail to load log tree when mounting.
11446          */
11447         if (repair && btrfs_super_log_root(info->super_copy)) {
11448                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11449                 if (!ret) {
11450                         ret = 1;
11451                         goto close_out;
11452                 }
11453                 ret = zero_log_tree(root);
11454                 if (ret) {
11455                         error("failed to zero log tree: %d", ret);
11456                         goto close_out;
11457                 }
11458         }
11459
11460         uuid_unparse(info->super_copy->fsid, uuidbuf);
11461         if (qgroup_report) {
11462                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11463                        uuidbuf);
11464                 ret = qgroup_verify_all(info);
11465                 if (ret == 0)
11466                         report_qgroups(1);
11467                 goto close_out;
11468         }
11469         if (subvolid) {
11470                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11471                        subvolid, argv[optind], uuidbuf);
11472                 ret = print_extent_state(info, subvolid);
11473                 goto close_out;
11474         }
11475         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11476
11477         if (!extent_buffer_uptodate(info->tree_root->node) ||
11478             !extent_buffer_uptodate(info->dev_root->node) ||
11479             !extent_buffer_uptodate(info->chunk_root->node)) {
11480                 error("critical roots corrupted, unable to check the filesystem");
11481                 ret = -EIO;
11482                 goto close_out;
11483         }
11484
11485         if (init_extent_tree || init_csum_tree) {
11486                 struct btrfs_trans_handle *trans;
11487
11488                 trans = btrfs_start_transaction(info->extent_root, 0);
11489                 if (IS_ERR(trans)) {
11490                         error("error starting transaction");
11491                         ret = PTR_ERR(trans);
11492                         goto close_out;
11493                 }
11494
11495                 if (init_extent_tree) {
11496                         printf("Creating a new extent tree\n");
11497                         ret = reinit_extent_tree(trans, info);
11498                         if (ret)
11499                                 goto close_out;
11500                 }
11501
11502                 if (init_csum_tree) {
11503                         printf("Reinitialize checksum tree\n");
11504                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11505                         if (ret) {
11506                                 error("checksum tree initialization failed: %d",
11507                                                 ret);
11508                                 ret = -EIO;
11509                                 goto close_out;
11510                         }
11511
11512                         ret = fill_csum_tree(trans, info->csum_root,
11513                                              init_extent_tree);
11514                         if (ret) {
11515                                 error("checksum tree refilling failed: %d", ret);
11516                                 return -EIO;
11517                         }
11518                 }
11519                 /*
11520                  * Ok now we commit and run the normal fsck, which will add
11521                  * extent entries for all of the items it finds.
11522                  */
11523                 ret = btrfs_commit_transaction(trans, info->extent_root);
11524                 if (ret)
11525                         goto close_out;
11526         }
11527         if (!extent_buffer_uptodate(info->extent_root->node)) {
11528                 error("critical: extent_root, unable to check the filesystem");
11529                 ret = -EIO;
11530                 goto close_out;
11531         }
11532         if (!extent_buffer_uptodate(info->csum_root->node)) {
11533                 error("critical: csum_root, unable to check the filesystem");
11534                 ret = -EIO;
11535                 goto close_out;
11536         }
11537
11538         if (!ctx.progress_enabled)
11539                 printf("checking extents");
11540         if (check_mode == CHECK_MODE_LOWMEM)
11541                 ret = check_chunks_and_extents_v2(root);
11542         else
11543                 ret = check_chunks_and_extents(root);
11544         if (ret)
11545                 printf("Errors found in extent allocation tree or chunk allocation");
11546
11547         ret = repair_root_items(info);
11548         if (ret < 0)
11549                 goto close_out;
11550         if (repair) {
11551                 fprintf(stderr, "Fixed %d roots.\n", ret);
11552                 ret = 0;
11553         } else if (ret > 0) {
11554                 fprintf(stderr,
11555                        "Found %d roots with an outdated root item.\n",
11556                        ret);
11557                 fprintf(stderr,
11558                         "Please run a filesystem check with the option --repair to fix them.\n");
11559                 ret = 1;
11560                 goto close_out;
11561         }
11562
11563         if (!ctx.progress_enabled) {
11564                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11565                         fprintf(stderr, "checking free space tree\n");
11566                 else
11567                         fprintf(stderr, "checking free space cache\n");
11568         }
11569         ret = check_space_cache(root);
11570         if (ret)
11571                 goto out;
11572
11573         /*
11574          * We used to have to have these hole extents in between our real
11575          * extents so if we don't have this flag set we need to make sure there
11576          * are no gaps in the file extents for inodes, otherwise we can just
11577          * ignore it when this happens.
11578          */
11579         no_holes = btrfs_fs_incompat(root->fs_info,
11580                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11581         if (!ctx.progress_enabled)
11582                 fprintf(stderr, "checking fs roots\n");
11583         ret = check_fs_roots(root, &root_cache);
11584         if (ret)
11585                 goto out;
11586
11587         fprintf(stderr, "checking csums\n");
11588         ret = check_csums(root);
11589         if (ret)
11590                 goto out;
11591
11592         fprintf(stderr, "checking root refs\n");
11593         ret = check_root_refs(root, &root_cache);
11594         if (ret)
11595                 goto out;
11596
11597         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11598                 struct extent_buffer *eb;
11599
11600                 eb = list_first_entry(&root->fs_info->recow_ebs,
11601                                       struct extent_buffer, recow);
11602                 list_del_init(&eb->recow);
11603                 ret = recow_extent_buffer(root, eb);
11604                 if (ret)
11605                         break;
11606         }
11607
11608         while (!list_empty(&delete_items)) {
11609                 struct bad_item *bad;
11610
11611                 bad = list_first_entry(&delete_items, struct bad_item, list);
11612                 list_del_init(&bad->list);
11613                 if (repair)
11614                         ret = delete_bad_item(root, bad);
11615                 free(bad);
11616         }
11617
11618         if (info->quota_enabled) {
11619                 int err;
11620                 fprintf(stderr, "checking quota groups\n");
11621                 err = qgroup_verify_all(info);
11622                 if (err)
11623                         goto out;
11624                 report_qgroups(0);
11625                 err = repair_qgroups(info, &qgroups_repaired);
11626                 if (err)
11627                         goto out;
11628         }
11629
11630         if (!list_empty(&root->fs_info->recow_ebs)) {
11631                 error("transid errors in file system");
11632                 ret = 1;
11633         }
11634 out:
11635         /* Don't override original ret */
11636         if (!ret && qgroups_repaired)
11637                 ret = qgroups_repaired;
11638
11639         if (found_old_backref) { /*
11640                  * there was a disk format change when mixed
11641                  * backref was in testing tree. The old format
11642                  * existed about one week.
11643                  */
11644                 printf("\n * Found old mixed backref format. "
11645                        "The old format is not supported! *"
11646                        "\n * Please mount the FS in readonly mode, "
11647                        "backup data and re-format the FS. *\n\n");
11648                 ret = 1;
11649         }
11650         printf("found %llu bytes used err is %d\n",
11651                (unsigned long long)bytes_used, ret);
11652         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11653         printf("total tree bytes: %llu\n",
11654                (unsigned long long)total_btree_bytes);
11655         printf("total fs tree bytes: %llu\n",
11656                (unsigned long long)total_fs_tree_bytes);
11657         printf("total extent tree bytes: %llu\n",
11658                (unsigned long long)total_extent_tree_bytes);
11659         printf("btree space waste bytes: %llu\n",
11660                (unsigned long long)btree_space_waste);
11661         printf("file data blocks allocated: %llu\n referenced %llu\n",
11662                 (unsigned long long)data_bytes_allocated,
11663                 (unsigned long long)data_bytes_referenced);
11664
11665         free_qgroup_counts();
11666         free_root_recs_tree(&root_cache);
11667 close_out:
11668         close_ctree(root);
11669 err_out:
11670         if (ctx.progress_enabled)
11671                 task_deinit(ctx.info);
11672
11673         return ret;
11674 }