btrfs-progs: check: use on-stack path buffer in repair_btree
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases shown by the progress indicator.
 *
 * The values index task_position_string[] in print_status_check(), which
 * has no entry for TASK_NOTHING, so TASK_NOTHING must stay last.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for an
 * unrecognised name.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode currently in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
86 struct extent_backref {
87         struct list_head list;
88         unsigned int is_data:1;
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
100 struct data_backref {
101         struct extent_backref node;
102         union {
103                 u64 parent;
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;
109         u64 bytes;
110         u64 ram_bytes;
111         u32 num_refs;
112         u32 found_ref;
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 /*
121  * Much like data_backref, just removed the undetermined members
122  * and change it to use list_head.
123  * During extent scan, it is stored in root->orphan_data_extent.
124  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
125  */
126 struct orphan_data_extent {
127         struct list_head list;
128         u64 root;
129         u64 objectid;
130         u64 offset;
131         u64 disk_bytenr;
132         u64 disk_len;
133 };
134
135 struct tree_backref {
136         struct extent_backref node;
137         union {
138                 u64 parent;
139                 u64 root;
140         };
141 };
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
/*
 * Explicit "not determined yet" value for
 * extent_record::flag_block_full_backref (a 2-bit field, so 2 fits).
 */
enum {
	FLAG_UNSET = 2
};
150
151 struct extent_record {
152         struct list_head backrefs;
153         struct list_head dups;
154         struct list_head list;
155         struct cache_extent cache;
156         struct btrfs_disk_key parent_key;
157         u64 start;
158         u64 max_size;
159         u64 nr;
160         u64 refs;
161         u64 extent_item_refs;
162         u64 generation;
163         u64 parent_generation;
164         u64 info_objectid;
165         u32 num_duplicates;
166         u8 info_level;
167         unsigned int flag_block_full_backref:2;
168         unsigned int found_rec:1;
169         unsigned int content_checked:1;
170         unsigned int owner_ref_checked:1;
171         unsigned int is_root:1;
172         unsigned int metadata:1;
173         unsigned int bad_full_backref:1;
174         unsigned int crossing_stripes:1;
175         unsigned int wrong_chunk_type:1;
176 };
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
183 struct inode_backref {
184         struct list_head list;
185         unsigned int found_dir_item:1;
186         unsigned int found_dir_index:1;
187         unsigned int found_inode_ref:1;
188         u8 filetype;
189         u8 ref_type;
190         int errors;
191         u64 dir;
192         u64 index;
193         u16 namelen;
194         char name[0];
195 };
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
202 struct root_item_record {
203         struct list_head list;
204         u64 objectid;
205         u64 bytenr;
206         u64 last_snapshot;
207         u8 level;
208         u8 drop_level;
209         int level_size;
210         struct btrfs_key drop_key;
211 };
212
/*
 * Error bits kept in a back reference's 'errors' field and decoded by
 * print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
226
227 struct file_extent_hole {
228         struct rb_node node;
229         u64 start;
230         u64 len;
231 };
232
233 struct inode_record {
234         struct list_head backrefs;
235         unsigned int checked:1;
236         unsigned int merging:1;
237         unsigned int found_inode_item:1;
238         unsigned int found_dir_item:1;
239         unsigned int found_file_extent:1;
240         unsigned int found_csum_item:1;
241         unsigned int some_csum_missing:1;
242         unsigned int nodatasum:1;
243         int errors;
244
245         u64 ino;
246         u32 nlink;
247         u32 imode;
248         u64 isize;
249         u64 nbytes;
250
251         u32 found_link;
252         u64 found_size;
253         u64 extent_start;
254         u64 extent_end;
255         struct rb_root holes;
256         struct list_head orphan_extents;
257
258         u32 refs;
259 };
260
/* Error bits kept in inode_record::errors, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
276
277 struct root_backref {
278         struct list_head list;
279         unsigned int found_dir_item:1;
280         unsigned int found_dir_index:1;
281         unsigned int found_back_ref:1;
282         unsigned int found_forward_ref:1;
283         unsigned int reachable:1;
284         int errors;
285         u64 ref_root;
286         u64 dir;
287         u64 index;
288         u16 namelen;
289         char name[0];
290 };
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
297 struct root_record {
298         struct list_head backrefs;
299         struct cache_extent cache;
300         unsigned int found_root_item:1;
301         u64 objectid;
302         u32 found_ref;
303 };
304
305 struct ptr_node {
306         struct cache_extent cache;
307         void *data;
308 };
309
310 struct shared_node {
311         struct cache_extent cache;
312         struct cache_tree root_cache;
313         struct cache_tree inode_cache;
314         struct inode_record *current;
315         u32 refs;
316 };
317
318 struct block_info {
319         u64 start;
320         u32 size;
321 };
322
323 struct walk_control {
324         struct cache_tree shared;
325         struct shared_node *nodes[BTRFS_MAX_LEVEL];
326         int active_node;
327         int root_level;
328 };
329
330 struct bad_item {
331         struct btrfs_key key;
332         u64 root_id;
333         struct list_head list;
334 };
335
336 struct extent_entry {
337         u64 bytenr;
338         u64 bytes;
339         int count;
340         int broken;
341         struct list_head list;
342 };
343
344 struct root_item_info {
345         /* level of the root */
346         u8 level;
347         /* number of nodes at this level, must be 1 for a root */
348         int node_count;
349         u64 bytenr;
350         u64 gen;
351         struct cache_extent cache_extent;
352 };
353
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally to classify errors.  Every class has its own bit.
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH,
 * which made the two classes indistinguishable, so it now uses the next
 * free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Progress callback: end the status line written by
 * print_status_check() with a newline.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
/* Forward declaration; the definition appears later in this file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         struct rb_node *rb;
663         size_t size;
664         int ret;
665
666         rec = malloc(sizeof(*rec));
667         if (!rec)
668                 return ERR_PTR(-ENOMEM);
669         memcpy(rec, orig_rec, sizeof(*rec));
670         rec->refs = 1;
671         INIT_LIST_HEAD(&rec->backrefs);
672         INIT_LIST_HEAD(&rec->orphan_extents);
673         rec->holes = RB_ROOT;
674
675         list_for_each_entry(orig, &orig_rec->backrefs, list) {
676                 size = sizeof(*orig) + orig->namelen + 1;
677                 backref = malloc(size);
678                 if (!backref) {
679                         ret = -ENOMEM;
680                         goto cleanup;
681                 }
682                 memcpy(backref, orig, size);
683                 list_add_tail(&backref->list, &rec->backrefs);
684         }
685         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686                 dst_orphan = malloc(sizeof(*dst_orphan));
687                 if (!dst_orphan) {
688                         ret = -ENOMEM;
689                         goto cleanup;
690                 }
691                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693         }
694         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
695         if (ret < 0)
696                 goto cleanup_rb;
697
698         return rec;
699
700 cleanup_rb:
701         rb = rb_first(&rec->holes);
702         while (rb) {
703                 struct file_extent_hole *hole;
704
705                 hole = rb_entry(rb, struct file_extent_hole, node);
706                 rb = rb_next(rb);
707                 free(hole);
708         }
709
710 cleanup:
711         if (!list_empty(&rec->backrefs))
712                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713                         list_del(&orig->list);
714                         free(orig);
715                 }
716
717         if (!list_empty(&rec->orphan_extents))
718                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719                         list_del(&orig->list);
720                         free(orig);
721                 }
722
723         free(rec);
724
725         return ERR_PTR(ret);
726 }
727
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
729                                       u64 objectid)
730 {
731         struct orphan_data_extent *orphan;
732
733         if (list_empty(orphan_extents))
734                 return;
735         printf("The following data extent is lost in tree %llu:\n",
736                objectid);
737         list_for_each_entry(orphan, orphan_extents, list) {
738                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
740                        orphan->disk_len);
741         }
742 }
743
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
745 {
746         u64 root_objectid = root->root_key.objectid;
747         int errors = rec->errors;
748
749         if (!errors)
750                 return;
751         /* reloc root errors, we print its corresponding fs root objectid*/
752         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753                 root_objectid = root->root_key.offset;
754                 fprintf(stderr, "reloc");
755         }
756         fprintf(stderr, "root %llu inode %llu errors %x",
757                 (unsigned long long) root_objectid,
758                 (unsigned long long) rec->ino, rec->errors);
759
760         if (errors & I_ERR_NO_INODE_ITEM)
761                 fprintf(stderr, ", no inode item");
762         if (errors & I_ERR_NO_ORPHAN_ITEM)
763                 fprintf(stderr, ", no orphan item");
764         if (errors & I_ERR_DUP_INODE_ITEM)
765                 fprintf(stderr, ", dup inode item");
766         if (errors & I_ERR_DUP_DIR_INDEX)
767                 fprintf(stderr, ", dup dir index");
768         if (errors & I_ERR_ODD_DIR_ITEM)
769                 fprintf(stderr, ", odd dir item");
770         if (errors & I_ERR_ODD_FILE_EXTENT)
771                 fprintf(stderr, ", odd file extent");
772         if (errors & I_ERR_BAD_FILE_EXTENT)
773                 fprintf(stderr, ", bad file extent");
774         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775                 fprintf(stderr, ", file extent overlap");
776         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777                 fprintf(stderr, ", file extent discount");
778         if (errors & I_ERR_DIR_ISIZE_WRONG)
779                 fprintf(stderr, ", dir isize wrong");
780         if (errors & I_ERR_FILE_NBYTES_WRONG)
781                 fprintf(stderr, ", nbytes wrong");
782         if (errors & I_ERR_ODD_CSUM_ITEM)
783                 fprintf(stderr, ", odd csum item");
784         if (errors & I_ERR_SOME_CSUM_MISSING)
785                 fprintf(stderr, ", some csum missing");
786         if (errors & I_ERR_LINK_COUNT_WRONG)
787                 fprintf(stderr, ", link count wrong");
788         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789                 fprintf(stderr, ", orphan file extent");
790         fprintf(stderr, "\n");
791         /* Print the orphan extents if needed */
792         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
794
795         /* Print the holes if needed */
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797                 struct file_extent_hole *hole;
798                 struct rb_node *node;
799                 int found = 0;
800
801                 node = rb_first(&rec->holes);
802                 fprintf(stderr, "Found file extent holes:\n");
803                 while (node) {
804                         found = 1;
805                         hole = rb_entry(node, struct file_extent_hole, node);
806                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
807                                 hole->start, hole->len);
808                         node = rb_next(node);
809                 }
810                 if (!found)
811                         fprintf(stderr, "\tstart: 0, len: %llu\n",
812                                 round_up(rec->isize, root->sectorsize));
813         }
814 }
815
816 static void print_ref_error(int errors)
817 {
818         if (errors & REF_ERR_NO_DIR_ITEM)
819                 fprintf(stderr, ", no dir item");
820         if (errors & REF_ERR_NO_DIR_INDEX)
821                 fprintf(stderr, ", no dir index");
822         if (errors & REF_ERR_NO_INODE_REF)
823                 fprintf(stderr, ", no inode ref");
824         if (errors & REF_ERR_DUP_DIR_ITEM)
825                 fprintf(stderr, ", dup dir item");
826         if (errors & REF_ERR_DUP_DIR_INDEX)
827                 fprintf(stderr, ", dup dir index");
828         if (errors & REF_ERR_DUP_INODE_REF)
829                 fprintf(stderr, ", dup inode ref");
830         if (errors & REF_ERR_INDEX_UNMATCH)
831                 fprintf(stderr, ", index mismatch");
832         if (errors & REF_ERR_FILETYPE_UNMATCH)
833                 fprintf(stderr, ", filetype mismatch");
834         if (errors & REF_ERR_NAME_TOO_LONG)
835                 fprintf(stderr, ", name too long");
836         if (errors & REF_ERR_NO_ROOT_REF)
837                 fprintf(stderr, ", no root ref");
838         if (errors & REF_ERR_NO_ROOT_BACKREF)
839                 fprintf(stderr, ", no root backref");
840         if (errors & REF_ERR_DUP_ROOT_REF)
841                 fprintf(stderr, ", dup root ref");
842         if (errors & REF_ERR_DUP_ROOT_BACKREF)
843                 fprintf(stderr, ", dup root backref");
844         fprintf(stderr, "\n");
845 }
846
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
848                                           u64 ino, int mod)
849 {
850         struct ptr_node *node;
851         struct cache_extent *cache;
852         struct inode_record *rec = NULL;
853         int ret;
854
855         cache = lookup_cache_extent(inode_cache, ino, 1);
856         if (cache) {
857                 node = container_of(cache, struct ptr_node, cache);
858                 rec = node->data;
859                 if (mod && rec->refs > 1) {
860                         node->data = clone_inode_rec(rec);
861                         if (IS_ERR(node->data))
862                                 return node->data;
863                         rec->refs--;
864                         rec = node->data;
865                 }
866         } else if (mod) {
867                 rec = calloc(1, sizeof(*rec));
868                 if (!rec)
869                         return ERR_PTR(-ENOMEM);
870                 rec->ino = ino;
871                 rec->extent_start = (u64)-1;
872                 rec->refs = 1;
873                 INIT_LIST_HEAD(&rec->backrefs);
874                 INIT_LIST_HEAD(&rec->orphan_extents);
875                 rec->holes = RB_ROOT;
876
877                 node = malloc(sizeof(*node));
878                 if (!node) {
879                         free(rec);
880                         return ERR_PTR(-ENOMEM);
881                 }
882                 node->cache.start = ino;
883                 node->cache.size = 1;
884                 node->data = rec;
885
886                 if (ino == BTRFS_FREE_INO_OBJECTID)
887                         rec->found_link = 1;
888
889                 ret = insert_cache_extent(inode_cache, &node->cache);
890                 if (ret)
891                         return ERR_PTR(-EEXIST);
892         }
893         return rec;
894 }
895
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
897 {
898         struct orphan_data_extent *orphan;
899
900         while (!list_empty(orphan_extents)) {
901                 orphan = list_entry(orphan_extents->next,
902                                     struct orphan_data_extent, list);
903                 list_del(&orphan->list);
904                 free(orphan);
905         }
906 }
907
908 static void free_inode_rec(struct inode_record *rec)
909 {
910         struct inode_backref *backref;
911
912         if (--rec->refs > 0)
913                 return;
914
915         while (!list_empty(&rec->backrefs)) {
916                 backref = to_inode_backref(rec->backrefs.next);
917                 list_del(&backref->list);
918                 free(backref);
919         }
920         free_orphan_data_extents(&rec->orphan_extents);
921         free_file_extent_holes(&rec->holes);
922         free(rec);
923 }
924
925 static int can_free_inode_rec(struct inode_record *rec)
926 {
927         if (!rec->errors && rec->checked && rec->found_inode_item &&
928             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
929                 return 1;
930         return 0;
931 }
932
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934                                  struct inode_record *rec)
935 {
936         struct cache_extent *cache;
937         struct inode_backref *tmp, *backref;
938         struct ptr_node *node;
939         u8 filetype;
940
941         if (!rec->found_inode_item)
942                 return;
943
944         filetype = imode_to_type(rec->imode);
945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946                 if (backref->found_dir_item && backref->found_dir_index) {
947                         if (backref->filetype != filetype)
948                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949                         if (!backref->errors && backref->found_inode_ref &&
950                             rec->nlink == rec->found_link) {
951                                 list_del(&backref->list);
952                                 free(backref);
953                         }
954                 }
955         }
956
957         if (!rec->checked || rec->merging)
958                 return;
959
960         if (S_ISDIR(rec->imode)) {
961                 if (rec->found_size != rec->isize)
962                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963                 if (rec->found_file_extent)
964                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
965         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966                 if (rec->found_dir_item)
967                         rec->errors |= I_ERR_ODD_DIR_ITEM;
968                 if (rec->found_size != rec->nbytes)
969                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970                 if (rec->nlink > 0 && !no_holes &&
971                     (rec->extent_end < rec->isize ||
972                      first_extent_gap(&rec->holes) < rec->isize))
973                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
974         }
975
976         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977                 if (rec->found_csum_item && rec->nodatasum)
978                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
979                 if (rec->some_csum_missing && !rec->nodatasum)
980                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
981         }
982
983         BUG_ON(rec->refs != 1);
984         if (can_free_inode_rec(rec)) {
985                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986                 node = container_of(cache, struct ptr_node, cache);
987                 BUG_ON(node->data != rec);
988                 remove_cache_extent(inode_cache, &node->cache);
989                 free(node);
990                 free_inode_rec(rec);
991         }
992 }
993
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
995 {
996         struct btrfs_path path;
997         struct btrfs_key key;
998         int ret;
999
1000         key.objectid = BTRFS_ORPHAN_OBJECTID;
1001         key.type = BTRFS_ORPHAN_ITEM_KEY;
1002         key.offset = ino;
1003
1004         btrfs_init_path(&path);
1005         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006         btrfs_release_path(&path);
1007         if (ret > 0)
1008                 ret = -ENOENT;
1009         return ret;
1010 }
1011
1012 static int process_inode_item(struct extent_buffer *eb,
1013                               int slot, struct btrfs_key *key,
1014                               struct shared_node *active_node)
1015 {
1016         struct inode_record *rec;
1017         struct btrfs_inode_item *item;
1018
1019         rec = active_node->current;
1020         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021         if (rec->found_inode_item) {
1022                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1023                 return 1;
1024         }
1025         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026         rec->nlink = btrfs_inode_nlink(eb, item);
1027         rec->isize = btrfs_inode_size(eb, item);
1028         rec->nbytes = btrfs_inode_nbytes(eb, item);
1029         rec->imode = btrfs_inode_mode(eb, item);
1030         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1031                 rec->nodatasum = 1;
1032         rec->found_inode_item = 1;
1033         if (rec->nlink == 0)
1034                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035         maybe_free_inode_rec(&active_node->inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1040                                                 const char *name,
1041                                                 int namelen, u64 dir)
1042 {
1043         struct inode_backref *backref;
1044
1045         list_for_each_entry(backref, &rec->backrefs, list) {
1046                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1047                         break;
1048                 if (backref->dir != dir || backref->namelen != namelen)
1049                         continue;
1050                 if (memcmp(name, backref->name, namelen))
1051                         continue;
1052                 return backref;
1053         }
1054
1055         backref = malloc(sizeof(*backref) + namelen + 1);
1056         if (!backref)
1057                 return NULL;
1058         memset(backref, 0, sizeof(*backref));
1059         backref->dir = dir;
1060         backref->namelen = namelen;
1061         memcpy(backref->name, name, namelen);
1062         backref->name[namelen] = '\0';
1063         list_add_tail(&backref->list, &rec->backrefs);
1064         return backref;
1065 }
1066
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068                              u64 ino, u64 dir, u64 index,
1069                              const char *name, int namelen,
1070                              u8 filetype, u8 itemtype, int errors)
1071 {
1072         struct inode_record *rec;
1073         struct inode_backref *backref;
1074
1075         rec = get_inode_rec(inode_cache, ino, 1);
1076         BUG_ON(IS_ERR(rec));
1077         backref = get_inode_backref(rec, name, namelen, dir);
1078         BUG_ON(!backref);
1079         if (errors)
1080                 backref->errors |= errors;
1081         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082                 if (backref->found_dir_index)
1083                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084                 if (backref->found_inode_ref && backref->index != index)
1085                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1086                 if (backref->found_dir_item && backref->filetype != filetype)
1087                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1088
1089                 backref->index = index;
1090                 backref->filetype = filetype;
1091                 backref->found_dir_index = 1;
1092         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1093                 rec->found_link++;
1094                 if (backref->found_dir_item)
1095                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096                 if (backref->found_dir_index && backref->filetype != filetype)
1097                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1098
1099                 backref->filetype = filetype;
1100                 backref->found_dir_item = 1;
1101         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103                 if (backref->found_inode_ref)
1104                         backref->errors |= REF_ERR_DUP_INODE_REF;
1105                 if (backref->found_dir_index && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 else
1108                         backref->index = index;
1109
1110                 backref->ref_type = itemtype;
1111                 backref->found_inode_ref = 1;
1112         } else {
1113                 BUG_ON(1);
1114         }
1115
1116         maybe_free_inode_rec(inode_cache, rec);
1117         return 0;
1118 }
1119
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121                             struct cache_tree *dst_cache)
1122 {
1123         struct inode_backref *backref;
1124         u32 dir_count = 0;
1125         int ret = 0;
1126
1127         dst->merging = 1;
1128         list_for_each_entry(backref, &src->backrefs, list) {
1129                 if (backref->found_dir_index) {
1130                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1131                                         backref->index, backref->name,
1132                                         backref->namelen, backref->filetype,
1133                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1134                 }
1135                 if (backref->found_dir_item) {
1136                         dir_count++;
1137                         add_inode_backref(dst_cache, dst->ino,
1138                                         backref->dir, 0, backref->name,
1139                                         backref->namelen, backref->filetype,
1140                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1141                 }
1142                 if (backref->found_inode_ref) {
1143                         add_inode_backref(dst_cache, dst->ino,
1144                                         backref->dir, backref->index,
1145                                         backref->name, backref->namelen, 0,
1146                                         backref->ref_type, backref->errors);
1147                 }
1148         }
1149
1150         if (src->found_dir_item)
1151                 dst->found_dir_item = 1;
1152         if (src->found_file_extent)
1153                 dst->found_file_extent = 1;
1154         if (src->found_csum_item)
1155                 dst->found_csum_item = 1;
1156         if (src->some_csum_missing)
1157                 dst->some_csum_missing = 1;
1158         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1160                 if (ret < 0)
1161                         return ret;
1162         }
1163
1164         BUG_ON(src->found_link < dir_count);
1165         dst->found_link += src->found_link - dir_count;
1166         dst->found_size += src->found_size;
1167         if (src->extent_start != (u64)-1) {
1168                 if (dst->extent_start == (u64)-1) {
1169                         dst->extent_start = src->extent_start;
1170                         dst->extent_end = src->extent_end;
1171                 } else {
1172                         if (dst->extent_end > src->extent_start)
1173                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174                         else if (dst->extent_end < src->extent_start) {
1175                                 ret = add_file_extent_hole(&dst->holes,
1176                                         dst->extent_end,
1177                                         src->extent_start - dst->extent_end);
1178                         }
1179                         if (dst->extent_end < src->extent_end)
1180                                 dst->extent_end = src->extent_end;
1181                 }
1182         }
1183
1184         dst->errors |= src->errors;
1185         if (src->found_inode_item) {
1186                 if (!dst->found_inode_item) {
1187                         dst->nlink = src->nlink;
1188                         dst->isize = src->isize;
1189                         dst->nbytes = src->nbytes;
1190                         dst->imode = src->imode;
1191                         dst->nodatasum = src->nodatasum;
1192                         dst->found_inode_item = 1;
1193                 } else {
1194                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1195                 }
1196         }
1197         dst->merging = 0;
1198
1199         return 0;
1200 }
1201
1202 static int splice_shared_node(struct shared_node *src_node,
1203                               struct shared_node *dst_node)
1204 {
1205         struct cache_extent *cache;
1206         struct ptr_node *node, *ins;
1207         struct cache_tree *src, *dst;
1208         struct inode_record *rec, *conflict;
1209         u64 current_ino = 0;
1210         int splice = 0;
1211         int ret;
1212
1213         if (--src_node->refs == 0)
1214                 splice = 1;
1215         if (src_node->current)
1216                 current_ino = src_node->current->ino;
1217
1218         src = &src_node->root_cache;
1219         dst = &dst_node->root_cache;
1220 again:
1221         cache = search_cache_extent(src, 0);
1222         while (cache) {
1223                 node = container_of(cache, struct ptr_node, cache);
1224                 rec = node->data;
1225                 cache = next_cache_extent(cache);
1226
1227                 if (splice) {
1228                         remove_cache_extent(src, &node->cache);
1229                         ins = node;
1230                 } else {
1231                         ins = malloc(sizeof(*ins));
1232                         BUG_ON(!ins);
1233                         ins->cache.start = node->cache.start;
1234                         ins->cache.size = node->cache.size;
1235                         ins->data = rec;
1236                         rec->refs++;
1237                 }
1238                 ret = insert_cache_extent(dst, &ins->cache);
1239                 if (ret == -EEXIST) {
1240                         conflict = get_inode_rec(dst, rec->ino, 1);
1241                         BUG_ON(IS_ERR(conflict));
1242                         merge_inode_recs(rec, conflict, dst);
1243                         if (rec->checked) {
1244                                 conflict->checked = 1;
1245                                 if (dst_node->current == conflict)
1246                                         dst_node->current = NULL;
1247                         }
1248                         maybe_free_inode_rec(dst, conflict);
1249                         free_inode_rec(rec);
1250                         free(ins);
1251                 } else {
1252                         BUG_ON(ret);
1253                 }
1254         }
1255
1256         if (src == &src_node->root_cache) {
1257                 src = &src_node->inode_cache;
1258                 dst = &dst_node->inode_cache;
1259                 goto again;
1260         }
1261
1262         if (current_ino > 0 && (!dst_node->current ||
1263             current_ino > dst_node->current->ino)) {
1264                 if (dst_node->current) {
1265                         dst_node->current->checked = 1;
1266                         maybe_free_inode_rec(dst, dst_node->current);
1267                 }
1268                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269                 BUG_ON(IS_ERR(dst_node->current));
1270         }
1271         return 0;
1272 }
1273
1274 static void free_inode_ptr(struct cache_extent *cache)
1275 {
1276         struct ptr_node *node;
1277         struct inode_record *rec;
1278
1279         node = container_of(cache, struct ptr_node, cache);
1280         rec = node->data;
1281         free_inode_rec(rec);
1282         free(node);
1283 }
1284
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1286
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1288                                             u64 bytenr)
1289 {
1290         struct cache_extent *cache;
1291         struct shared_node *node;
1292
1293         cache = lookup_cache_extent(shared, bytenr, 1);
1294         if (cache) {
1295                 node = container_of(cache, struct shared_node, cache);
1296                 return node;
1297         }
1298         return NULL;
1299 }
1300
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1302 {
1303         int ret;
1304         struct shared_node *node;
1305
1306         node = calloc(1, sizeof(*node));
1307         if (!node)
1308                 return -ENOMEM;
1309         node->cache.start = bytenr;
1310         node->cache.size = 1;
1311         cache_tree_init(&node->root_cache);
1312         cache_tree_init(&node->inode_cache);
1313         node->refs = refs;
1314
1315         ret = insert_cache_extent(shared, &node->cache);
1316
1317         return ret;
1318 }
1319
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321                              struct walk_control *wc, int level)
1322 {
1323         struct shared_node *node;
1324         struct shared_node *dest;
1325         int ret;
1326
1327         if (level == wc->active_node)
1328                 return 0;
1329
1330         BUG_ON(wc->active_node <= level);
1331         node = find_shared_node(&wc->shared, bytenr);
1332         if (!node) {
1333                 ret = add_shared_node(&wc->shared, bytenr, refs);
1334                 BUG_ON(ret);
1335                 node = find_shared_node(&wc->shared, bytenr);
1336                 wc->nodes[level] = node;
1337                 wc->active_node = level;
1338                 return 0;
1339         }
1340
1341         if (wc->root_level == wc->active_node &&
1342             btrfs_root_refs(&root->root_item) == 0) {
1343                 if (--node->refs == 0) {
1344                         free_inode_recs_tree(&node->root_cache);
1345                         free_inode_recs_tree(&node->inode_cache);
1346                         remove_cache_extent(&wc->shared, &node->cache);
1347                         free(node);
1348                 }
1349                 return 1;
1350         }
1351
1352         dest = wc->nodes[wc->active_node];
1353         splice_shared_node(node, dest);
1354         if (node->refs == 0) {
1355                 remove_cache_extent(&wc->shared, &node->cache);
1356                 free(node);
1357         }
1358         return 1;
1359 }
1360
1361 static int leave_shared_node(struct btrfs_root *root,
1362                              struct walk_control *wc, int level)
1363 {
1364         struct shared_node *node;
1365         struct shared_node *dest;
1366         int i;
1367
1368         if (level == wc->root_level)
1369                 return 0;
1370
1371         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1372                 if (wc->nodes[i])
1373                         break;
1374         }
1375         BUG_ON(i >= BTRFS_MAX_LEVEL);
1376
1377         node = wc->nodes[wc->active_node];
1378         wc->nodes[wc->active_node] = NULL;
1379         wc->active_node = i;
1380
1381         dest = wc->nodes[wc->active_node];
1382         if (wc->active_node < wc->root_level ||
1383             btrfs_root_refs(&root->root_item) > 0) {
1384                 BUG_ON(node->refs <= 1);
1385                 splice_shared_node(node, dest);
1386         } else {
1387                 BUG_ON(node->refs < 2);
1388                 node->refs--;
1389         }
1390         return 0;
1391 }
1392
1393 /*
1394  * Returns:
1395  * < 0 - on error
1396  * 1   - if the root with id child_root_id is a child of root parent_root_id
1397  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1398  *       has other root(s) as parent(s)
1399  * 2   - if the root child_root_id doesn't have any parent roots
1400  */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1402                          u64 child_root_id)
1403 {
1404         struct btrfs_path path;
1405         struct btrfs_key key;
1406         struct extent_buffer *leaf;
1407         int has_parent = 0;
1408         int ret;
1409
1410         btrfs_init_path(&path);
1411
1412         key.objectid = parent_root_id;
1413         key.type = BTRFS_ROOT_REF_KEY;
1414         key.offset = child_root_id;
1415         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1416                                 0, 0);
1417         if (ret < 0)
1418                 return ret;
1419         btrfs_release_path(&path);
1420         if (!ret)
1421                 return 1;
1422
1423         key.objectid = child_root_id;
1424         key.type = BTRFS_ROOT_BACKREF_KEY;
1425         key.offset = 0;
1426         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1427                                 0, 0);
1428         if (ret < 0)
1429                 goto out;
1430
1431         while (1) {
1432                 leaf = path.nodes[0];
1433                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1435                         if (ret)
1436                                 break;
1437                         leaf = path.nodes[0];
1438                 }
1439
1440                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441                 if (key.objectid != child_root_id ||
1442                     key.type != BTRFS_ROOT_BACKREF_KEY)
1443                         break;
1444
1445                 has_parent = 1;
1446
1447                 if (key.offset == parent_root_id) {
1448                         btrfs_release_path(&path);
1449                         return 1;
1450                 }
1451
1452                 path.slots[0]++;
1453         }
1454 out:
1455         btrfs_release_path(&path);
1456         if (ret < 0)
1457                 return ret;
1458         return has_parent ? 0 : 2;
1459 }
1460
1461 static int process_dir_item(struct btrfs_root *root,
1462                             struct extent_buffer *eb,
1463                             int slot, struct btrfs_key *key,
1464                             struct shared_node *active_node)
1465 {
1466         u32 total;
1467         u32 cur = 0;
1468         u32 len;
1469         u32 name_len;
1470         u32 data_len;
1471         int error;
1472         int nritems = 0;
1473         u8 filetype;
1474         struct btrfs_dir_item *di;
1475         struct inode_record *rec;
1476         struct cache_tree *root_cache;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_key location;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         root_cache = &active_node->root_cache;
1482         inode_cache = &active_node->inode_cache;
1483         rec = active_node->current;
1484         rec->found_dir_item = 1;
1485
1486         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487         total = btrfs_item_size_nr(eb, slot);
1488         while (cur < total) {
1489                 nritems++;
1490                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491                 name_len = btrfs_dir_name_len(eb, di);
1492                 data_len = btrfs_dir_data_len(eb, di);
1493                 filetype = btrfs_dir_type(eb, di);
1494
1495                 rec->found_size += name_len;
1496                 if (name_len <= BTRFS_NAME_LEN) {
1497                         len = name_len;
1498                         error = 0;
1499                 } else {
1500                         len = BTRFS_NAME_LEN;
1501                         error = REF_ERR_NAME_TOO_LONG;
1502                 }
1503                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1504
1505                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506                         add_inode_backref(inode_cache, location.objectid,
1507                                           key->objectid, key->offset, namebuf,
1508                                           len, filetype, key->type, error);
1509                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510                         add_inode_backref(root_cache, location.objectid,
1511                                           key->objectid, key->offset,
1512                                           namebuf, len, filetype,
1513                                           key->type, error);
1514                 } else {
1515                         fprintf(stderr, "invalid location in dir item %u\n",
1516                                 location.type);
1517                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518                                           key->objectid, key->offset, namebuf,
1519                                           len, filetype, key->type, error);
1520                 }
1521
1522                 len = sizeof(*di) + name_len + data_len;
1523                 di = (struct btrfs_dir_item *)((char *)di + len);
1524                 cur += len;
1525         }
1526         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1528
1529         return 0;
1530 }
1531
1532 static int process_inode_ref(struct extent_buffer *eb,
1533                              int slot, struct btrfs_key *key,
1534                              struct shared_node *active_node)
1535 {
1536         u32 total;
1537         u32 cur = 0;
1538         u32 len;
1539         u32 name_len;
1540         u64 index;
1541         int error;
1542         struct cache_tree *inode_cache;
1543         struct btrfs_inode_ref *ref;
1544         char namebuf[BTRFS_NAME_LEN];
1545
1546         inode_cache = &active_node->inode_cache;
1547
1548         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549         total = btrfs_item_size_nr(eb, slot);
1550         while (cur < total) {
1551                 name_len = btrfs_inode_ref_name_len(eb, ref);
1552                 index = btrfs_inode_ref_index(eb, ref);
1553                 if (name_len <= BTRFS_NAME_LEN) {
1554                         len = name_len;
1555                         error = 0;
1556                 } else {
1557                         len = BTRFS_NAME_LEN;
1558                         error = REF_ERR_NAME_TOO_LONG;
1559                 }
1560                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561                 add_inode_backref(inode_cache, key->objectid, key->offset,
1562                                   index, namebuf, len, 0, key->type, error);
1563
1564                 len = sizeof(*ref) + name_len;
1565                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1566                 cur += len;
1567         }
1568         return 0;
1569 }
1570
1571 static int process_inode_extref(struct extent_buffer *eb,
1572                                 int slot, struct btrfs_key *key,
1573                                 struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u64 index;
1580         u64 parent;
1581         int error;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_inode_extref *extref;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         inode_cache = &active_node->inode_cache;
1587
1588         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589         total = btrfs_item_size_nr(eb, slot);
1590         while (cur < total) {
1591                 name_len = btrfs_inode_extref_name_len(eb, extref);
1592                 index = btrfs_inode_extref_index(eb, extref);
1593                 parent = btrfs_inode_extref_parent(eb, extref);
1594                 if (name_len <= BTRFS_NAME_LEN) {
1595                         len = name_len;
1596                         error = 0;
1597                 } else {
1598                         len = BTRFS_NAME_LEN;
1599                         error = REF_ERR_NAME_TOO_LONG;
1600                 }
1601                 read_extent_buffer(eb, namebuf,
1602                                    (unsigned long)(extref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, parent,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*extref) + name_len;
1607                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611
1612 }
1613
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615                             u64 len, u64 *found)
1616 {
1617         struct btrfs_key key;
1618         struct btrfs_path path;
1619         struct extent_buffer *leaf;
1620         int ret;
1621         size_t size;
1622         *found = 0;
1623         u64 csum_end;
1624         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1625
1626         btrfs_init_path(&path);
1627
1628         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1629         key.offset = start;
1630         key.type = BTRFS_EXTENT_CSUM_KEY;
1631
1632         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1633                                 &key, &path, 0, 0);
1634         if (ret < 0)
1635                 goto out;
1636         if (ret > 0 && path.slots[0] > 0) {
1637                 leaf = path.nodes[0];
1638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640                     key.type == BTRFS_EXTENT_CSUM_KEY)
1641                         path.slots[0]--;
1642         }
1643
1644         while (len > 0) {
1645                 leaf = path.nodes[0];
1646                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1648                         if (ret > 0)
1649                                 break;
1650                         else if (ret < 0)
1651                                 goto out;
1652                         leaf = path.nodes[0];
1653                 }
1654
1655                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657                     key.type != BTRFS_EXTENT_CSUM_KEY)
1658                         break;
1659
1660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661                 if (key.offset >= start + len)
1662                         break;
1663
1664                 if (key.offset > start)
1665                         start = key.offset;
1666
1667                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669                 if (csum_end > start) {
1670                         size = min(csum_end - start, len);
1671                         len -= size;
1672                         start += size;
1673                         *found += size;
1674                 }
1675
1676                 path.slots[0]++;
1677         }
1678 out:
1679         btrfs_release_path(&path);
1680         if (ret < 0)
1681                 return ret;
1682         return 0;
1683 }
1684
1685 static int process_file_extent(struct btrfs_root *root,
1686                                 struct extent_buffer *eb,
1687                                 int slot, struct btrfs_key *key,
1688                                 struct shared_node *active_node)
1689 {
1690         struct inode_record *rec;
1691         struct btrfs_file_extent_item *fi;
1692         u64 num_bytes = 0;
1693         u64 disk_bytenr = 0;
1694         u64 extent_offset = 0;
1695         u64 mask = root->sectorsize - 1;
1696         int extent_type;
1697         int ret;
1698
1699         rec = active_node->current;
1700         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701         rec->found_file_extent = 1;
1702
1703         if (rec->extent_start == (u64)-1) {
1704                 rec->extent_start = key->offset;
1705                 rec->extent_end = key->offset;
1706         }
1707
1708         if (rec->extent_end > key->offset)
1709                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710         else if (rec->extent_end < key->offset) {
1711                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712                                            key->offset - rec->extent_end);
1713                 if (ret < 0)
1714                         return ret;
1715         }
1716
1717         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718         extent_type = btrfs_file_extent_type(eb, fi);
1719
1720         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1722                 if (num_bytes == 0)
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 rec->found_size += num_bytes;
1725                 num_bytes = (num_bytes + mask) & ~mask;
1726         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730                 extent_offset = btrfs_file_extent_offset(eb, fi);
1731                 if (num_bytes == 0 || (num_bytes & mask))
1732                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733                 if (num_bytes + extent_offset >
1734                     btrfs_file_extent_ram_bytes(eb, fi))
1735                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1736                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737                     (btrfs_file_extent_compression(eb, fi) ||
1738                      btrfs_file_extent_encryption(eb, fi) ||
1739                      btrfs_file_extent_other_encoding(eb, fi)))
1740                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1741                 if (disk_bytenr > 0)
1742                         rec->found_size += num_bytes;
1743         } else {
1744                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1745         }
1746         rec->extent_end = key->offset + num_bytes;
1747
1748         /*
1749          * The data reloc tree will copy full extents into its inode and then
1750          * copy the corresponding csums.  Because the extent it copied could be
1751          * a preallocated extent that hasn't been written to yet there may be no
1752          * csums to copy, ergo we won't have csums for our file extent.  This is
1753          * ok so just don't bother checking csums if the inode belongs to the
1754          * data reloc tree.
1755          */
1756         if (disk_bytenr > 0 &&
1757             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1758                 u64 found;
1759                 if (btrfs_file_extent_compression(eb, fi))
1760                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1761                 else
1762                         disk_bytenr += extent_offset;
1763
1764                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1765                 if (ret < 0)
1766                         return ret;
1767                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1768                         if (found > 0)
1769                                 rec->found_csum_item = 1;
1770                         if (found < num_bytes)
1771                                 rec->some_csum_missing = 1;
1772                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1773                         if (found > 0)
1774                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1775                 }
1776         }
1777         return 0;
1778 }
1779
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781                             struct walk_control *wc)
1782 {
1783         struct btrfs_key key;
1784         u32 nritems;
1785         int i;
1786         int ret = 0;
1787         struct cache_tree *inode_cache;
1788         struct shared_node *active_node;
1789
1790         if (wc->root_level == wc->active_node &&
1791             btrfs_root_refs(&root->root_item) == 0)
1792                 return 0;
1793
1794         active_node = wc->nodes[wc->active_node];
1795         inode_cache = &active_node->inode_cache;
1796         nritems = btrfs_header_nritems(eb);
1797         for (i = 0; i < nritems; i++) {
1798                 btrfs_item_key_to_cpu(eb, &key, i);
1799
1800                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1801                         continue;
1802                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1803                         continue;
1804
1805                 if (active_node->current == NULL ||
1806                     active_node->current->ino < key.objectid) {
1807                         if (active_node->current) {
1808                                 active_node->current->checked = 1;
1809                                 maybe_free_inode_rec(inode_cache,
1810                                                      active_node->current);
1811                         }
1812                         active_node->current = get_inode_rec(inode_cache,
1813                                                              key.objectid, 1);
1814                         BUG_ON(IS_ERR(active_node->current));
1815                 }
1816                 switch (key.type) {
1817                 case BTRFS_DIR_ITEM_KEY:
1818                 case BTRFS_DIR_INDEX_KEY:
1819                         ret = process_dir_item(root, eb, i, &key, active_node);
1820                         break;
1821                 case BTRFS_INODE_REF_KEY:
1822                         ret = process_inode_ref(eb, i, &key, active_node);
1823                         break;
1824                 case BTRFS_INODE_EXTREF_KEY:
1825                         ret = process_inode_extref(eb, i, &key, active_node);
1826                         break;
1827                 case BTRFS_INODE_ITEM_KEY:
1828                         ret = process_inode_item(eb, i, &key, active_node);
1829                         break;
1830                 case BTRFS_EXTENT_DATA_KEY:
1831                         ret = process_file_extent(root, eb, i, &key,
1832                                                   active_node);
1833                         break;
1834                 default:
1835                         break;
1836                 };
1837         }
1838         return ret;
1839 }
1840
1841 static void reada_walk_down(struct btrfs_root *root,
1842                             struct extent_buffer *node, int slot)
1843 {
1844         u64 bytenr;
1845         u64 ptr_gen;
1846         u32 nritems;
1847         u32 blocksize;
1848         int i;
1849         int level;
1850
1851         level = btrfs_header_level(node);
1852         if (level != 1)
1853                 return;
1854
1855         nritems = btrfs_header_nritems(node);
1856         blocksize = root->nodesize;
1857         for (i = slot; i < nritems; i++) {
1858                 bytenr = btrfs_node_blockptr(node, i);
1859                 ptr_gen = btrfs_node_ptr_generation(node, i);
1860                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1861         }
1862 }
1863
1864 /*
1865  * Check the child node/leaf by the following condition:
1866  * 1. the first item key of the node/leaf should be the same with the one
1867  *    in parent.
1868  * 2. block in parent node should match the child node/leaf.
1869  * 3. generation of parent node and child's header should be consistent.
1870  *
1871  * Or the child node/leaf pointed by the key in parent is not valid.
1872  *
1873  * We hope to check leaf owner too, but since subvol may share leaves,
1874  * which makes leaf owner check not so strong, key check should be
1875  * sufficient enough for that case.
1876  */
1877 static int check_child_node(struct btrfs_root *root,
1878                             struct extent_buffer *parent, int slot,
1879                             struct extent_buffer *child)
1880 {
1881         struct btrfs_key parent_key;
1882         struct btrfs_key child_key;
1883         int ret = 0;
1884
1885         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886         if (btrfs_header_level(child) == 0)
1887                 btrfs_item_key_to_cpu(child, &child_key, 0);
1888         else
1889                 btrfs_node_key_to_cpu(child, &child_key, 0);
1890
1891         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1892                 ret = -EINVAL;
1893                 fprintf(stderr,
1894                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895                         parent_key.objectid, parent_key.type, parent_key.offset,
1896                         child_key.objectid, child_key.type, child_key.offset);
1897         }
1898         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1899                 ret = -EINVAL;
1900                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901                         btrfs_node_blockptr(parent, slot),
1902                         btrfs_header_bytenr(child));
1903         }
1904         if (btrfs_node_ptr_generation(parent, slot) !=
1905             btrfs_header_generation(child)) {
1906                 ret = -EINVAL;
1907                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908                         btrfs_header_generation(child),
1909                         btrfs_node_ptr_generation(parent, slot));
1910         }
1911         return ret;
1912 }
1913
1914 struct node_refs {
1915         u64 bytenr[BTRFS_MAX_LEVEL];
1916         u64 refs[BTRFS_MAX_LEVEL];
1917 };
1918
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920                           struct walk_control *wc, int *level,
1921                           struct node_refs *nrefs)
1922 {
1923         enum btrfs_tree_block_status status;
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         struct extent_buffer *next;
1927         struct extent_buffer *cur;
1928         u32 blocksize;
1929         int ret, err = 0;
1930         u64 refs;
1931
1932         WARN_ON(*level < 0);
1933         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1934
1935         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936                 refs = nrefs->refs[*level];
1937                 ret = 0;
1938         } else {
1939                 ret = btrfs_lookup_extent_info(NULL, root,
1940                                        path->nodes[*level]->start,
1941                                        *level, 1, &refs, NULL);
1942                 if (ret < 0) {
1943                         err = ret;
1944                         goto out;
1945                 }
1946                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947                 nrefs->refs[*level] = refs;
1948         }
1949
1950         if (refs > 1) {
1951                 ret = enter_shared_node(root, path->nodes[*level]->start,
1952                                         refs, wc, *level);
1953                 if (ret > 0) {
1954                         err = ret;
1955                         goto out;
1956                 }
1957         }
1958
1959         while (*level >= 0) {
1960                 WARN_ON(*level < 0);
1961                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962                 cur = path->nodes[*level];
1963
1964                 if (btrfs_header_level(cur) != *level)
1965                         WARN_ON(1);
1966
1967                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1968                         break;
1969                 if (*level == 0) {
1970                         ret = process_one_leaf(root, cur, wc);
1971                         if (ret < 0)
1972                                 err = ret;
1973                         break;
1974                 }
1975                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977                 blocksize = root->nodesize;
1978
1979                 if (bytenr == nrefs->bytenr[*level - 1]) {
1980                         refs = nrefs->refs[*level - 1];
1981                 } else {
1982                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983                                         *level - 1, 1, &refs, NULL);
1984                         if (ret < 0) {
1985                                 refs = 0;
1986                         } else {
1987                                 nrefs->bytenr[*level - 1] = bytenr;
1988                                 nrefs->refs[*level - 1] = refs;
1989                         }
1990                 }
1991
1992                 if (refs > 1) {
1993                         ret = enter_shared_node(root, bytenr, refs,
1994                                                 wc, *level - 1);
1995                         if (ret > 0) {
1996                                 path->slots[*level]++;
1997                                 continue;
1998                         }
1999                 }
2000
2001                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003                         free_extent_buffer(next);
2004                         reada_walk_down(root, cur, path->slots[*level]);
2005                         next = read_tree_block(root, bytenr, blocksize,
2006                                                ptr_gen);
2007                         if (!extent_buffer_uptodate(next)) {
2008                                 struct btrfs_key node_key;
2009
2010                                 btrfs_node_key_to_cpu(path->nodes[*level],
2011                                                       &node_key,
2012                                                       path->slots[*level]);
2013                                 btrfs_add_corrupt_extent_record(root->fs_info,
2014                                                 &node_key,
2015                                                 path->nodes[*level]->start,
2016                                                 root->nodesize, *level);
2017                                 err = -EIO;
2018                                 goto out;
2019                         }
2020                 }
2021
2022                 ret = check_child_node(root, cur, path->slots[*level], next);
2023                 if (ret) {
2024                         err = ret;
2025                         goto out;
2026                 }
2027
2028                 if (btrfs_is_leaf(next))
2029                         status = btrfs_check_leaf(root, NULL, next);
2030                 else
2031                         status = btrfs_check_node(root, NULL, next);
2032                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033                         free_extent_buffer(next);
2034                         err = -EIO;
2035                         goto out;
2036                 }
2037
2038                 *level = *level - 1;
2039                 free_extent_buffer(path->nodes[*level]);
2040                 path->nodes[*level] = next;
2041                 path->slots[*level] = 0;
2042         }
2043 out:
2044         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2045         return err;
2046 }
2047
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049                         struct walk_control *wc, int *level)
2050 {
2051         int i;
2052         struct extent_buffer *leaf;
2053
2054         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055                 leaf = path->nodes[i];
2056                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2057                         path->slots[i]++;
2058                         *level = i;
2059                         return 0;
2060                 } else {
2061                         free_extent_buffer(path->nodes[*level]);
2062                         path->nodes[*level] = NULL;
2063                         BUG_ON(*level > wc->active_node);
2064                         if (*level == wc->active_node)
2065                                 leave_shared_node(root, wc, *level);
2066                         *level = i + 1;
2067                 }
2068         }
2069         return 1;
2070 }
2071
2072 static int check_root_dir(struct inode_record *rec)
2073 {
2074         struct inode_backref *backref;
2075         int ret = -1;
2076
2077         if (!rec->found_inode_item || rec->errors)
2078                 goto out;
2079         if (rec->nlink != 1 || rec->found_link != 0)
2080                 goto out;
2081         if (list_empty(&rec->backrefs))
2082                 goto out;
2083         backref = to_inode_backref(rec->backrefs.next);
2084         if (!backref->found_inode_ref)
2085                 goto out;
2086         if (backref->index != 0 || backref->namelen != 2 ||
2087             memcmp(backref->name, "..", 2))
2088                 goto out;
2089         if (backref->found_dir_index || backref->found_dir_item)
2090                 goto out;
2091         ret = 0;
2092 out:
2093         return ret;
2094 }
2095
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097                               struct btrfs_root *root, struct btrfs_path *path,
2098                               struct inode_record *rec)
2099 {
2100         struct btrfs_inode_item *ei;
2101         struct btrfs_key key;
2102         int ret;
2103
2104         key.objectid = rec->ino;
2105         key.type = BTRFS_INODE_ITEM_KEY;
2106         key.offset = (u64)-1;
2107
2108         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2109         if (ret < 0)
2110                 goto out;
2111         if (ret) {
2112                 if (!path->slots[0]) {
2113                         ret = -ENOENT;
2114                         goto out;
2115                 }
2116                 path->slots[0]--;
2117                 ret = 0;
2118         }
2119         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120         if (key.objectid != rec->ino) {
2121                 ret = -ENOENT;
2122                 goto out;
2123         }
2124
2125         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126                             struct btrfs_inode_item);
2127         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128         btrfs_mark_buffer_dirty(path->nodes[0]);
2129         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131                root->root_key.objectid);
2132 out:
2133         btrfs_release_path(path);
2134         return ret;
2135 }
2136
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138                                     struct btrfs_root *root,
2139                                     struct btrfs_path *path,
2140                                     struct inode_record *rec)
2141 {
2142         int ret;
2143
2144         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145         btrfs_release_path(path);
2146         if (!ret)
2147                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2148         return ret;
2149 }
2150
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152                                struct btrfs_root *root,
2153                                struct btrfs_path *path,
2154                                struct inode_record *rec)
2155 {
2156         struct btrfs_inode_item *ei;
2157         struct btrfs_key key;
2158         int ret = 0;
2159
2160         key.objectid = rec->ino;
2161         key.type = BTRFS_INODE_ITEM_KEY;
2162         key.offset = 0;
2163
2164         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2165         if (ret) {
2166                 if (ret > 0)
2167                         ret = -ENOENT;
2168                 goto out;
2169         }
2170
2171         /* Since ret == 0, no need to check anything */
2172         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173                             struct btrfs_inode_item);
2174         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175         btrfs_mark_buffer_dirty(path->nodes[0]);
2176         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177         printf("reset nbytes for ino %llu root %llu\n",
2178                rec->ino, root->root_key.objectid);
2179 out:
2180         btrfs_release_path(path);
2181         return ret;
2182 }
2183
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185                                  struct cache_tree *inode_cache,
2186                                  struct inode_record *rec,
2187                                  struct inode_backref *backref)
2188 {
2189         struct btrfs_path path;
2190         struct btrfs_trans_handle *trans;
2191         struct btrfs_dir_item *dir_item;
2192         struct extent_buffer *leaf;
2193         struct btrfs_key key;
2194         struct btrfs_disk_key disk_key;
2195         struct inode_record *dir_rec;
2196         unsigned long name_ptr;
2197         u32 data_size = sizeof(*dir_item) + backref->namelen;
2198         int ret;
2199
2200         trans = btrfs_start_transaction(root, 1);
2201         if (IS_ERR(trans))
2202                 return PTR_ERR(trans);
2203
2204         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205                 (unsigned long long)rec->ino);
2206
2207         btrfs_init_path(&path);
2208         key.objectid = backref->dir;
2209         key.type = BTRFS_DIR_INDEX_KEY;
2210         key.offset = backref->index;
2211         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2212         BUG_ON(ret);
2213
2214         leaf = path.nodes[0];
2215         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2216
2217         disk_key.objectid = cpu_to_le64(rec->ino);
2218         disk_key.type = BTRFS_INODE_ITEM_KEY;
2219         disk_key.offset = 0;
2220
2221         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223         btrfs_set_dir_data_len(leaf, dir_item, 0);
2224         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225         name_ptr = (unsigned long)(dir_item + 1);
2226         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227         btrfs_mark_buffer_dirty(leaf);
2228         btrfs_release_path(&path);
2229         btrfs_commit_transaction(trans, root);
2230
2231         backref->found_dir_index = 1;
2232         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233         BUG_ON(IS_ERR(dir_rec));
2234         if (!dir_rec)
2235                 return 0;
2236         dir_rec->found_size += backref->namelen;
2237         if (dir_rec->found_size == dir_rec->isize &&
2238             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240         if (dir_rec->found_size != dir_rec->isize)
2241                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2242
2243         return 0;
2244 }
2245
2246 static int delete_dir_index(struct btrfs_root *root,
2247                             struct cache_tree *inode_cache,
2248                             struct inode_record *rec,
2249                             struct inode_backref *backref)
2250 {
2251         struct btrfs_trans_handle *trans;
2252         struct btrfs_dir_item *di;
2253         struct btrfs_path path;
2254         int ret = 0;
2255
2256         trans = btrfs_start_transaction(root, 1);
2257         if (IS_ERR(trans))
2258                 return PTR_ERR(trans);
2259
2260         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261                 (unsigned long long)backref->dir,
2262                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263                 (unsigned long long)root->objectid);
2264
2265         btrfs_init_path(&path);
2266         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267                                     backref->name, backref->namelen,
2268                                     backref->index, -1);
2269         if (IS_ERR(di)) {
2270                 ret = PTR_ERR(di);
2271                 btrfs_release_path(&path);
2272                 btrfs_commit_transaction(trans, root);
2273                 if (ret == -ENOENT)
2274                         return 0;
2275                 return ret;
2276         }
2277
2278         if (!di)
2279                 ret = btrfs_del_item(trans, root, &path);
2280         else
2281                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2282         BUG_ON(ret);
2283         btrfs_release_path(&path);
2284         btrfs_commit_transaction(trans, root);
2285         return ret;
2286 }
2287
2288 static int create_inode_item(struct btrfs_root *root,
2289                              struct inode_record *rec,
2290                              struct inode_backref *backref, int root_dir)
2291 {
2292         struct btrfs_trans_handle *trans;
2293         struct btrfs_inode_item inode_item;
2294         time_t now = time(NULL);
2295         int ret;
2296
2297         trans = btrfs_start_transaction(root, 1);
2298         if (IS_ERR(trans)) {
2299                 ret = PTR_ERR(trans);
2300                 return ret;
2301         }
2302
2303         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304                 "be incomplete, please check permissions and content after "
2305                 "the fsck completes.\n", (unsigned long long)root->objectid,
2306                 (unsigned long long)rec->ino);
2307
2308         memset(&inode_item, 0, sizeof(inode_item));
2309         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2310         if (root_dir)
2311                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2312         else
2313                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2315         if (rec->found_dir_item) {
2316                 if (rec->found_file_extent)
2317                         fprintf(stderr, "root %llu inode %llu has both a dir "
2318                                 "item and extents, unsure if it is a dir or a "
2319                                 "regular file so setting it as a directory\n",
2320                                 (unsigned long long)root->objectid,
2321                                 (unsigned long long)rec->ino);
2322                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2324         } else if (!rec->found_dir_item) {
2325                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2327         }
2328         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2336
2337         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2338         BUG_ON(ret);
2339         btrfs_commit_transaction(trans, root);
2340         return 0;
2341 }
2342
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344                                  struct inode_record *rec,
2345                                  struct cache_tree *inode_cache,
2346                                  int delete)
2347 {
2348         struct inode_backref *tmp, *backref;
2349         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2350         int ret = 0;
2351         int repaired = 0;
2352
2353         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354                 if (!delete && rec->ino == root_dirid) {
2355                         if (!rec->found_inode_item) {
2356                                 ret = create_inode_item(root, rec, backref, 1);
2357                                 if (ret)
2358                                         break;
2359                                 repaired++;
2360                         }
2361                 }
2362
2363                 /* Index 0 for root dir's are special, don't mess with it */
2364                 if (rec->ino == root_dirid && backref->index == 0)
2365                         continue;
2366
2367                 if (delete &&
2368                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2369                      (backref->found_dir_index && backref->found_inode_ref &&
2370                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371                         ret = delete_dir_index(root, inode_cache, rec, backref);
2372                         if (ret)
2373                                 break;
2374                         repaired++;
2375                         list_del(&backref->list);
2376                         free(backref);
2377                 }
2378
2379                 if (!delete && !backref->found_dir_index &&
2380                     backref->found_dir_item && backref->found_inode_ref) {
2381                         ret = add_missing_dir_index(root, inode_cache, rec,
2382                                                     backref);
2383                         if (ret)
2384                                 break;
2385                         repaired++;
2386                         if (backref->found_dir_item &&
2387                             backref->found_dir_index &&
2388                             backref->found_dir_index) {
2389                                 if (!backref->errors &&
2390                                     backref->found_inode_ref) {
2391                                         list_del(&backref->list);
2392                                         free(backref);
2393                                 }
2394                         }
2395                 }
2396
2397                 if (!delete && (!backref->found_dir_index &&
2398                                 !backref->found_dir_item &&
2399                                 backref->found_inode_ref)) {
2400                         struct btrfs_trans_handle *trans;
2401                         struct btrfs_key location;
2402
2403                         ret = check_dir_conflict(root, backref->name,
2404                                                  backref->namelen,
2405                                                  backref->dir,
2406                                                  backref->index);
2407                         if (ret) {
2408                                 /*
2409                                  * let nlink fixing routine to handle it,
2410                                  * which can do it better.
2411                                  */
2412                                 ret = 0;
2413                                 break;
2414                         }
2415                         location.objectid = rec->ino;
2416                         location.type = BTRFS_INODE_ITEM_KEY;
2417                         location.offset = 0;
2418
2419                         trans = btrfs_start_transaction(root, 1);
2420                         if (IS_ERR(trans)) {
2421                                 ret = PTR_ERR(trans);
2422                                 break;
2423                         }
2424                         fprintf(stderr, "adding missing dir index/item pair "
2425                                 "for inode %llu\n",
2426                                 (unsigned long long)rec->ino);
2427                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2428                                                     backref->namelen,
2429                                                     backref->dir, &location,
2430                                                     imode_to_type(rec->imode),
2431                                                     backref->index);
2432                         BUG_ON(ret);
2433                         btrfs_commit_transaction(trans, root);
2434                         repaired++;
2435                 }
2436
2437                 if (!delete && (backref->found_inode_ref &&
2438                                 backref->found_dir_index &&
2439                                 backref->found_dir_item &&
2440                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441                                 !rec->found_inode_item)) {
2442                         ret = create_inode_item(root, rec, backref, 0);
2443                         if (ret)
2444                                 break;
2445                         repaired++;
2446                 }
2447
2448         }
2449         return ret ? ret : repaired;
2450 }
2451
2452 /*
2453  * To determine the file type for nlink/inode_item repair
2454  *
2455  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456  * Return -ENOENT if file type is not found.
2457  */
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2459 {
2460         struct inode_backref *backref;
2461
2462         /* For inode item recovered case */
2463         if (rec->found_inode_item) {
2464                 *type = imode_to_type(rec->imode);
2465                 return 0;
2466         }
2467
2468         list_for_each_entry(backref, &rec->backrefs, list) {
2469                 if (backref->found_dir_index || backref->found_dir_item) {
2470                         *type = backref->filetype;
2471                         return 0;
2472                 }
2473         }
2474         return -ENOENT;
2475 }
2476
2477 /*
2478  * To determine the file name for nlink repair
2479  *
2480  * Return 0 if file name is found, set name and namelen.
2481  * Return -ENOENT if file name is not found.
2482  */
2483 static int find_file_name(struct inode_record *rec,
2484                           char *name, int *namelen)
2485 {
2486         struct inode_backref *backref;
2487
2488         list_for_each_entry(backref, &rec->backrefs, list) {
2489                 if (backref->found_dir_index || backref->found_dir_item ||
2490                     backref->found_inode_ref) {
2491                         memcpy(name, backref->name, backref->namelen);
2492                         *namelen = backref->namelen;
2493                         return 0;
2494                 }
2495         }
2496         return -ENOENT;
2497 }
2498
2499 /* Reset the nlink of the inode to the correct one */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501                        struct btrfs_root *root,
2502                        struct btrfs_path *path,
2503                        struct inode_record *rec)
2504 {
2505         struct inode_backref *backref;
2506         struct inode_backref *tmp;
2507         struct btrfs_key key;
2508         struct btrfs_inode_item *inode_item;
2509         int ret = 0;
2510
2511         /* We don't believe this either, reset it and iterate backref */
2512         rec->found_link = 0;
2513
2514         /* Remove all backref including the valid ones */
2515         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517                                    backref->index, backref->name,
2518                                    backref->namelen, 0);
2519                 if (ret < 0)
2520                         goto out;
2521
2522                 /* remove invalid backref, so it won't be added back */
2523                 if (!(backref->found_dir_index &&
2524                       backref->found_dir_item &&
2525                       backref->found_inode_ref)) {
2526                         list_del(&backref->list);
2527                         free(backref);
2528                 } else {
2529                         rec->found_link++;
2530                 }
2531         }
2532
2533         /* Set nlink to 0 */
2534         key.objectid = rec->ino;
2535         key.type = BTRFS_INODE_ITEM_KEY;
2536         key.offset = 0;
2537         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2538         if (ret < 0)
2539                 goto out;
2540         if (ret > 0) {
2541                 ret = -ENOENT;
2542                 goto out;
2543         }
2544         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545                                     struct btrfs_inode_item);
2546         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547         btrfs_mark_buffer_dirty(path->nodes[0]);
2548         btrfs_release_path(path);
2549
2550         /*
2551          * Add back valid inode_ref/dir_item/dir_index,
2552          * add_link() will handle the nlink inc, so new nlink must be correct
2553          */
2554         list_for_each_entry(backref, &rec->backrefs, list) {
2555                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556                                      backref->name, backref->namelen,
2557                                      backref->filetype, &backref->index, 1);
2558                 if (ret < 0)
2559                         goto out;
2560         }
2561 out:
2562         btrfs_release_path(path);
2563         return ret;
2564 }
2565
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567                                struct btrfs_root *root,
2568                                struct btrfs_path *path,
2569                                struct inode_record *rec)
2570 {
2571         char *dir_name = "lost+found";
2572         char namebuf[BTRFS_NAME_LEN] = {0};
2573         u64 lost_found_ino;
2574         u32 mode = 0700;
2575         u8 type = 0;
2576         int namelen = 0;
2577         int name_recovered = 0;
2578         int type_recovered = 0;
2579         int ret = 0;
2580
2581         /*
2582          * Get file name and type first before these invalid inode ref
2583          * are deleted by remove_all_invalid_backref()
2584          */
2585         name_recovered = !find_file_name(rec, namebuf, &namelen);
2586         type_recovered = !find_file_type(rec, &type);
2587
2588         if (!name_recovered) {
2589                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590                        rec->ino, rec->ino);
2591                 namelen = count_digits(rec->ino);
2592                 sprintf(namebuf, "%llu", rec->ino);
2593                 name_recovered = 1;
2594         }
2595         if (!type_recovered) {
2596                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2597                        rec->ino);
2598                 type = BTRFS_FT_REG_FILE;
2599                 type_recovered = 1;
2600         }
2601
2602         ret = reset_nlink(trans, root, path, rec);
2603         if (ret < 0) {
2604                 fprintf(stderr,
2605                         "Failed to reset nlink for inode %llu: %s\n",
2606                         rec->ino, strerror(-ret));
2607                 goto out;
2608         }
2609
2610         if (rec->found_link == 0) {
2611                 lost_found_ino = root->highest_inode;
2612                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2613                         ret = -EOVERFLOW;
2614                         goto out;
2615                 }
2616                 lost_found_ino++;
2617                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2619                                   mode);
2620                 if (ret < 0) {
2621                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622                                 dir_name, strerror(-ret));
2623                         goto out;
2624                 }
2625                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626                                      namebuf, namelen, type, NULL, 1);
2627                 /*
2628                  * Add ".INO" suffix several times to handle case where
2629                  * "FILENAME.INO" is already taken by another file.
2630                  */
2631                 while (ret == -EEXIST) {
2632                         /*
2633                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2634                          */
2635                         if (namelen + count_digits(rec->ino) + 1 >
2636                             BTRFS_NAME_LEN) {
2637                                 ret = -EFBIG;
2638                                 goto out;
2639                         }
2640                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2641                                  ".%llu", rec->ino);
2642                         namelen += count_digits(rec->ino) + 1;
2643                         ret = btrfs_add_link(trans, root, rec->ino,
2644                                              lost_found_ino, namebuf,
2645                                              namelen, type, NULL, 1);
2646                 }
2647                 if (ret < 0) {
2648                         fprintf(stderr,
2649                                 "Failed to link the inode %llu to %s dir: %s\n",
2650                                 rec->ino, dir_name, strerror(-ret));
2651                         goto out;
2652                 }
2653                 /*
2654                  * Just increase the found_link, don't actually add the
2655                  * backref. This will make things easier and this inode
2656                  * record will be freed after the repair is done.
2657                  * So fsck will not report problem about this inode.
2658                  */
2659                 rec->found_link++;
2660                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661                        namelen, namebuf, dir_name);
2662         }
2663         printf("Fixed the nlink of inode %llu\n", rec->ino);
2664 out:
2665         /*
2666          * Clear the flag anyway, or we will loop forever for the same inode
2667          * as it will not be removed from the bad inode list and the dead loop
2668          * happens.
2669          */
2670         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671         btrfs_release_path(path);
2672         return ret;
2673 }
2674
2675 /*
2676  * Check if there is any normal(reg or prealloc) file extent for given
2677  * ino.
2678  * This is used to determine the file type when neither its dir_index/item or
2679  * inode_item exists.
2680  *
2681  * This will *NOT* report error, if any error happens, just consider it does
2682  * not have any normal file extent.
2683  */
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2685 {
2686         struct btrfs_path path;
2687         struct btrfs_key key;
2688         struct btrfs_key found_key;
2689         struct btrfs_file_extent_item *fi;
2690         u8 type;
2691         int ret = 0;
2692
2693         btrfs_init_path(&path);
2694         key.objectid = ino;
2695         key.type = BTRFS_EXTENT_DATA_KEY;
2696         key.offset = 0;
2697
2698         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2699         if (ret < 0) {
2700                 ret = 0;
2701                 goto out;
2702         }
2703         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2704                 ret = btrfs_next_leaf(root, &path);
2705                 if (ret) {
2706                         ret = 0;
2707                         goto out;
2708                 }
2709         }
2710         while (1) {
2711                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2712                                       path.slots[0]);
2713                 if (found_key.objectid != ino ||
2714                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2715                         break;
2716                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2717                                     struct btrfs_file_extent_item);
2718                 type = btrfs_file_extent_type(path.nodes[0], fi);
2719                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2720                         ret = 1;
2721                         goto out;
2722                 }
2723         }
2724 out:
2725         btrfs_release_path(&path);
2726         return ret;
2727 }
2728
2729 static u32 btrfs_type_to_imode(u8 type)
2730 {
2731         static u32 imode_by_btrfs_type[] = {
2732                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2733                 [BTRFS_FT_DIR]          = S_IFDIR,
2734                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2735                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2736                 [BTRFS_FT_FIFO]         = S_IFIFO,
2737                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2738                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2739         };
2740
2741         return imode_by_btrfs_type[(type)];
2742 }
2743
2744 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2745                                 struct btrfs_root *root,
2746                                 struct btrfs_path *path,
2747                                 struct inode_record *rec)
2748 {
2749         u8 filetype;
2750         u32 mode = 0700;
2751         int type_recovered = 0;
2752         int ret = 0;
2753
2754         printf("Trying to rebuild inode:%llu\n", rec->ino);
2755
2756         type_recovered = !find_file_type(rec, &filetype);
2757
2758         /*
2759          * Try to determine inode type if type not found.
2760          *
2761          * For found regular file extent, it must be FILE.
2762          * For found dir_item/index, it must be DIR.
2763          *
2764          * For undetermined one, use FILE as fallback.
2765          *
2766          * TODO:
2767          * 1. If found backref(inode_index/item is already handled) to it,
2768          *    it must be DIR.
2769          *    Need new inode-inode ref structure to allow search for that.
2770          */
2771         if (!type_recovered) {
2772                 if (rec->found_file_extent &&
2773                     find_normal_file_extent(root, rec->ino)) {
2774                         type_recovered = 1;
2775                         filetype = BTRFS_FT_REG_FILE;
2776                 } else if (rec->found_dir_item) {
2777                         type_recovered = 1;
2778                         filetype = BTRFS_FT_DIR;
2779                 } else if (!list_empty(&rec->orphan_extents)) {
2780                         type_recovered = 1;
2781                         filetype = BTRFS_FT_REG_FILE;
2782                 } else{
2783                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2784                                rec->ino);
2785                         type_recovered = 1;
2786                         filetype = BTRFS_FT_REG_FILE;
2787                 }
2788         }
2789
2790         ret = btrfs_new_inode(trans, root, rec->ino,
2791                               mode | btrfs_type_to_imode(filetype));
2792         if (ret < 0)
2793                 goto out;
2794
2795         /*
2796          * Here inode rebuild is done, we only rebuild the inode item,
2797          * don't repair the nlink(like move to lost+found).
2798          * That is the job of nlink repair.
2799          *
2800          * We just fill the record and return
2801          */
2802         rec->found_dir_item = 1;
2803         rec->imode = mode | btrfs_type_to_imode(filetype);
2804         rec->nlink = 0;
2805         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2806         /* Ensure the inode_nlinks repair function will be called */
2807         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2808 out:
2809         return ret;
2810 }
2811
2812 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2813                                       struct btrfs_root *root,
2814                                       struct btrfs_path *path,
2815                                       struct inode_record *rec)
2816 {
2817         struct orphan_data_extent *orphan;
2818         struct orphan_data_extent *tmp;
2819         int ret = 0;
2820
2821         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2822                 /*
2823                  * Check for conflicting file extents
2824                  *
2825                  * Here we don't know whether the extents is compressed or not,
2826                  * so we can only assume it not compressed nor data offset,
2827                  * and use its disk_len as extent length.
2828                  */
2829                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2830                                        orphan->offset, orphan->disk_len, 0);
2831                 btrfs_release_path(path);
2832                 if (ret < 0)
2833                         goto out;
2834                 if (!ret) {
2835                         fprintf(stderr,
2836                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2837                                 orphan->disk_bytenr, orphan->disk_len);
2838                         ret = btrfs_free_extent(trans,
2839                                         root->fs_info->extent_root,
2840                                         orphan->disk_bytenr, orphan->disk_len,
2841                                         0, root->objectid, orphan->objectid,
2842                                         orphan->offset);
2843                         if (ret < 0)
2844                                 goto out;
2845                 }
2846                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2847                                 orphan->offset, orphan->disk_bytenr,
2848                                 orphan->disk_len, orphan->disk_len);
2849                 if (ret < 0)
2850                         goto out;
2851
2852                 /* Update file size info */
2853                 rec->found_size += orphan->disk_len;
2854                 if (rec->found_size == rec->nbytes)
2855                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2856
2857                 /* Update the file extent hole info too */
2858                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2859                                            orphan->disk_len);
2860                 if (ret < 0)
2861                         goto out;
2862                 if (RB_EMPTY_ROOT(&rec->holes))
2863                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2864
2865                 list_del(&orphan->list);
2866                 free(orphan);
2867         }
2868         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2869 out:
2870         return ret;
2871 }
2872
2873 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2874                                         struct btrfs_root *root,
2875                                         struct btrfs_path *path,
2876                                         struct inode_record *rec)
2877 {
2878         struct rb_node *node;
2879         struct file_extent_hole *hole;
2880         int found = 0;
2881         int ret = 0;
2882
2883         node = rb_first(&rec->holes);
2884
2885         while (node) {
2886                 found = 1;
2887                 hole = rb_entry(node, struct file_extent_hole, node);
2888                 ret = btrfs_punch_hole(trans, root, rec->ino,
2889                                        hole->start, hole->len);
2890                 if (ret < 0)
2891                         goto out;
2892                 ret = del_file_extent_hole(&rec->holes, hole->start,
2893                                            hole->len);
2894                 if (ret < 0)
2895                         goto out;
2896                 if (RB_EMPTY_ROOT(&rec->holes))
2897                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2898                 node = rb_first(&rec->holes);
2899         }
2900         /* special case for a file losing all its file extent */
2901         if (!found) {
2902                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2903                                        round_up(rec->isize, root->sectorsize));
2904                 if (ret < 0)
2905                         goto out;
2906         }
2907         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2908                rec->ino, root->objectid);
2909 out:
2910         return ret;
2911 }
2912
2913 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2914 {
2915         struct btrfs_trans_handle *trans;
2916         struct btrfs_path path;
2917         int ret = 0;
2918
2919         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2920                              I_ERR_NO_ORPHAN_ITEM |
2921                              I_ERR_LINK_COUNT_WRONG |
2922                              I_ERR_NO_INODE_ITEM |
2923                              I_ERR_FILE_EXTENT_ORPHAN |
2924                              I_ERR_FILE_EXTENT_DISCOUNT|
2925                              I_ERR_FILE_NBYTES_WRONG)))
2926                 return rec->errors;
2927
2928         /*
2929          * For nlink repair, it may create a dir and add link, so
2930          * 2 for parent(256)'s dir_index and dir_item
2931          * 2 for lost+found dir's inode_item and inode_ref
2932          * 1 for the new inode_ref of the file
2933          * 2 for lost+found dir's dir_index and dir_item for the file
2934          */
2935         trans = btrfs_start_transaction(root, 7);
2936         if (IS_ERR(trans))
2937                 return PTR_ERR(trans);
2938
2939         btrfs_init_path(&path);
2940         if (rec->errors & I_ERR_NO_INODE_ITEM)
2941                 ret = repair_inode_no_item(trans, root, &path, rec);
2942         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2943                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2944         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2945                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2946         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2947                 ret = repair_inode_isize(trans, root, &path, rec);
2948         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2949                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2950         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2951                 ret = repair_inode_nlinks(trans, root, &path, rec);
2952         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2953                 ret = repair_inode_nbytes(trans, root, &path, rec);
2954         btrfs_commit_transaction(trans, root);
2955         btrfs_release_path(&path);
2956         return ret;
2957 }
2958
2959 static int check_inode_recs(struct btrfs_root *root,
2960                             struct cache_tree *inode_cache)
2961 {
2962         struct cache_extent *cache;
2963         struct ptr_node *node;
2964         struct inode_record *rec;
2965         struct inode_backref *backref;
2966         int stage = 0;
2967         int ret = 0;
2968         int err = 0;
2969         u64 error = 0;
2970         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2971
2972         if (btrfs_root_refs(&root->root_item) == 0) {
2973                 if (!cache_tree_empty(inode_cache))
2974                         fprintf(stderr, "warning line %d\n", __LINE__);
2975                 return 0;
2976         }
2977
2978         /*
2979          * We need to record the highest inode number for later 'lost+found'
2980          * dir creation.
2981          * We must select an ino not used/referred by any existing inode, or
2982          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2983          * this may cause 'lost+found' dir has wrong nlinks.
2984          */
2985         cache = last_cache_extent(inode_cache);
2986         if (cache) {
2987                 node = container_of(cache, struct ptr_node, cache);
2988                 rec = node->data;
2989                 if (rec->ino > root->highest_inode)
2990                         root->highest_inode = rec->ino;
2991         }
2992
2993         /*
2994          * We need to repair backrefs first because we could change some of the
2995          * errors in the inode recs.
2996          *
2997          * We also need to go through and delete invalid backrefs first and then
2998          * add the correct ones second.  We do this because we may get EEXIST
2999          * when adding back the correct index because we hadn't yet deleted the
3000          * invalid index.
3001          *
3002          * For example, if we were missing a dir index then the directories
3003          * isize would be wrong, so if we fixed the isize to what we thought it
3004          * would be and then fixed the backref we'd still have a invalid fs, so
3005          * we need to add back the dir index and then check to see if the isize
3006          * is still wrong.
3007          */
3008         while (stage < 3) {
3009                 stage++;
3010                 if (stage == 3 && !err)
3011                         break;
3012
3013                 cache = search_cache_extent(inode_cache, 0);
3014                 while (repair && cache) {
3015                         node = container_of(cache, struct ptr_node, cache);
3016                         rec = node->data;
3017                         cache = next_cache_extent(cache);
3018
3019                         /* Need to free everything up and rescan */
3020                         if (stage == 3) {
3021                                 remove_cache_extent(inode_cache, &node->cache);
3022                                 free(node);
3023                                 free_inode_rec(rec);
3024                                 continue;
3025                         }
3026
3027                         if (list_empty(&rec->backrefs))
3028                                 continue;
3029
3030                         ret = repair_inode_backrefs(root, rec, inode_cache,
3031                                                     stage == 1);
3032                         if (ret < 0) {
3033                                 err = ret;
3034                                 stage = 2;
3035                                 break;
3036                         } if (ret > 0) {
3037                                 err = -EAGAIN;
3038                         }
3039                 }
3040         }
3041         if (err)
3042                 return err;
3043
3044         rec = get_inode_rec(inode_cache, root_dirid, 0);
3045         BUG_ON(IS_ERR(rec));
3046         if (rec) {
3047                 ret = check_root_dir(rec);
3048                 if (ret) {
3049                         fprintf(stderr, "root %llu root dir %llu error\n",
3050                                 (unsigned long long)root->root_key.objectid,
3051                                 (unsigned long long)root_dirid);
3052                         print_inode_error(root, rec);
3053                         error++;
3054                 }
3055         } else {
3056                 if (repair) {
3057                         struct btrfs_trans_handle *trans;
3058
3059                         trans = btrfs_start_transaction(root, 1);
3060                         if (IS_ERR(trans)) {
3061                                 err = PTR_ERR(trans);
3062                                 return err;
3063                         }
3064
3065                         fprintf(stderr,
3066                                 "root %llu missing its root dir, recreating\n",
3067                                 (unsigned long long)root->objectid);
3068
3069                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3070                         BUG_ON(ret);
3071
3072                         btrfs_commit_transaction(trans, root);
3073                         return -EAGAIN;
3074                 }
3075
3076                 fprintf(stderr, "root %llu root dir %llu not found\n",
3077                         (unsigned long long)root->root_key.objectid,
3078                         (unsigned long long)root_dirid);
3079         }
3080
3081         while (1) {
3082                 cache = search_cache_extent(inode_cache, 0);
3083                 if (!cache)
3084                         break;
3085                 node = container_of(cache, struct ptr_node, cache);
3086                 rec = node->data;
3087                 remove_cache_extent(inode_cache, &node->cache);
3088                 free(node);
3089                 if (rec->ino == root_dirid ||
3090                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3091                         free_inode_rec(rec);
3092                         continue;
3093                 }
3094
3095                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3096                         ret = check_orphan_item(root, rec->ino);
3097                         if (ret == 0)
3098                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3099                         if (can_free_inode_rec(rec)) {
3100                                 free_inode_rec(rec);
3101                                 continue;
3102                         }
3103                 }
3104
3105                 if (!rec->found_inode_item)
3106                         rec->errors |= I_ERR_NO_INODE_ITEM;
3107                 if (rec->found_link != rec->nlink)
3108                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3109                 if (repair) {
3110                         ret = try_repair_inode(root, rec);
3111                         if (ret == 0 && can_free_inode_rec(rec)) {
3112                                 free_inode_rec(rec);
3113                                 continue;
3114                         }
3115                         ret = 0;
3116                 }
3117
3118                 if (!(repair && ret == 0))
3119                         error++;
3120                 print_inode_error(root, rec);
3121                 list_for_each_entry(backref, &rec->backrefs, list) {
3122                         if (!backref->found_dir_item)
3123                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3124                         if (!backref->found_dir_index)
3125                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3126                         if (!backref->found_inode_ref)
3127                                 backref->errors |= REF_ERR_NO_INODE_REF;
3128                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3129                                 " namelen %u name %s filetype %d errors %x",
3130                                 (unsigned long long)backref->dir,
3131                                 (unsigned long long)backref->index,
3132                                 backref->namelen, backref->name,
3133                                 backref->filetype, backref->errors);
3134                         print_ref_error(backref->errors);
3135                 }
3136                 free_inode_rec(rec);
3137         }
3138         return (error > 0) ? -1 : 0;
3139 }
3140
3141 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3142                                         u64 objectid)
3143 {
3144         struct cache_extent *cache;
3145         struct root_record *rec = NULL;
3146         int ret;
3147
3148         cache = lookup_cache_extent(root_cache, objectid, 1);
3149         if (cache) {
3150                 rec = container_of(cache, struct root_record, cache);
3151         } else {
3152                 rec = calloc(1, sizeof(*rec));
3153                 if (!rec)
3154                         return ERR_PTR(-ENOMEM);
3155                 rec->objectid = objectid;
3156                 INIT_LIST_HEAD(&rec->backrefs);
3157                 rec->cache.start = objectid;
3158                 rec->cache.size = 1;
3159
3160                 ret = insert_cache_extent(root_cache, &rec->cache);
3161                 if (ret)
3162                         return ERR_PTR(-EEXIST);
3163         }
3164         return rec;
3165 }
3166
3167 static struct root_backref *get_root_backref(struct root_record *rec,
3168                                              u64 ref_root, u64 dir, u64 index,
3169                                              const char *name, int namelen)
3170 {
3171         struct root_backref *backref;
3172
3173         list_for_each_entry(backref, &rec->backrefs, list) {
3174                 if (backref->ref_root != ref_root || backref->dir != dir ||
3175                     backref->namelen != namelen)
3176                         continue;
3177                 if (memcmp(name, backref->name, namelen))
3178                         continue;
3179                 return backref;
3180         }
3181
3182         backref = calloc(1, sizeof(*backref) + namelen + 1);
3183         if (!backref)
3184                 return NULL;
3185         backref->ref_root = ref_root;
3186         backref->dir = dir;
3187         backref->index = index;
3188         backref->namelen = namelen;
3189         memcpy(backref->name, name, namelen);
3190         backref->name[namelen] = '\0';
3191         list_add_tail(&backref->list, &rec->backrefs);
3192         return backref;
3193 }
3194
3195 static void free_root_record(struct cache_extent *cache)
3196 {
3197         struct root_record *rec;
3198         struct root_backref *backref;
3199
3200         rec = container_of(cache, struct root_record, cache);
3201         while (!list_empty(&rec->backrefs)) {
3202                 backref = to_root_backref(rec->backrefs.next);
3203                 list_del(&backref->list);
3204                 free(backref);
3205         }
3206
3207         free(rec);
3208 }
3209
3210 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3211
3212 static int add_root_backref(struct cache_tree *root_cache,
3213                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3214                             const char *name, int namelen,
3215                             int item_type, int errors)
3216 {
3217         struct root_record *rec;
3218         struct root_backref *backref;
3219
3220         rec = get_root_rec(root_cache, root_id);
3221         BUG_ON(IS_ERR(rec));
3222         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3223         BUG_ON(!backref);
3224
3225         backref->errors |= errors;
3226
3227         if (item_type != BTRFS_DIR_ITEM_KEY) {
3228                 if (backref->found_dir_index || backref->found_back_ref ||
3229                     backref->found_forward_ref) {
3230                         if (backref->index != index)
3231                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3232                 } else {
3233                         backref->index = index;
3234                 }
3235         }
3236
3237         if (item_type == BTRFS_DIR_ITEM_KEY) {
3238                 if (backref->found_forward_ref)
3239                         rec->found_ref++;
3240                 backref->found_dir_item = 1;
3241         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3242                 backref->found_dir_index = 1;
3243         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3244                 if (backref->found_forward_ref)
3245                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3246                 else if (backref->found_dir_item)
3247                         rec->found_ref++;
3248                 backref->found_forward_ref = 1;
3249         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3250                 if (backref->found_back_ref)
3251                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3252                 backref->found_back_ref = 1;
3253         } else {
3254                 BUG_ON(1);
3255         }
3256
3257         if (backref->found_forward_ref && backref->found_dir_item)
3258                 backref->reachable = 1;
3259         return 0;
3260 }
3261
3262 static int merge_root_recs(struct btrfs_root *root,
3263                            struct cache_tree *src_cache,
3264                            struct cache_tree *dst_cache)
3265 {
3266         struct cache_extent *cache;
3267         struct ptr_node *node;
3268         struct inode_record *rec;
3269         struct inode_backref *backref;
3270         int ret = 0;
3271
3272         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3273                 free_inode_recs_tree(src_cache);
3274                 return 0;
3275         }
3276
3277         while (1) {
3278                 cache = search_cache_extent(src_cache, 0);
3279                 if (!cache)
3280                         break;
3281                 node = container_of(cache, struct ptr_node, cache);
3282                 rec = node->data;
3283                 remove_cache_extent(src_cache, &node->cache);
3284                 free(node);
3285
3286                 ret = is_child_root(root, root->objectid, rec->ino);
3287                 if (ret < 0)
3288                         break;
3289                 else if (ret == 0)
3290                         goto skip;
3291
3292                 list_for_each_entry(backref, &rec->backrefs, list) {
3293                         BUG_ON(backref->found_inode_ref);
3294                         if (backref->found_dir_item)
3295                                 add_root_backref(dst_cache, rec->ino,
3296                                         root->root_key.objectid, backref->dir,
3297                                         backref->index, backref->name,
3298                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3299                                         backref->errors);
3300                         if (backref->found_dir_index)
3301                                 add_root_backref(dst_cache, rec->ino,
3302                                         root->root_key.objectid, backref->dir,
3303                                         backref->index, backref->name,
3304                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3305                                         backref->errors);
3306                 }
3307 skip:
3308                 free_inode_rec(rec);
3309         }
3310         if (ret < 0)
3311                 return ret;
3312         return 0;
3313 }
3314
3315 static int check_root_refs(struct btrfs_root *root,
3316                            struct cache_tree *root_cache)
3317 {
3318         struct root_record *rec;
3319         struct root_record *ref_root;
3320         struct root_backref *backref;
3321         struct cache_extent *cache;
3322         int loop = 1;
3323         int ret;
3324         int error;
3325         int errors = 0;
3326
3327         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3328         BUG_ON(IS_ERR(rec));
3329         rec->found_ref = 1;
3330
3331         /* fixme: this can not detect circular references */
3332         while (loop) {
3333                 loop = 0;
3334                 cache = search_cache_extent(root_cache, 0);
3335                 while (1) {
3336                         if (!cache)
3337                                 break;
3338                         rec = container_of(cache, struct root_record, cache);
3339                         cache = next_cache_extent(cache);
3340
3341                         if (rec->found_ref == 0)
3342                                 continue;
3343
3344                         list_for_each_entry(backref, &rec->backrefs, list) {
3345                                 if (!backref->reachable)
3346                                         continue;
3347
3348                                 ref_root = get_root_rec(root_cache,
3349                                                         backref->ref_root);
3350                                 BUG_ON(IS_ERR(ref_root));
3351                                 if (ref_root->found_ref > 0)
3352                                         continue;
3353
3354                                 backref->reachable = 0;
3355                                 rec->found_ref--;
3356                                 if (rec->found_ref == 0)
3357                                         loop = 1;
3358                         }
3359                 }
3360         }
3361
3362         cache = search_cache_extent(root_cache, 0);
3363         while (1) {
3364                 if (!cache)
3365                         break;
3366                 rec = container_of(cache, struct root_record, cache);
3367                 cache = next_cache_extent(cache);
3368
3369                 if (rec->found_ref == 0 &&
3370                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3371                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3372                         ret = check_orphan_item(root->fs_info->tree_root,
3373                                                 rec->objectid);
3374                         if (ret == 0)
3375                                 continue;
3376
3377                         /*
3378                          * If we don't have a root item then we likely just have
3379                          * a dir item in a snapshot for this root but no actual
3380                          * ref key or anything so it's meaningless.
3381                          */
3382                         if (!rec->found_root_item)
3383                                 continue;
3384                         errors++;
3385                         fprintf(stderr, "fs tree %llu not referenced\n",
3386                                 (unsigned long long)rec->objectid);
3387                 }
3388
3389                 error = 0;
3390                 if (rec->found_ref > 0 && !rec->found_root_item)
3391                         error = 1;
3392                 list_for_each_entry(backref, &rec->backrefs, list) {
3393                         if (!backref->found_dir_item)
3394                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3395                         if (!backref->found_dir_index)
3396                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3397                         if (!backref->found_back_ref)
3398                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3399                         if (!backref->found_forward_ref)
3400                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3401                         if (backref->reachable && backref->errors)
3402                                 error = 1;
3403                 }
3404                 if (!error)
3405                         continue;
3406
3407                 errors++;
3408                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3409                         (unsigned long long)rec->objectid, rec->found_ref,
3410                          rec->found_root_item ? "" : "not found");
3411
3412                 list_for_each_entry(backref, &rec->backrefs, list) {
3413                         if (!backref->reachable)
3414                                 continue;
3415                         if (!backref->errors && rec->found_root_item)
3416                                 continue;
3417                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3418                                 " index %llu namelen %u name %s errors %x\n",
3419                                 (unsigned long long)backref->ref_root,
3420                                 (unsigned long long)backref->dir,
3421                                 (unsigned long long)backref->index,
3422                                 backref->namelen, backref->name,
3423                                 backref->errors);
3424                         print_ref_error(backref->errors);
3425                 }
3426         }
3427         return errors > 0 ? 1 : 0;
3428 }
3429
3430 static int process_root_ref(struct extent_buffer *eb, int slot,
3431                             struct btrfs_key *key,
3432                             struct cache_tree *root_cache)
3433 {
3434         u64 dirid;
3435         u64 index;
3436         u32 len;
3437         u32 name_len;
3438         struct btrfs_root_ref *ref;
3439         char namebuf[BTRFS_NAME_LEN];
3440         int error;
3441
3442         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3443
3444         dirid = btrfs_root_ref_dirid(eb, ref);
3445         index = btrfs_root_ref_sequence(eb, ref);
3446         name_len = btrfs_root_ref_name_len(eb, ref);
3447
3448         if (name_len <= BTRFS_NAME_LEN) {
3449                 len = name_len;
3450                 error = 0;
3451         } else {
3452                 len = BTRFS_NAME_LEN;
3453                 error = REF_ERR_NAME_TOO_LONG;
3454         }
3455         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3456
3457         if (key->type == BTRFS_ROOT_REF_KEY) {
3458                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3459                                  index, namebuf, len, key->type, error);
3460         } else {
3461                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3462                                  index, namebuf, len, key->type, error);
3463         }
3464         return 0;
3465 }
3466
3467 static void free_corrupt_block(struct cache_extent *cache)
3468 {
3469         struct btrfs_corrupt_block *corrupt;
3470
3471         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3472         free(corrupt);
3473 }
3474
3475 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3476
3477 /*
3478  * Repair the btree of the given root.
3479  *
3480  * The fix is to remove the node key in corrupt_blocks cache_tree.
3481  * and rebalance the tree.
3482  * After the fix, the btree should be writeable.
3483  */
3484 static int repair_btree(struct btrfs_root *root,
3485                         struct cache_tree *corrupt_blocks)
3486 {
3487         struct btrfs_trans_handle *trans;
3488         struct btrfs_path path;
3489         struct btrfs_corrupt_block *corrupt;
3490         struct cache_extent *cache;
3491         struct btrfs_key key;
3492         u64 offset;
3493         int level;
3494         int ret = 0;
3495
3496         if (cache_tree_empty(corrupt_blocks))
3497                 return 0;
3498
3499         trans = btrfs_start_transaction(root, 1);
3500         if (IS_ERR(trans)) {
3501                 ret = PTR_ERR(trans);
3502                 fprintf(stderr, "Error starting transaction: %s\n",
3503                         strerror(-ret));
3504                 return ret;
3505         }
3506         btrfs_init_path(&path);
3507         cache = first_cache_extent(corrupt_blocks);
3508         while (cache) {
3509                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3510                                        cache);
3511                 level = corrupt->level;
3512                 path.lowest_level = level;
3513                 key.objectid = corrupt->key.objectid;
3514                 key.type = corrupt->key.type;
3515                 key.offset = corrupt->key.offset;
3516
3517                 /*
3518                  * Here we don't want to do any tree balance, since it may
3519                  * cause a balance with corrupted brother leaf/node,
3520                  * so ins_len set to 0 here.
3521                  * Balance will be done after all corrupt node/leaf is deleted.
3522                  */
3523                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3524                 if (ret < 0)
3525                         goto out;
3526                 offset = btrfs_node_blockptr(path.nodes[level],
3527                                              path.slots[level]);
3528
3529                 /* Remove the ptr */
3530                 ret = btrfs_del_ptr(trans, root, &path, level,
3531                                     path.slots[level]);
3532                 if (ret < 0)
3533                         goto out;
3534                 /*
3535                  * Remove the corresponding extent
3536                  * return value is not concerned.
3537                  */
3538                 btrfs_release_path(&path);
3539                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3540                                         0, root->root_key.objectid,
3541                                         level - 1, 0);
3542                 cache = next_cache_extent(cache);
3543         }
3544
3545         /* Balance the btree using btrfs_search_slot() */
3546         cache = first_cache_extent(corrupt_blocks);
3547         while (cache) {
3548                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3549                                        cache);
3550                 memcpy(&key, &corrupt->key, sizeof(key));
3551                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3552                 if (ret < 0)
3553                         goto out;
3554                 /* return will always >0 since it won't find the item */
3555                 ret = 0;
3556                 btrfs_release_path(&path);
3557                 cache = next_cache_extent(cache);
3558         }
3559 out:
3560         btrfs_commit_transaction(trans, root);
3561         btrfs_release_path(&path);
3562         return ret;
3563 }
3564
3565 static int check_fs_root(struct btrfs_root *root,
3566                          struct cache_tree *root_cache,
3567                          struct walk_control *wc)
3568 {
3569         int ret = 0;
3570         int err = 0;
3571         int wret;
3572         int level;
3573         struct btrfs_path path;
3574         struct shared_node root_node;
3575         struct root_record *rec;
3576         struct btrfs_root_item *root_item = &root->root_item;
3577         struct cache_tree corrupt_blocks;
3578         struct orphan_data_extent *orphan;
3579         struct orphan_data_extent *tmp;
3580         enum btrfs_tree_block_status status;
3581         struct node_refs nrefs;
3582
3583         /*
3584          * Reuse the corrupt_block cache tree to record corrupted tree block
3585          *
3586          * Unlike the usage in extent tree check, here we do it in a per
3587          * fs/subvol tree base.
3588          */
3589         cache_tree_init(&corrupt_blocks);
3590         root->fs_info->corrupt_blocks = &corrupt_blocks;
3591
3592         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3593                 rec = get_root_rec(root_cache, root->root_key.objectid);
3594                 BUG_ON(IS_ERR(rec));
3595                 if (btrfs_root_refs(root_item) > 0)
3596                         rec->found_root_item = 1;
3597         }
3598
3599         btrfs_init_path(&path);
3600         memset(&root_node, 0, sizeof(root_node));
3601         cache_tree_init(&root_node.root_cache);
3602         cache_tree_init(&root_node.inode_cache);
3603         memset(&nrefs, 0, sizeof(nrefs));
3604
3605         /* Move the orphan extent record to corresponding inode_record */
3606         list_for_each_entry_safe(orphan, tmp,
3607                                  &root->orphan_data_extents, list) {
3608                 struct inode_record *inode;
3609
3610                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3611                                       1);
3612                 BUG_ON(IS_ERR(inode));
3613                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3614                 list_move(&orphan->list, &inode->orphan_extents);
3615         }
3616
3617         level = btrfs_header_level(root->node);
3618         memset(wc->nodes, 0, sizeof(wc->nodes));
3619         wc->nodes[level] = &root_node;
3620         wc->active_node = level;
3621         wc->root_level = level;
3622
3623         /* We may not have checked the root block, lets do that now */
3624         if (btrfs_is_leaf(root->node))
3625                 status = btrfs_check_leaf(root, NULL, root->node);
3626         else
3627                 status = btrfs_check_node(root, NULL, root->node);
3628         if (status != BTRFS_TREE_BLOCK_CLEAN)
3629                 return -EIO;
3630
3631         if (btrfs_root_refs(root_item) > 0 ||
3632             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3633                 path.nodes[level] = root->node;
3634                 extent_buffer_get(root->node);
3635                 path.slots[level] = 0;
3636         } else {
3637                 struct btrfs_key key;
3638                 struct btrfs_disk_key found_key;
3639
3640                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3641                 level = root_item->drop_level;
3642                 path.lowest_level = level;
3643                 if (level > btrfs_header_level(root->node) ||
3644                     level >= BTRFS_MAX_LEVEL) {
3645                         error("ignoring invalid drop level: %u", level);
3646                         goto skip_walking;
3647                 }
3648                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3649                 if (wret < 0)
3650                         goto skip_walking;
3651                 btrfs_node_key(path.nodes[level], &found_key,
3652                                 path.slots[level]);
3653                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3654                                         sizeof(found_key)));
3655         }
3656
3657         while (1) {
3658                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3659                 if (wret < 0)
3660                         ret = wret;
3661                 if (wret != 0)
3662                         break;
3663
3664                 wret = walk_up_tree(root, &path, wc, &level);
3665                 if (wret < 0)
3666                         ret = wret;
3667                 if (wret != 0)
3668                         break;
3669         }
3670 skip_walking:
3671         btrfs_release_path(&path);
3672
3673         if (!cache_tree_empty(&corrupt_blocks)) {
3674                 struct cache_extent *cache;
3675                 struct btrfs_corrupt_block *corrupt;
3676
3677                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3678                        root->root_key.objectid);
3679                 cache = first_cache_extent(&corrupt_blocks);
3680                 while (cache) {
3681                         corrupt = container_of(cache,
3682                                                struct btrfs_corrupt_block,
3683                                                cache);
3684                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3685                                cache->start, corrupt->level,
3686                                corrupt->key.objectid, corrupt->key.type,
3687                                corrupt->key.offset);
3688                         cache = next_cache_extent(cache);
3689                 }
3690                 if (repair) {
3691                         printf("Try to repair the btree for root %llu\n",
3692                                root->root_key.objectid);
3693                         ret = repair_btree(root, &corrupt_blocks);
3694                         if (ret < 0)
3695                                 fprintf(stderr, "Failed to repair btree: %s\n",
3696                                         strerror(-ret));
3697                         if (!ret)
3698                                 printf("Btree for root %llu is fixed\n",
3699                                        root->root_key.objectid);
3700                 }
3701         }
3702
3703         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3704         if (err < 0)
3705                 ret = err;
3706
3707         if (root_node.current) {
3708                 root_node.current->checked = 1;
3709                 maybe_free_inode_rec(&root_node.inode_cache,
3710                                 root_node.current);
3711         }
3712
3713         err = check_inode_recs(root, &root_node.inode_cache);
3714         if (!ret)
3715                 ret = err;
3716
3717         free_corrupt_blocks_tree(&corrupt_blocks);
3718         root->fs_info->corrupt_blocks = NULL;
3719         free_orphan_data_extents(&root->orphan_data_extents);
3720         return ret;
3721 }
3722
3723 static int fs_root_objectid(u64 objectid)
3724 {
3725         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3726             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3727                 return 1;
3728         return is_fstree(objectid);
3729 }
3730
3731 static int check_fs_roots(struct btrfs_root *root,
3732                           struct cache_tree *root_cache)
3733 {
3734         struct btrfs_path path;
3735         struct btrfs_key key;
3736         struct walk_control wc;
3737         struct extent_buffer *leaf, *tree_node;
3738         struct btrfs_root *tmp_root;
3739         struct btrfs_root *tree_root = root->fs_info->tree_root;
3740         int ret;
3741         int err = 0;
3742
3743         if (ctx.progress_enabled) {
3744                 ctx.tp = TASK_FS_ROOTS;
3745                 task_start(ctx.info);
3746         }
3747
3748         /*
3749          * Just in case we made any changes to the extent tree that weren't
3750          * reflected into the free space cache yet.
3751          */
3752         if (repair)
3753                 reset_cached_block_groups(root->fs_info);
3754         memset(&wc, 0, sizeof(wc));
3755         cache_tree_init(&wc.shared);
3756         btrfs_init_path(&path);
3757
3758 again:
3759         key.offset = 0;
3760         key.objectid = 0;
3761         key.type = BTRFS_ROOT_ITEM_KEY;
3762         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3763         if (ret < 0) {
3764                 err = 1;
3765                 goto out;
3766         }
3767         tree_node = tree_root->node;
3768         while (1) {
3769                 if (tree_node != tree_root->node) {
3770                         free_root_recs_tree(root_cache);
3771                         btrfs_release_path(&path);
3772                         goto again;
3773                 }
3774                 leaf = path.nodes[0];
3775                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3776                         ret = btrfs_next_leaf(tree_root, &path);
3777                         if (ret) {
3778                                 if (ret < 0)
3779                                         err = 1;
3780                                 break;
3781                         }
3782                         leaf = path.nodes[0];
3783                 }
3784                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3785                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3786                     fs_root_objectid(key.objectid)) {
3787                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3788                                 tmp_root = btrfs_read_fs_root_no_cache(
3789                                                 root->fs_info, &key);
3790                         } else {
3791                                 key.offset = (u64)-1;
3792                                 tmp_root = btrfs_read_fs_root(
3793                                                 root->fs_info, &key);
3794                         }
3795                         if (IS_ERR(tmp_root)) {
3796                                 err = 1;
3797                                 goto next;
3798                         }
3799                         ret = check_fs_root(tmp_root, root_cache, &wc);
3800                         if (ret == -EAGAIN) {
3801                                 free_root_recs_tree(root_cache);
3802                                 btrfs_release_path(&path);
3803                                 goto again;
3804                         }
3805                         if (ret)
3806                                 err = 1;
3807                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3808                                 btrfs_free_fs_root(tmp_root);
3809                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3810                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3811                         process_root_ref(leaf, path.slots[0], &key,
3812                                          root_cache);
3813                 }
3814 next:
3815                 path.slots[0]++;
3816         }
3817 out:
3818         btrfs_release_path(&path);
3819         if (err)
3820                 free_extent_cache_tree(&wc.shared);
3821         if (!cache_tree_empty(&wc.shared))
3822                 fprintf(stderr, "warning line %d\n", __LINE__);
3823
3824         task_stop(ctx.info);
3825
3826         return err;
3827 }
3828
3829 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3830 {
3831         struct list_head *cur = rec->backrefs.next;
3832         struct extent_backref *back;
3833         struct tree_backref *tback;
3834         struct data_backref *dback;
3835         u64 found = 0;
3836         int err = 0;
3837
3838         while(cur != &rec->backrefs) {
3839                 back = to_extent_backref(cur);
3840                 cur = cur->next;
3841                 if (!back->found_extent_tree) {
3842                         err = 1;
3843                         if (!print_errs)
3844                                 goto out;
3845                         if (back->is_data) {
3846                                 dback = to_data_backref(back);
3847                                 fprintf(stderr, "Backref %llu %s %llu"
3848                                         " owner %llu offset %llu num_refs %lu"
3849                                         " not found in extent tree\n",
3850                                         (unsigned long long)rec->start,
3851                                         back->full_backref ?
3852                                         "parent" : "root",
3853                                         back->full_backref ?
3854                                         (unsigned long long)dback->parent:
3855                                         (unsigned long long)dback->root,
3856                                         (unsigned long long)dback->owner,
3857                                         (unsigned long long)dback->offset,
3858                                         (unsigned long)dback->num_refs);
3859                         } else {
3860                                 tback = to_tree_backref(back);
3861                                 fprintf(stderr, "Backref %llu parent %llu"
3862                                         " root %llu not found in extent tree\n",
3863                                         (unsigned long long)rec->start,
3864                                         (unsigned long long)tback->parent,
3865                                         (unsigned long long)tback->root);
3866                         }
3867                 }
3868                 if (!back->is_data && !back->found_ref) {
3869                         err = 1;
3870                         if (!print_errs)
3871                                 goto out;
3872                         tback = to_tree_backref(back);
3873                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3874                                 (unsigned long long)rec->start,
3875                                 back->full_backref ? "parent" : "root",
3876                                 back->full_backref ?
3877                                 (unsigned long long)tback->parent :
3878                                 (unsigned long long)tback->root, back);
3879                 }
3880                 if (back->is_data) {
3881                         dback = to_data_backref(back);
3882                         if (dback->found_ref != dback->num_refs) {
3883                                 err = 1;
3884                                 if (!print_errs)
3885                                         goto out;
3886                                 fprintf(stderr, "Incorrect local backref count"
3887                                         " on %llu %s %llu owner %llu"
3888                                         " offset %llu found %u wanted %u back %p\n",
3889                                         (unsigned long long)rec->start,
3890                                         back->full_backref ?
3891                                         "parent" : "root",
3892                                         back->full_backref ?
3893                                         (unsigned long long)dback->parent:
3894                                         (unsigned long long)dback->root,
3895                                         (unsigned long long)dback->owner,
3896                                         (unsigned long long)dback->offset,
3897                                         dback->found_ref, dback->num_refs, back);
3898                         }
3899                         if (dback->disk_bytenr != rec->start) {
3900                                 err = 1;
3901                                 if (!print_errs)
3902                                         goto out;
3903                                 fprintf(stderr, "Backref disk bytenr does not"
3904                                         " match extent record, bytenr=%llu, "
3905                                         "ref bytenr=%llu\n",
3906                                         (unsigned long long)rec->start,
3907                                         (unsigned long long)dback->disk_bytenr);
3908                         }
3909
3910                         if (dback->bytes != rec->nr) {
3911                                 err = 1;
3912                                 if (!print_errs)
3913                                         goto out;
3914                                 fprintf(stderr, "Backref bytes do not match "
3915                                         "extent backref, bytenr=%llu, ref "
3916                                         "bytes=%llu, backref bytes=%llu\n",
3917                                         (unsigned long long)rec->start,
3918                                         (unsigned long long)rec->nr,
3919                                         (unsigned long long)dback->bytes);
3920                         }
3921                 }
3922                 if (!back->is_data) {
3923                         found += 1;
3924                 } else {
3925                         dback = to_data_backref(back);
3926                         found += dback->found_ref;
3927                 }
3928         }
3929         if (found != rec->refs) {
3930                 err = 1;
3931                 if (!print_errs)
3932                         goto out;
3933                 fprintf(stderr, "Incorrect global backref count "
3934                         "on %llu found %llu wanted %llu\n",
3935                         (unsigned long long)rec->start,
3936                         (unsigned long long)found,
3937                         (unsigned long long)rec->refs);
3938         }
3939 out:
3940         return err;
3941 }
3942
3943 static int free_all_extent_backrefs(struct extent_record *rec)
3944 {
3945         struct extent_backref *back;
3946         struct list_head *cur;
3947         while (!list_empty(&rec->backrefs)) {
3948                 cur = rec->backrefs.next;
3949                 back = to_extent_backref(cur);
3950                 list_del(cur);
3951                 free(back);
3952         }
3953         return 0;
3954 }
3955
3956 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3957                                      struct cache_tree *extent_cache)
3958 {
3959         struct cache_extent *cache;
3960         struct extent_record *rec;
3961
3962         while (1) {
3963                 cache = first_cache_extent(extent_cache);
3964                 if (!cache)
3965                         break;
3966                 rec = container_of(cache, struct extent_record, cache);
3967                 remove_cache_extent(extent_cache, cache);
3968                 free_all_extent_backrefs(rec);
3969                 free(rec);
3970         }
3971 }
3972
3973 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3974                                  struct extent_record *rec)
3975 {
3976         if (rec->content_checked && rec->owner_ref_checked &&
3977             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3978             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3979             !rec->bad_full_backref && !rec->crossing_stripes &&
3980             !rec->wrong_chunk_type) {
3981                 remove_cache_extent(extent_cache, &rec->cache);
3982                 free_all_extent_backrefs(rec);
3983                 list_del_init(&rec->list);
3984                 free(rec);
3985         }
3986         return 0;
3987 }
3988
3989 static int check_owner_ref(struct btrfs_root *root,
3990                             struct extent_record *rec,
3991                             struct extent_buffer *buf)
3992 {
3993         struct extent_backref *node;
3994         struct tree_backref *back;
3995         struct btrfs_root *ref_root;
3996         struct btrfs_key key;
3997         struct btrfs_path path;
3998         struct extent_buffer *parent;
3999         int level;
4000         int found = 0;
4001         int ret;
4002
4003         list_for_each_entry(node, &rec->backrefs, list) {
4004                 if (node->is_data)
4005                         continue;
4006                 if (!node->found_ref)
4007                         continue;
4008                 if (node->full_backref)
4009                         continue;
4010                 back = to_tree_backref(node);
4011                 if (btrfs_header_owner(buf) == back->root)
4012                         return 0;
4013         }
4014         BUG_ON(rec->is_root);
4015
4016         /* try to find the block by search corresponding fs tree */
4017         key.objectid = btrfs_header_owner(buf);
4018         key.type = BTRFS_ROOT_ITEM_KEY;
4019         key.offset = (u64)-1;
4020
4021         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4022         if (IS_ERR(ref_root))
4023                 return 1;
4024
4025         level = btrfs_header_level(buf);
4026         if (level == 0)
4027                 btrfs_item_key_to_cpu(buf, &key, 0);
4028         else
4029                 btrfs_node_key_to_cpu(buf, &key, 0);
4030
4031         btrfs_init_path(&path);
4032         path.lowest_level = level + 1;
4033         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4034         if (ret < 0)
4035                 return 0;
4036
4037         parent = path.nodes[level + 1];
4038         if (parent && buf->start == btrfs_node_blockptr(parent,
4039                                                         path.slots[level + 1]))
4040                 found = 1;
4041
4042         btrfs_release_path(&path);
4043         return found ? 0 : 1;
4044 }
4045
4046 static int is_extent_tree_record(struct extent_record *rec)
4047 {
4048         struct list_head *cur = rec->backrefs.next;
4049         struct extent_backref *node;
4050         struct tree_backref *back;
4051         int is_extent = 0;
4052
4053         while(cur != &rec->backrefs) {
4054                 node = to_extent_backref(cur);
4055                 cur = cur->next;
4056                 if (node->is_data)
4057                         return 0;
4058                 back = to_tree_backref(node);
4059                 if (node->full_backref)
4060                         return 0;
4061                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4062                         is_extent = 1;
4063         }
4064         return is_extent;
4065 }
4066
4067
4068 static int record_bad_block_io(struct btrfs_fs_info *info,
4069                                struct cache_tree *extent_cache,
4070                                u64 start, u64 len)
4071 {
4072         struct extent_record *rec;
4073         struct cache_extent *cache;
4074         struct btrfs_key key;
4075
4076         cache = lookup_cache_extent(extent_cache, start, len);
4077         if (!cache)
4078                 return 0;
4079
4080         rec = container_of(cache, struct extent_record, cache);
4081         if (!is_extent_tree_record(rec))
4082                 return 0;
4083
4084         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4085         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4086 }
4087
4088 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4089                        struct extent_buffer *buf, int slot)
4090 {
4091         if (btrfs_header_level(buf)) {
4092                 struct btrfs_key_ptr ptr1, ptr2;
4093
4094                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4095                                    sizeof(struct btrfs_key_ptr));
4096                 read_extent_buffer(buf, &ptr2,
4097                                    btrfs_node_key_ptr_offset(slot + 1),
4098                                    sizeof(struct btrfs_key_ptr));
4099                 write_extent_buffer(buf, &ptr1,
4100                                     btrfs_node_key_ptr_offset(slot + 1),
4101                                     sizeof(struct btrfs_key_ptr));
4102                 write_extent_buffer(buf, &ptr2,
4103                                     btrfs_node_key_ptr_offset(slot),
4104                                     sizeof(struct btrfs_key_ptr));
4105                 if (slot == 0) {
4106                         struct btrfs_disk_key key;
4107                         btrfs_node_key(buf, &key, 0);
4108                         btrfs_fixup_low_keys(root, path, &key,
4109                                              btrfs_header_level(buf) + 1);
4110                 }
4111         } else {
4112                 struct btrfs_item *item1, *item2;
4113                 struct btrfs_key k1, k2;
4114                 char *item1_data, *item2_data;
4115                 u32 item1_offset, item2_offset, item1_size, item2_size;
4116
4117                 item1 = btrfs_item_nr(slot);
4118                 item2 = btrfs_item_nr(slot + 1);
4119                 btrfs_item_key_to_cpu(buf, &k1, slot);
4120                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4121                 item1_offset = btrfs_item_offset(buf, item1);
4122                 item2_offset = btrfs_item_offset(buf, item2);
4123                 item1_size = btrfs_item_size(buf, item1);
4124                 item2_size = btrfs_item_size(buf, item2);
4125
4126                 item1_data = malloc(item1_size);
4127                 if (!item1_data)
4128                         return -ENOMEM;
4129                 item2_data = malloc(item2_size);
4130                 if (!item2_data) {
4131                         free(item1_data);
4132                         return -ENOMEM;
4133                 }
4134
4135                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4136                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4137
4138                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4139                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4140                 free(item1_data);
4141                 free(item2_data);
4142
4143                 btrfs_set_item_offset(buf, item1, item2_offset);
4144                 btrfs_set_item_offset(buf, item2, item1_offset);
4145                 btrfs_set_item_size(buf, item1, item2_size);
4146                 btrfs_set_item_size(buf, item2, item1_size);
4147
4148                 path->slots[0] = slot;
4149                 btrfs_set_item_key_unsafe(root, path, &k2);
4150                 path->slots[0] = slot + 1;
4151                 btrfs_set_item_key_unsafe(root, path, &k1);
4152         }
4153         return 0;
4154 }
4155
4156 static int fix_key_order(struct btrfs_trans_handle *trans,
4157                          struct btrfs_root *root,
4158                          struct btrfs_path *path)
4159 {
4160         struct extent_buffer *buf;
4161         struct btrfs_key k1, k2;
4162         int i;
4163         int level = path->lowest_level;
4164         int ret = -EIO;
4165
4166         buf = path->nodes[level];
4167         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4168                 if (level) {
4169                         btrfs_node_key_to_cpu(buf, &k1, i);
4170                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4171                 } else {
4172                         btrfs_item_key_to_cpu(buf, &k1, i);
4173                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4174                 }
4175                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4176                         continue;
4177                 ret = swap_values(root, path, buf, i);
4178                 if (ret)
4179                         break;
4180                 btrfs_mark_buffer_dirty(buf);
4181                 i = 0;
4182         }
4183         return ret;
4184 }
4185
4186 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4187                              struct btrfs_root *root,
4188                              struct btrfs_path *path,
4189                              struct extent_buffer *buf, int slot)
4190 {
4191         struct btrfs_key key;
4192         int nritems = btrfs_header_nritems(buf);
4193
4194         btrfs_item_key_to_cpu(buf, &key, slot);
4195
4196         /* These are all the keys we can deal with missing. */
4197         if (key.type != BTRFS_DIR_INDEX_KEY &&
4198             key.type != BTRFS_EXTENT_ITEM_KEY &&
4199             key.type != BTRFS_METADATA_ITEM_KEY &&
4200             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4201             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4202                 return -1;
4203
4204         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4205                (unsigned long long)key.objectid, key.type,
4206                (unsigned long long)key.offset, slot, buf->start);
4207         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4208                               btrfs_item_nr_offset(slot + 1),
4209                               sizeof(struct btrfs_item) *
4210                               (nritems - slot - 1));
4211         btrfs_set_header_nritems(buf, nritems - 1);
4212         if (slot == 0) {
4213                 struct btrfs_disk_key disk_key;
4214
4215                 btrfs_item_key(buf, &disk_key, 0);
4216                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4217         }
4218         btrfs_mark_buffer_dirty(buf);
4219         return 0;
4220 }
4221
4222 static int fix_item_offset(struct btrfs_trans_handle *trans,
4223                            struct btrfs_root *root,
4224                            struct btrfs_path *path)
4225 {
4226         struct extent_buffer *buf;
4227         int i;
4228         int ret = 0;
4229
4230         /* We should only get this for leaves */
4231         BUG_ON(path->lowest_level);
4232         buf = path->nodes[0];
4233 again:
4234         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4235                 unsigned int shift = 0, offset;
4236
4237                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4238                     BTRFS_LEAF_DATA_SIZE(root)) {
4239                         if (btrfs_item_end_nr(buf, i) >
4240                             BTRFS_LEAF_DATA_SIZE(root)) {
4241                                 ret = delete_bogus_item(trans, root, path,
4242                                                         buf, i);
4243                                 if (!ret)
4244                                         goto again;
4245                                 fprintf(stderr, "item is off the end of the "
4246                                         "leaf, can't fix\n");
4247                                 ret = -EIO;
4248                                 break;
4249                         }
4250                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4251                                 btrfs_item_end_nr(buf, i);
4252                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4253                            btrfs_item_offset_nr(buf, i - 1)) {
4254                         if (btrfs_item_end_nr(buf, i) >
4255                             btrfs_item_offset_nr(buf, i - 1)) {
4256                                 ret = delete_bogus_item(trans, root, path,
4257                                                         buf, i);
4258                                 if (!ret)
4259                                         goto again;
4260                                 fprintf(stderr, "items overlap, can't fix\n");
4261                                 ret = -EIO;
4262                                 break;
4263                         }
4264                         shift = btrfs_item_offset_nr(buf, i - 1) -
4265                                 btrfs_item_end_nr(buf, i);
4266                 }
4267                 if (!shift)
4268                         continue;
4269
4270                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4271                        i, shift, (unsigned long long)buf->start);
4272                 offset = btrfs_item_offset_nr(buf, i);
4273                 memmove_extent_buffer(buf,
4274                                       btrfs_leaf_data(buf) + offset + shift,
4275                                       btrfs_leaf_data(buf) + offset,
4276                                       btrfs_item_size_nr(buf, i));
4277                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4278                                       offset + shift);
4279                 btrfs_mark_buffer_dirty(buf);
4280         }
4281
4282         /*
4283          * We may have moved things, in which case we want to exit so we don't
4284          * write those changes out.  Once we have proper abort functionality in
4285          * progs this can be changed to something nicer.
4286          */
4287         BUG_ON(ret);
4288         return ret;
4289 }
4290
4291 /*
4292  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4293  * then just return -EIO.
4294  */
4295 static int try_to_fix_bad_block(struct btrfs_root *root,
4296                                 struct extent_buffer *buf,
4297                                 enum btrfs_tree_block_status status)
4298 {
4299         struct btrfs_trans_handle *trans;
4300         struct ulist *roots;
4301         struct ulist_node *node;
4302         struct btrfs_root *search_root;
4303         struct btrfs_path *path;
4304         struct ulist_iterator iter;
4305         struct btrfs_key root_key, key;
4306         int ret;
4307
4308         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4309             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4310                 return -EIO;
4311
4312         path = btrfs_alloc_path();
4313         if (!path)
4314                 return -EIO;
4315
4316         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4317                                    0, &roots);
4318         if (ret) {
4319                 btrfs_free_path(path);
4320                 return -EIO;
4321         }
4322
4323         ULIST_ITER_INIT(&iter);
4324         while ((node = ulist_next(roots, &iter))) {
4325                 root_key.objectid = node->val;
4326                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4327                 root_key.offset = (u64)-1;
4328
4329                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4330                 if (IS_ERR(root)) {
4331                         ret = -EIO;
4332                         break;
4333                 }
4334
4335
4336                 trans = btrfs_start_transaction(search_root, 0);
4337                 if (IS_ERR(trans)) {
4338                         ret = PTR_ERR(trans);
4339                         break;
4340                 }
4341
4342                 path->lowest_level = btrfs_header_level(buf);
4343                 path->skip_check_block = 1;
4344                 if (path->lowest_level)
4345                         btrfs_node_key_to_cpu(buf, &key, 0);
4346                 else
4347                         btrfs_item_key_to_cpu(buf, &key, 0);
4348                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4349                 if (ret) {
4350                         ret = -EIO;
4351                         btrfs_commit_transaction(trans, search_root);
4352                         break;
4353                 }
4354                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4355                         ret = fix_key_order(trans, search_root, path);
4356                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4357                         ret = fix_item_offset(trans, search_root, path);
4358                 if (ret) {
4359                         btrfs_commit_transaction(trans, search_root);
4360                         break;
4361                 }
4362                 btrfs_release_path(path);
4363                 btrfs_commit_transaction(trans, search_root);
4364         }
4365         ulist_free(roots);
4366         btrfs_free_path(path);
4367         return ret;
4368 }
4369
4370 static int check_block(struct btrfs_root *root,
4371                        struct cache_tree *extent_cache,
4372                        struct extent_buffer *buf, u64 flags)
4373 {
4374         struct extent_record *rec;
4375         struct cache_extent *cache;
4376         struct btrfs_key key;
4377         enum btrfs_tree_block_status status;
4378         int ret = 0;
4379         int level;
4380
4381         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4382         if (!cache)
4383                 return 1;
4384         rec = container_of(cache, struct extent_record, cache);
4385         rec->generation = btrfs_header_generation(buf);
4386
4387         level = btrfs_header_level(buf);
4388         if (btrfs_header_nritems(buf) > 0) {
4389
4390                 if (level == 0)
4391                         btrfs_item_key_to_cpu(buf, &key, 0);
4392                 else
4393                         btrfs_node_key_to_cpu(buf, &key, 0);
4394
4395                 rec->info_objectid = key.objectid;
4396         }
4397         rec->info_level = level;
4398
4399         if (btrfs_is_leaf(buf))
4400                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4401         else
4402                 status = btrfs_check_node(root, &rec->parent_key, buf);
4403
4404         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4405                 if (repair)
4406                         status = try_to_fix_bad_block(root, buf, status);
4407                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4408                         ret = -EIO;
4409                         fprintf(stderr, "bad block %llu\n",
4410                                 (unsigned long long)buf->start);
4411                 } else {
4412                         /*
4413                          * Signal to callers we need to start the scan over
4414                          * again since we'll have cowed blocks.
4415                          */
4416                         ret = -EAGAIN;
4417                 }
4418         } else {
4419                 rec->content_checked = 1;
4420                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4421                         rec->owner_ref_checked = 1;
4422                 else {
4423                         ret = check_owner_ref(root, rec, buf);
4424                         if (!ret)
4425                                 rec->owner_ref_checked = 1;
4426                 }
4427         }
4428         if (!ret)
4429                 maybe_free_extent_rec(extent_cache, rec);
4430         return ret;
4431 }
4432
4433 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4434                                                 u64 parent, u64 root)
4435 {
4436         struct list_head *cur = rec->backrefs.next;
4437         struct extent_backref *node;
4438         struct tree_backref *back;
4439
4440         while(cur != &rec->backrefs) {
4441                 node = to_extent_backref(cur);
4442                 cur = cur->next;
4443                 if (node->is_data)
4444                         continue;
4445                 back = to_tree_backref(node);
4446                 if (parent > 0) {
4447                         if (!node->full_backref)
4448                                 continue;
4449                         if (parent == back->parent)
4450                                 return back;
4451                 } else {
4452                         if (node->full_backref)
4453                                 continue;
4454                         if (back->root == root)
4455                                 return back;
4456                 }
4457         }
4458         return NULL;
4459 }
4460
4461 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4462                                                 u64 parent, u64 root)
4463 {
4464         struct tree_backref *ref = malloc(sizeof(*ref));
4465
4466         if (!ref)
4467                 return NULL;
4468         memset(&ref->node, 0, sizeof(ref->node));
4469         if (parent > 0) {
4470                 ref->parent = parent;
4471                 ref->node.full_backref = 1;
4472         } else {
4473                 ref->root = root;
4474                 ref->node.full_backref = 0;
4475         }
4476         list_add_tail(&ref->node.list, &rec->backrefs);
4477
4478         return ref;
4479 }
4480
4481 static struct data_backref *find_data_backref(struct extent_record *rec,
4482                                                 u64 parent, u64 root,
4483                                                 u64 owner, u64 offset,
4484                                                 int found_ref,
4485                                                 u64 disk_bytenr, u64 bytes)
4486 {
4487         struct list_head *cur = rec->backrefs.next;
4488         struct extent_backref *node;
4489         struct data_backref *back;
4490
4491         while(cur != &rec->backrefs) {
4492                 node = to_extent_backref(cur);
4493                 cur = cur->next;
4494                 if (!node->is_data)
4495                         continue;
4496                 back = to_data_backref(node);
4497                 if (parent > 0) {
4498                         if (!node->full_backref)
4499                                 continue;
4500                         if (parent == back->parent)
4501                                 return back;
4502                 } else {
4503                         if (node->full_backref)
4504                                 continue;
4505                         if (back->root == root && back->owner == owner &&
4506                             back->offset == offset) {
4507                                 if (found_ref && node->found_ref &&
4508                                     (back->bytes != bytes ||
4509                                     back->disk_bytenr != disk_bytenr))
4510                                         continue;
4511                                 return back;
4512                         }
4513                 }
4514         }
4515         return NULL;
4516 }
4517
4518 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4519                                                 u64 parent, u64 root,
4520                                                 u64 owner, u64 offset,
4521                                                 u64 max_size)
4522 {
4523         struct data_backref *ref = malloc(sizeof(*ref));
4524
4525         if (!ref)
4526                 return NULL;
4527         memset(&ref->node, 0, sizeof(ref->node));
4528         ref->node.is_data = 1;
4529
4530         if (parent > 0) {
4531                 ref->parent = parent;
4532                 ref->owner = 0;
4533                 ref->offset = 0;
4534                 ref->node.full_backref = 1;
4535         } else {
4536                 ref->root = root;
4537                 ref->owner = owner;
4538                 ref->offset = offset;
4539                 ref->node.full_backref = 0;
4540         }
4541         ref->bytes = max_size;
4542         ref->found_ref = 0;
4543         ref->num_refs = 0;
4544         list_add_tail(&ref->node.list, &rec->backrefs);
4545         if (max_size > rec->max_size)
4546                 rec->max_size = max_size;
4547         return ref;
4548 }
4549
4550 /* Check if the type of extent matches with its chunk */
4551 static void check_extent_type(struct extent_record *rec)
4552 {
4553         struct btrfs_block_group_cache *bg_cache;
4554
4555         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4556         if (!bg_cache)
4557                 return;
4558
4559         /* data extent, check chunk directly*/
4560         if (!rec->metadata) {
4561                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4562                         rec->wrong_chunk_type = 1;
4563                 return;
4564         }
4565
4566         /* metadata extent, check the obvious case first */
4567         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4568                                  BTRFS_BLOCK_GROUP_METADATA))) {
4569                 rec->wrong_chunk_type = 1;
4570                 return;
4571         }
4572
4573         /*
4574          * Check SYSTEM extent, as it's also marked as metadata, we can only
4575          * make sure it's a SYSTEM extent by its backref
4576          */
4577         if (!list_empty(&rec->backrefs)) {
4578                 struct extent_backref *node;
4579                 struct tree_backref *tback;
4580                 u64 bg_type;
4581
4582                 node = to_extent_backref(rec->backrefs.next);
4583                 if (node->is_data) {
4584                         /* tree block shouldn't have data backref */
4585                         rec->wrong_chunk_type = 1;
4586                         return;
4587                 }
4588                 tback = container_of(node, struct tree_backref, node);
4589
4590                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4591                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4592                 else
4593                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4594                 if (!(bg_cache->flags & bg_type))
4595                         rec->wrong_chunk_type = 1;
4596         }
4597 }
4598
4599 /*
4600  * Allocate a new extent record, fill default values from @tmpl and insert int
4601  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4602  * the cache, otherwise it fails.
4603  */
4604 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4605                 struct extent_record *tmpl)
4606 {
4607         struct extent_record *rec;
4608         int ret = 0;
4609
4610         rec = malloc(sizeof(*rec));
4611         if (!rec)
4612                 return -ENOMEM;
4613         rec->start = tmpl->start;
4614         rec->max_size = tmpl->max_size;
4615         rec->nr = max(tmpl->nr, tmpl->max_size);
4616         rec->found_rec = tmpl->found_rec;
4617         rec->content_checked = tmpl->content_checked;
4618         rec->owner_ref_checked = tmpl->owner_ref_checked;
4619         rec->num_duplicates = 0;
4620         rec->metadata = tmpl->metadata;
4621         rec->flag_block_full_backref = FLAG_UNSET;
4622         rec->bad_full_backref = 0;
4623         rec->crossing_stripes = 0;
4624         rec->wrong_chunk_type = 0;
4625         rec->is_root = tmpl->is_root;
4626         rec->refs = tmpl->refs;
4627         rec->extent_item_refs = tmpl->extent_item_refs;
4628         rec->parent_generation = tmpl->parent_generation;
4629         INIT_LIST_HEAD(&rec->backrefs);
4630         INIT_LIST_HEAD(&rec->dups);
4631         INIT_LIST_HEAD(&rec->list);
4632         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4633         rec->cache.start = tmpl->start;
4634         rec->cache.size = tmpl->nr;
4635         ret = insert_cache_extent(extent_cache, &rec->cache);
4636         if (ret) {
4637                 free(rec);
4638                 return ret;
4639         }
4640         bytes_used += rec->nr;
4641
4642         if (tmpl->metadata)
4643                 rec->crossing_stripes = check_crossing_stripes(global_info,
4644                                 rec->start, global_info->tree_root->nodesize);
4645         check_extent_type(rec);
4646         return ret;
4647 }
4648
4649 /*
4650  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4651  * some are hints:
4652  * - refs              - if found, increase refs
4653  * - is_root           - if found, set
4654  * - content_checked   - if found, set
4655  * - owner_ref_checked - if found, set
4656  *
4657  * If not found, create a new one, initialize and insert.
4658  */
4659 static int add_extent_rec(struct cache_tree *extent_cache,
4660                 struct extent_record *tmpl)
4661 {
4662         struct extent_record *rec;
4663         struct cache_extent *cache;
4664         int ret = 0;
4665         int dup = 0;
4666
4667         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4668         if (cache) {
4669                 rec = container_of(cache, struct extent_record, cache);
4670                 if (tmpl->refs)
4671                         rec->refs++;
4672                 if (rec->nr == 1)
4673                         rec->nr = max(tmpl->nr, tmpl->max_size);
4674
4675                 /*
4676                  * We need to make sure to reset nr to whatever the extent
4677                  * record says was the real size, this way we can compare it to
4678                  * the backrefs.
4679                  */
4680                 if (tmpl->found_rec) {
4681                         if (tmpl->start != rec->start || rec->found_rec) {
4682                                 struct extent_record *tmp;
4683
4684                                 dup = 1;
4685                                 if (list_empty(&rec->list))
4686                                         list_add_tail(&rec->list,
4687                                                       &duplicate_extents);
4688
4689                                 /*
4690                                  * We have to do this song and dance in case we
4691                                  * find an extent record that falls inside of
4692                                  * our current extent record but does not have
4693                                  * the same objectid.
4694                                  */
4695                                 tmp = malloc(sizeof(*tmp));
4696                                 if (!tmp)
4697                                         return -ENOMEM;
4698                                 tmp->start = tmpl->start;
4699                                 tmp->max_size = tmpl->max_size;
4700                                 tmp->nr = tmpl->nr;
4701                                 tmp->found_rec = 1;
4702                                 tmp->metadata = tmpl->metadata;
4703                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4704                                 INIT_LIST_HEAD(&tmp->list);
4705                                 list_add_tail(&tmp->list, &rec->dups);
4706                                 rec->num_duplicates++;
4707                         } else {
4708                                 rec->nr = tmpl->nr;
4709                                 rec->found_rec = 1;
4710                         }
4711                 }
4712
4713                 if (tmpl->extent_item_refs && !dup) {
4714                         if (rec->extent_item_refs) {
4715                                 fprintf(stderr, "block %llu rec "
4716                                         "extent_item_refs %llu, passed %llu\n",
4717                                         (unsigned long long)tmpl->start,
4718                                         (unsigned long long)
4719                                                         rec->extent_item_refs,
4720                                         (unsigned long long)tmpl->extent_item_refs);
4721                         }
4722                         rec->extent_item_refs = tmpl->extent_item_refs;
4723                 }
4724                 if (tmpl->is_root)
4725                         rec->is_root = 1;
4726                 if (tmpl->content_checked)
4727                         rec->content_checked = 1;
4728                 if (tmpl->owner_ref_checked)
4729                         rec->owner_ref_checked = 1;
4730                 memcpy(&rec->parent_key, &tmpl->parent_key,
4731                                 sizeof(tmpl->parent_key));
4732                 if (tmpl->parent_generation)
4733                         rec->parent_generation = tmpl->parent_generation;
4734                 if (rec->max_size < tmpl->max_size)
4735                         rec->max_size = tmpl->max_size;
4736
4737                 /*
4738                  * A metadata extent can't cross stripe_len boundary, otherwise
4739                  * kernel scrub won't be able to handle it.
4740                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4741                  * it.
4742                  */
4743                 if (tmpl->metadata)
4744                         rec->crossing_stripes = check_crossing_stripes(
4745                                         global_info, rec->start,
4746                                         global_info->tree_root->nodesize);
4747                 check_extent_type(rec);
4748                 maybe_free_extent_rec(extent_cache, rec);
4749                 return ret;
4750         }
4751
4752         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4753
4754         return ret;
4755 }
4756
4757 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4758                             u64 parent, u64 root, int found_ref)
4759 {
4760         struct extent_record *rec;
4761         struct tree_backref *back;
4762         struct cache_extent *cache;
4763         int ret;
4764
4765         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4766         if (!cache) {
4767                 struct extent_record tmpl;
4768
4769                 memset(&tmpl, 0, sizeof(tmpl));
4770                 tmpl.start = bytenr;
4771                 tmpl.nr = 1;
4772                 tmpl.metadata = 1;
4773
4774                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4775                 if (ret)
4776                         return ret;
4777
4778                 /* really a bug in cache_extent implement now */
4779                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4780                 if (!cache)
4781                         return -ENOENT;
4782         }
4783
4784         rec = container_of(cache, struct extent_record, cache);
4785         if (rec->start != bytenr) {
4786                 /*
4787                  * Several cause, from unaligned bytenr to over lapping extents
4788                  */
4789                 return -EEXIST;
4790         }
4791
4792         back = find_tree_backref(rec, parent, root);
4793         if (!back) {
4794                 back = alloc_tree_backref(rec, parent, root);
4795                 if (!back)
4796                         return -ENOMEM;
4797         }
4798
4799         if (found_ref) {
4800                 if (back->node.found_ref) {
4801                         fprintf(stderr, "Extent back ref already exists "
4802                                 "for %llu parent %llu root %llu \n",
4803                                 (unsigned long long)bytenr,
4804                                 (unsigned long long)parent,
4805                                 (unsigned long long)root);
4806                 }
4807                 back->node.found_ref = 1;
4808         } else {
4809                 if (back->node.found_extent_tree) {
4810                         fprintf(stderr, "Extent back ref already exists "
4811                                 "for %llu parent %llu root %llu \n",
4812                                 (unsigned long long)bytenr,
4813                                 (unsigned long long)parent,
4814                                 (unsigned long long)root);
4815                 }
4816                 back->node.found_extent_tree = 1;
4817         }
4818         check_extent_type(rec);
4819         maybe_free_extent_rec(extent_cache, rec);
4820         return 0;
4821 }
4822
4823 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4824                             u64 parent, u64 root, u64 owner, u64 offset,
4825                             u32 num_refs, int found_ref, u64 max_size)
4826 {
4827         struct extent_record *rec;
4828         struct data_backref *back;
4829         struct cache_extent *cache;
4830         int ret;
4831
4832         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4833         if (!cache) {
4834                 struct extent_record tmpl;
4835
4836                 memset(&tmpl, 0, sizeof(tmpl));
4837                 tmpl.start = bytenr;
4838                 tmpl.nr = 1;
4839                 tmpl.max_size = max_size;
4840
4841                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4842                 if (ret)
4843                         return ret;
4844
4845                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4846                 if (!cache)
4847                         abort();
4848         }
4849
4850         rec = container_of(cache, struct extent_record, cache);
4851         if (rec->max_size < max_size)
4852                 rec->max_size = max_size;
4853
4854         /*
4855          * If found_ref is set then max_size is the real size and must match the
4856          * existing refs.  So if we have already found a ref then we need to
4857          * make sure that this ref matches the existing one, otherwise we need
4858          * to add a new backref so we can notice that the backrefs don't match
4859          * and we need to figure out who is telling the truth.  This is to
4860          * account for that awful fsync bug I introduced where we'd end up with
4861          * a btrfs_file_extent_item that would have its length include multiple
4862          * prealloc extents or point inside of a prealloc extent.
4863          */
4864         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4865                                  bytenr, max_size);
4866         if (!back) {
4867                 back = alloc_data_backref(rec, parent, root, owner, offset,
4868                                           max_size);
4869                 BUG_ON(!back);
4870         }
4871
4872         if (found_ref) {
4873                 BUG_ON(num_refs != 1);
4874                 if (back->node.found_ref)
4875                         BUG_ON(back->bytes != max_size);
4876                 back->node.found_ref = 1;
4877                 back->found_ref += 1;
4878                 back->bytes = max_size;
4879                 back->disk_bytenr = bytenr;
4880                 rec->refs += 1;
4881                 rec->content_checked = 1;
4882                 rec->owner_ref_checked = 1;
4883         } else {
4884                 if (back->node.found_extent_tree) {
4885                         fprintf(stderr, "Extent back ref already exists "
4886                                 "for %llu parent %llu root %llu "
4887                                 "owner %llu offset %llu num_refs %lu\n",
4888                                 (unsigned long long)bytenr,
4889                                 (unsigned long long)parent,
4890                                 (unsigned long long)root,
4891                                 (unsigned long long)owner,
4892                                 (unsigned long long)offset,
4893                                 (unsigned long)num_refs);
4894                 }
4895                 back->num_refs = num_refs;
4896                 back->node.found_extent_tree = 1;
4897         }
4898         maybe_free_extent_rec(extent_cache, rec);
4899         return 0;
4900 }
4901
4902 static int add_pending(struct cache_tree *pending,
4903                        struct cache_tree *seen, u64 bytenr, u32 size)
4904 {
4905         int ret;
4906         ret = add_cache_extent(seen, bytenr, size);
4907         if (ret)
4908                 return ret;
4909         add_cache_extent(pending, bytenr, size);
4910         return 0;
4911 }
4912
4913 static int pick_next_pending(struct cache_tree *pending,
4914                         struct cache_tree *reada,
4915                         struct cache_tree *nodes,
4916                         u64 last, struct block_info *bits, int bits_nr,
4917                         int *reada_bits)
4918 {
4919         unsigned long node_start = last;
4920         struct cache_extent *cache;
4921         int ret;
4922
4923         cache = search_cache_extent(reada, 0);
4924         if (cache) {
4925                 bits[0].start = cache->start;
4926                 bits[0].size = cache->size;
4927                 *reada_bits = 1;
4928                 return 1;
4929         }
4930         *reada_bits = 0;
4931         if (node_start > 32768)
4932                 node_start -= 32768;
4933
4934         cache = search_cache_extent(nodes, node_start);
4935         if (!cache)
4936                 cache = search_cache_extent(nodes, 0);
4937
4938         if (!cache) {
4939                  cache = search_cache_extent(pending, 0);
4940                  if (!cache)
4941                          return 0;
4942                  ret = 0;
4943                  do {
4944                          bits[ret].start = cache->start;
4945                          bits[ret].size = cache->size;
4946                          cache = next_cache_extent(cache);
4947                          ret++;
4948                  } while (cache && ret < bits_nr);
4949                  return ret;
4950         }
4951
4952         ret = 0;
4953         do {
4954                 bits[ret].start = cache->start;
4955                 bits[ret].size = cache->size;
4956                 cache = next_cache_extent(cache);
4957                 ret++;
4958         } while (cache && ret < bits_nr);
4959
4960         if (bits_nr - ret > 8) {
4961                 u64 lookup = bits[0].start + bits[0].size;
4962                 struct cache_extent *next;
4963                 next = search_cache_extent(pending, lookup);
4964                 while(next) {
4965                         if (next->start - lookup > 32768)
4966                                 break;
4967                         bits[ret].start = next->start;
4968                         bits[ret].size = next->size;
4969                         lookup = next->start + next->size;
4970                         ret++;
4971                         if (ret == bits_nr)
4972                                 break;
4973                         next = next_cache_extent(next);
4974                         if (!next)
4975                                 break;
4976                 }
4977         }
4978         return ret;
4979 }
4980
4981 static void free_chunk_record(struct cache_extent *cache)
4982 {
4983         struct chunk_record *rec;
4984
4985         rec = container_of(cache, struct chunk_record, cache);
4986         list_del_init(&rec->list);
4987         list_del_init(&rec->dextents);
4988         free(rec);
4989 }
4990
4991 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4992 {
4993         cache_tree_free_extents(chunk_cache, free_chunk_record);
4994 }
4995
4996 static void free_device_record(struct rb_node *node)
4997 {
4998         struct device_record *rec;
4999
5000         rec = container_of(node, struct device_record, node);
5001         free(rec);
5002 }
5003
5004 FREE_RB_BASED_TREE(device_cache, free_device_record);
5005
5006 int insert_block_group_record(struct block_group_tree *tree,
5007                               struct block_group_record *bg_rec)
5008 {
5009         int ret;
5010
5011         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5012         if (ret)
5013                 return ret;
5014
5015         list_add_tail(&bg_rec->list, &tree->block_groups);
5016         return 0;
5017 }
5018
5019 static void free_block_group_record(struct cache_extent *cache)
5020 {
5021         struct block_group_record *rec;
5022
5023         rec = container_of(cache, struct block_group_record, cache);
5024         list_del_init(&rec->list);
5025         free(rec);
5026 }
5027
5028 void free_block_group_tree(struct block_group_tree *tree)
5029 {
5030         cache_tree_free_extents(&tree->tree, free_block_group_record);
5031 }
5032
5033 int insert_device_extent_record(struct device_extent_tree *tree,
5034                                 struct device_extent_record *de_rec)
5035 {
5036         int ret;
5037
5038         /*
5039          * Device extent is a bit different from the other extents, because
5040          * the extents which belong to the different devices may have the
5041          * same start and size, so we need use the special extent cache
5042          * search/insert functions.
5043          */
5044         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5045         if (ret)
5046                 return ret;
5047
5048         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5049         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5050         return 0;
5051 }
5052
5053 static void free_device_extent_record(struct cache_extent *cache)
5054 {
5055         struct device_extent_record *rec;
5056
5057         rec = container_of(cache, struct device_extent_record, cache);
5058         if (!list_empty(&rec->chunk_list))
5059                 list_del_init(&rec->chunk_list);
5060         if (!list_empty(&rec->device_list))
5061                 list_del_init(&rec->device_list);
5062         free(rec);
5063 }
5064
5065 void free_device_extent_tree(struct device_extent_tree *tree)
5066 {
5067         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5068 }
5069
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf.
 *
 * The item key supplies the extent bytenr (objectid) and the parent (offset).
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as tree
 * backrefs, all others as data backrefs carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
5090
5091 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5092                                             struct btrfs_key *key,
5093                                             int slot)
5094 {
5095         struct btrfs_chunk *ptr;
5096         struct chunk_record *rec;
5097         int num_stripes, i;
5098
5099         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5100         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5101
5102         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5103         if (!rec) {
5104                 fprintf(stderr, "memory allocation failed\n");
5105                 exit(-1);
5106         }
5107
5108         INIT_LIST_HEAD(&rec->list);
5109         INIT_LIST_HEAD(&rec->dextents);
5110         rec->bg_rec = NULL;
5111
5112         rec->cache.start = key->offset;
5113         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5114
5115         rec->generation = btrfs_header_generation(leaf);
5116
5117         rec->objectid = key->objectid;
5118         rec->type = key->type;
5119         rec->offset = key->offset;
5120
5121         rec->length = rec->cache.size;
5122         rec->owner = btrfs_chunk_owner(leaf, ptr);
5123         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5124         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5125         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5126         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5127         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5128         rec->num_stripes = num_stripes;
5129         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5130
5131         for (i = 0; i < rec->num_stripes; ++i) {
5132                 rec->stripes[i].devid =
5133                         btrfs_stripe_devid_nr(leaf, ptr, i);
5134                 rec->stripes[i].offset =
5135                         btrfs_stripe_offset_nr(leaf, ptr, i);
5136                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5137                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5138                                 BTRFS_UUID_SIZE);
5139         }
5140
5141         return rec;
5142 }
5143
5144 static int process_chunk_item(struct cache_tree *chunk_cache,
5145                               struct btrfs_key *key, struct extent_buffer *eb,
5146                               int slot)
5147 {
5148         struct chunk_record *rec;
5149         struct btrfs_chunk *chunk;
5150         int ret = 0;
5151
5152         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5153         /*
5154          * Do extra check for this chunk item,
5155          *
5156          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5157          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5158          * and owner<->key_type check.
5159          */
5160         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5161                                       key->offset);
5162         if (ret < 0) {
5163                 error("chunk(%llu, %llu) is not valid, ignore it",
5164                       key->offset, btrfs_chunk_length(eb, chunk));
5165                 return 0;
5166         }
5167         rec = btrfs_new_chunk_record(eb, key, slot);
5168         ret = insert_cache_extent(chunk_cache, &rec->cache);
5169         if (ret) {
5170                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5171                         rec->offset, rec->length);
5172                 free(rec);
5173         }
5174
5175         return ret;
5176 }
5177
5178 static int process_device_item(struct rb_root *dev_cache,
5179                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5180 {
5181         struct btrfs_dev_item *ptr;
5182         struct device_record *rec;
5183         int ret = 0;
5184
5185         ptr = btrfs_item_ptr(eb,
5186                 slot, struct btrfs_dev_item);
5187
5188         rec = malloc(sizeof(*rec));
5189         if (!rec) {
5190                 fprintf(stderr, "memory allocation failed\n");
5191                 return -ENOMEM;
5192         }
5193
5194         rec->devid = key->offset;
5195         rec->generation = btrfs_header_generation(eb);
5196
5197         rec->objectid = key->objectid;
5198         rec->type = key->type;
5199         rec->offset = key->offset;
5200
5201         rec->devid = btrfs_device_id(eb, ptr);
5202         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5203         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5204
5205         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5206         if (ret) {
5207                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5208                 free(rec);
5209         }
5210
5211         return ret;
5212 }
5213
5214 struct block_group_record *
5215 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5216                              int slot)
5217 {
5218         struct btrfs_block_group_item *ptr;
5219         struct block_group_record *rec;
5220
5221         rec = calloc(1, sizeof(*rec));
5222         if (!rec) {
5223                 fprintf(stderr, "memory allocation failed\n");
5224                 exit(-1);
5225         }
5226
5227         rec->cache.start = key->objectid;
5228         rec->cache.size = key->offset;
5229
5230         rec->generation = btrfs_header_generation(leaf);
5231
5232         rec->objectid = key->objectid;
5233         rec->type = key->type;
5234         rec->offset = key->offset;
5235
5236         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5237         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5238
5239         INIT_LIST_HEAD(&rec->list);
5240
5241         return rec;
5242 }
5243
5244 static int process_block_group_item(struct block_group_tree *block_group_cache,
5245                                     struct btrfs_key *key,
5246                                     struct extent_buffer *eb, int slot)
5247 {
5248         struct block_group_record *rec;
5249         int ret = 0;
5250
5251         rec = btrfs_new_block_group_record(eb, key, slot);
5252         ret = insert_block_group_record(block_group_cache, rec);
5253         if (ret) {
5254                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5255                         rec->objectid, rec->offset);
5256                 free(rec);
5257         }
5258
5259         return ret;
5260 }
5261
5262 struct device_extent_record *
5263 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5264                                struct btrfs_key *key, int slot)
5265 {
5266         struct device_extent_record *rec;
5267         struct btrfs_dev_extent *ptr;
5268
5269         rec = calloc(1, sizeof(*rec));
5270         if (!rec) {
5271                 fprintf(stderr, "memory allocation failed\n");
5272                 exit(-1);
5273         }
5274
5275         rec->cache.objectid = key->objectid;
5276         rec->cache.start = key->offset;
5277
5278         rec->generation = btrfs_header_generation(leaf);
5279
5280         rec->objectid = key->objectid;
5281         rec->type = key->type;
5282         rec->offset = key->offset;
5283
5284         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5285         rec->chunk_objecteid =
5286                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5287         rec->chunk_offset =
5288                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5289         rec->length = btrfs_dev_extent_length(leaf, ptr);
5290         rec->cache.size = rec->length;
5291
5292         INIT_LIST_HEAD(&rec->chunk_list);
5293         INIT_LIST_HEAD(&rec->device_list);
5294
5295         return rec;
5296 }
5297
5298 static int
5299 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5300                            struct btrfs_key *key, struct extent_buffer *eb,
5301                            int slot)
5302 {
5303         struct device_extent_record *rec;
5304         int ret;
5305
5306         rec = btrfs_new_device_extent_record(eb, key, slot);
5307         ret = insert_device_extent_record(dev_extent_cache, rec);
5308         if (ret) {
5309                 fprintf(stderr,
5310                         "Device extent[%llu, %llu, %llu] existed.\n",
5311                         rec->objectid, rec->offset, rec->length);
5312                 free(rec);
5313         }
5314
5315         return ret;
5316 }
5317
5318 static int process_extent_item(struct btrfs_root *root,
5319                                struct cache_tree *extent_cache,
5320                                struct extent_buffer *eb, int slot)
5321 {
5322         struct btrfs_extent_item *ei;
5323         struct btrfs_extent_inline_ref *iref;
5324         struct btrfs_extent_data_ref *dref;
5325         struct btrfs_shared_data_ref *sref;
5326         struct btrfs_key key;
5327         struct extent_record tmpl;
5328         unsigned long end;
5329         unsigned long ptr;
5330         int ret;
5331         int type;
5332         u32 item_size = btrfs_item_size_nr(eb, slot);
5333         u64 refs = 0;
5334         u64 offset;
5335         u64 num_bytes;
5336         int metadata = 0;
5337
5338         btrfs_item_key_to_cpu(eb, &key, slot);
5339
5340         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5341                 metadata = 1;
5342                 num_bytes = root->nodesize;
5343         } else {
5344                 num_bytes = key.offset;
5345         }
5346
5347         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5348                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5349                       key.objectid, root->sectorsize);
5350                 return -EIO;
5351         }
5352         if (item_size < sizeof(*ei)) {
5353 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5354                 struct btrfs_extent_item_v0 *ei0;
5355                 BUG_ON(item_size != sizeof(*ei0));
5356                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5357                 refs = btrfs_extent_refs_v0(eb, ei0);
5358 #else
5359                 BUG();
5360 #endif
5361                 memset(&tmpl, 0, sizeof(tmpl));
5362                 tmpl.start = key.objectid;
5363                 tmpl.nr = num_bytes;
5364                 tmpl.extent_item_refs = refs;
5365                 tmpl.metadata = metadata;
5366                 tmpl.found_rec = 1;
5367                 tmpl.max_size = num_bytes;
5368
5369                 return add_extent_rec(extent_cache, &tmpl);
5370         }
5371
5372         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5373         refs = btrfs_extent_refs(eb, ei);
5374         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5375                 metadata = 1;
5376         else
5377                 metadata = 0;
5378         if (metadata && num_bytes != root->nodesize) {
5379                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5380                       num_bytes, root->nodesize);
5381                 return -EIO;
5382         }
5383         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5384                 error("ignore invalid data extent, length %llu is not aligned to %u",
5385                       num_bytes, root->sectorsize);
5386                 return -EIO;
5387         }
5388
5389         memset(&tmpl, 0, sizeof(tmpl));
5390         tmpl.start = key.objectid;
5391         tmpl.nr = num_bytes;
5392         tmpl.extent_item_refs = refs;
5393         tmpl.metadata = metadata;
5394         tmpl.found_rec = 1;
5395         tmpl.max_size = num_bytes;
5396         add_extent_rec(extent_cache, &tmpl);
5397
5398         ptr = (unsigned long)(ei + 1);
5399         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5400             key.type == BTRFS_EXTENT_ITEM_KEY)
5401                 ptr += sizeof(struct btrfs_tree_block_info);
5402
5403         end = (unsigned long)ei + item_size;
5404         while (ptr < end) {
5405                 iref = (struct btrfs_extent_inline_ref *)ptr;
5406                 type = btrfs_extent_inline_ref_type(eb, iref);
5407                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5408                 switch (type) {
5409                 case BTRFS_TREE_BLOCK_REF_KEY:
5410                         ret = add_tree_backref(extent_cache, key.objectid,
5411                                         0, offset, 0);
5412                         if (ret < 0)
5413                                 error("add_tree_backref failed: %s",
5414                                       strerror(-ret));
5415                         break;
5416                 case BTRFS_SHARED_BLOCK_REF_KEY:
5417                         ret = add_tree_backref(extent_cache, key.objectid,
5418                                         offset, 0, 0);
5419                         if (ret < 0)
5420                                 error("add_tree_backref failed: %s",
5421                                       strerror(-ret));
5422                         break;
5423                 case BTRFS_EXTENT_DATA_REF_KEY:
5424                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5425                         add_data_backref(extent_cache, key.objectid, 0,
5426                                         btrfs_extent_data_ref_root(eb, dref),
5427                                         btrfs_extent_data_ref_objectid(eb,
5428                                                                        dref),
5429                                         btrfs_extent_data_ref_offset(eb, dref),
5430                                         btrfs_extent_data_ref_count(eb, dref),
5431                                         0, num_bytes);
5432                         break;
5433                 case BTRFS_SHARED_DATA_REF_KEY:
5434                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5435                         add_data_backref(extent_cache, key.objectid, offset,
5436                                         0, 0, 0,
5437                                         btrfs_shared_data_ref_count(eb, sref),
5438                                         0, num_bytes);
5439                         break;
5440                 default:
5441                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5442                                 key.objectid, key.type, num_bytes);
5443                         goto out;
5444                 }
5445                 ptr += btrfs_extent_inline_ref_size(type);
5446         }
5447         WARN_ON(ptr > end);
5448 out:
5449         return 0;
5450 }
5451
5452 static int check_cache_range(struct btrfs_root *root,
5453                              struct btrfs_block_group_cache *cache,
5454                              u64 offset, u64 bytes)
5455 {
5456         struct btrfs_free_space *entry;
5457         u64 *logical;
5458         u64 bytenr;
5459         int stripe_len;
5460         int i, nr, ret;
5461
5462         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5463                 bytenr = btrfs_sb_offset(i);
5464                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5465                                        cache->key.objectid, bytenr, 0,
5466                                        &logical, &nr, &stripe_len);
5467                 if (ret)
5468                         return ret;
5469
5470                 while (nr--) {
5471                         if (logical[nr] + stripe_len <= offset)
5472                                 continue;
5473                         if (offset + bytes <= logical[nr])
5474                                 continue;
5475                         if (logical[nr] == offset) {
5476                                 if (stripe_len >= bytes) {
5477                                         free(logical);
5478                                         return 0;
5479                                 }
5480                                 bytes -= stripe_len;
5481                                 offset += stripe_len;
5482                         } else if (logical[nr] < offset) {
5483                                 if (logical[nr] + stripe_len >=
5484                                     offset + bytes) {
5485                                         free(logical);
5486                                         return 0;
5487                                 }
5488                                 bytes = (offset + bytes) -
5489                                         (logical[nr] + stripe_len);
5490                                 offset = logical[nr] + stripe_len;
5491                         } else {
5492                                 /*
5493                                  * Could be tricky, the super may land in the
5494                                  * middle of the area we're checking.  First
5495                                  * check the easiest case, it's at the end.
5496                                  */
5497                                 if (logical[nr] + stripe_len >=
5498                                     bytes + offset) {
5499                                         bytes = logical[nr] - offset;
5500                                         continue;
5501                                 }
5502
5503                                 /* Check the left side */
5504                                 ret = check_cache_range(root, cache,
5505                                                         offset,
5506                                                         logical[nr] - offset);
5507                                 if (ret) {
5508                                         free(logical);
5509                                         return ret;
5510                                 }
5511
5512                                 /* Now we continue with the right side */
5513                                 bytes = (offset + bytes) -
5514                                         (logical[nr] + stripe_len);
5515                                 offset = logical[nr] + stripe_len;
5516                         }
5517                 }
5518
5519                 free(logical);
5520         }
5521
5522         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5523         if (!entry) {
5524                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5525                         offset, offset+bytes);
5526                 return -EINVAL;
5527         }
5528
5529         if (entry->offset != offset) {
5530                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5531                         entry->offset);
5532                 return -EINVAL;
5533         }
5534
5535         if (entry->bytes != bytes) {
5536                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5537                         bytes, entry->bytes, offset);
5538                 return -EINVAL;
5539         }
5540
5541         unlink_free_space(cache->free_space_ctl, entry);
5542         free(entry);
5543         return 0;
5544 }
5545
5546 static int verify_space_cache(struct btrfs_root *root,
5547                               struct btrfs_block_group_cache *cache)
5548 {
5549         struct btrfs_path *path;
5550         struct extent_buffer *leaf;
5551         struct btrfs_key key;
5552         u64 last;
5553         int ret = 0;
5554
5555         path = btrfs_alloc_path();
5556         if (!path)
5557                 return -ENOMEM;
5558
5559         root = root->fs_info->extent_root;
5560
5561         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5562
5563         key.objectid = last;
5564         key.offset = 0;
5565         key.type = BTRFS_EXTENT_ITEM_KEY;
5566
5567         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5568         if (ret < 0)
5569                 goto out;
5570         ret = 0;
5571         while (1) {
5572                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5573                         ret = btrfs_next_leaf(root, path);
5574                         if (ret < 0)
5575                                 goto out;
5576                         if (ret > 0) {
5577                                 ret = 0;
5578                                 break;
5579                         }
5580                 }
5581                 leaf = path->nodes[0];
5582                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5583                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5584                         break;
5585                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5586                     key.type != BTRFS_METADATA_ITEM_KEY) {
5587                         path->slots[0]++;
5588                         continue;
5589                 }
5590
5591                 if (last == key.objectid) {
5592                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5593                                 last = key.objectid + key.offset;
5594                         else
5595                                 last = key.objectid + root->nodesize;
5596                         path->slots[0]++;
5597                         continue;
5598                 }
5599
5600                 ret = check_cache_range(root, cache, last,
5601                                         key.objectid - last);
5602                 if (ret)
5603                         break;
5604                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5605                         last = key.objectid + key.offset;
5606                 else
5607                         last = key.objectid + root->nodesize;
5608                 path->slots[0]++;
5609         }
5610
5611         if (last < cache->key.objectid + cache->key.offset)
5612                 ret = check_cache_range(root, cache, last,
5613                                         cache->key.objectid +
5614                                         cache->key.offset - last);
5615
5616 out:
5617         btrfs_free_path(path);
5618
5619         if (!ret &&
5620             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5621                 fprintf(stderr, "There are still entries left in the space "
5622                         "cache\n");
5623                 ret = -EINVAL;
5624         }
5625
5626         return ret;
5627 }
5628
5629 static int check_space_cache(struct btrfs_root *root)
5630 {
5631         struct btrfs_block_group_cache *cache;
5632         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5633         int ret;
5634         int error = 0;
5635
5636         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5637             btrfs_super_generation(root->fs_info->super_copy) !=
5638             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5639                 printf("cache and super generation don't match, space cache "
5640                        "will be invalidated\n");
5641                 return 0;
5642         }
5643
5644         if (ctx.progress_enabled) {
5645                 ctx.tp = TASK_FREE_SPACE;
5646                 task_start(ctx.info);
5647         }
5648
5649         while (1) {
5650                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5651                 if (!cache)
5652                         break;
5653
5654                 start = cache->key.objectid + cache->key.offset;
5655                 if (!cache->free_space_ctl) {
5656                         if (btrfs_init_free_space_ctl(cache,
5657                                                       root->sectorsize)) {
5658                                 ret = -ENOMEM;
5659                                 break;
5660                         }
5661                 } else {
5662                         btrfs_remove_free_space_cache(cache);
5663                 }
5664
5665                 if (btrfs_fs_compat_ro(root->fs_info,
5666                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5667                         ret = exclude_super_stripes(root, cache);
5668                         if (ret) {
5669                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5670                                         strerror(-ret));
5671                                 error++;
5672                                 continue;
5673                         }
5674                         ret = load_free_space_tree(root->fs_info, cache);
5675                         free_excluded_extents(root, cache);
5676                         if (ret < 0) {
5677                                 fprintf(stderr, "could not load free space tree: %s\n",
5678                                         strerror(-ret));
5679                                 error++;
5680                                 continue;
5681                         }
5682                         error += ret;
5683                 } else {
5684                         ret = load_free_space_cache(root->fs_info, cache);
5685                         if (!ret)
5686                                 continue;
5687                 }
5688
5689                 ret = verify_space_cache(root, cache);
5690                 if (ret) {
5691                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5692                                 cache->key.objectid);
5693                         error++;
5694                 }
5695         }
5696
5697         task_stop(ctx.info);
5698
5699         return error ? -EINVAL : 0;
5700 }
5701
5702 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5703                         u64 num_bytes, unsigned long leaf_offset,
5704                         struct extent_buffer *eb) {
5705
5706         u64 offset = 0;
5707         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5708         char *data;
5709         unsigned long csum_offset;
5710         u32 csum;
5711         u32 csum_expected;
5712         u64 read_len;
5713         u64 data_checked = 0;
5714         u64 tmp;
5715         int ret = 0;
5716         int mirror;
5717         int num_copies;
5718
5719         if (num_bytes % root->sectorsize)
5720                 return -EINVAL;
5721
5722         data = malloc(num_bytes);
5723         if (!data)
5724                 return -ENOMEM;
5725
5726         while (offset < num_bytes) {
5727                 mirror = 0;
5728 again:
5729                 read_len = num_bytes - offset;
5730                 /* read as much space once a time */
5731                 ret = read_extent_data(root, data + offset,
5732                                 bytenr + offset, &read_len, mirror);
5733                 if (ret)
5734                         goto out;
5735                 data_checked = 0;
5736                 /* verify every 4k data's checksum */
5737                 while (data_checked < read_len) {
5738                         csum = ~(u32)0;
5739                         tmp = offset + data_checked;
5740
5741                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5742                                                csum, root->sectorsize);
5743                         btrfs_csum_final(csum, (u8 *)&csum);
5744
5745                         csum_offset = leaf_offset +
5746                                  tmp / root->sectorsize * csum_size;
5747                         read_extent_buffer(eb, (char *)&csum_expected,
5748                                            csum_offset, csum_size);
5749                         /* try another mirror */
5750                         if (csum != csum_expected) {
5751                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5752                                                 mirror, bytenr + tmp,
5753                                                 csum, csum_expected);
5754                                 num_copies = btrfs_num_copies(
5755                                                 &root->fs_info->mapping_tree,
5756                                                 bytenr, num_bytes);
5757                                 if (mirror < num_copies - 1) {
5758                                         mirror += 1;
5759                                         goto again;
5760                                 }
5761                         }
5762                         data_checked += root->sectorsize;
5763                 }
5764                 offset += read_len;
5765         }
5766 out:
5767         free(data);
5768         return ret;
5769 }
5770
5771 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5772                                u64 num_bytes)
5773 {
5774         struct btrfs_path *path;
5775         struct extent_buffer *leaf;
5776         struct btrfs_key key;
5777         int ret;
5778
5779         path = btrfs_alloc_path();
5780         if (!path) {
5781                 fprintf(stderr, "Error allocating path\n");
5782                 return -ENOMEM;
5783         }
5784
5785         key.objectid = bytenr;
5786         key.type = BTRFS_EXTENT_ITEM_KEY;
5787         key.offset = (u64)-1;
5788
5789 again:
5790         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5791                                 0, 0);
5792         if (ret < 0) {
5793                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5794                 btrfs_free_path(path);
5795                 return ret;
5796         } else if (ret) {
5797                 if (path->slots[0] > 0) {
5798                         path->slots[0]--;
5799                 } else {
5800                         ret = btrfs_prev_leaf(root, path);
5801                         if (ret < 0) {
5802                                 goto out;
5803                         } else if (ret > 0) {
5804                                 ret = 0;
5805                                 goto out;
5806                         }
5807                 }
5808         }
5809
5810         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5811
5812         /*
5813          * Block group items come before extent items if they have the same
5814          * bytenr, so walk back one more just in case.  Dear future traveller,
5815          * first congrats on mastering time travel.  Now if it's not too much
5816          * trouble could you go back to 2006 and tell Chris to make the
5817          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5818          * EXTENT_ITEM_KEY please?
5819          */
5820         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5821                 if (path->slots[0] > 0) {
5822                         path->slots[0]--;
5823                 } else {
5824                         ret = btrfs_prev_leaf(root, path);
5825                         if (ret < 0) {
5826                                 goto out;
5827                         } else if (ret > 0) {
5828                                 ret = 0;
5829                                 goto out;
5830                         }
5831                 }
5832                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5833         }
5834
5835         while (num_bytes) {
5836                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5837                         ret = btrfs_next_leaf(root, path);
5838                         if (ret < 0) {
5839                                 fprintf(stderr, "Error going to next leaf "
5840                                         "%d\n", ret);
5841                                 btrfs_free_path(path);
5842                                 return ret;
5843                         } else if (ret) {
5844                                 break;
5845                         }
5846                 }
5847                 leaf = path->nodes[0];
5848                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5849                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5850                         path->slots[0]++;
5851                         continue;
5852                 }
5853                 if (key.objectid + key.offset < bytenr) {
5854                         path->slots[0]++;
5855                         continue;
5856                 }
5857                 if (key.objectid > bytenr + num_bytes)
5858                         break;
5859
5860                 if (key.objectid == bytenr) {
5861                         if (key.offset >= num_bytes) {
5862                                 num_bytes = 0;
5863                                 break;
5864                         }
5865                         num_bytes -= key.offset;
5866                         bytenr += key.offset;
5867                 } else if (key.objectid < bytenr) {
5868                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5869                                 num_bytes = 0;
5870                                 break;
5871                         }
5872                         num_bytes = (bytenr + num_bytes) -
5873                                 (key.objectid + key.offset);
5874                         bytenr = key.objectid + key.offset;
5875                 } else {
5876                         if (key.objectid + key.offset < bytenr + num_bytes) {
5877                                 u64 new_start = key.objectid + key.offset;
5878                                 u64 new_bytes = bytenr + num_bytes - new_start;
5879
5880                                 /*
5881                                  * Weird case, the extent is in the middle of
5882                                  * our range, we'll have to search one side
5883                                  * and then the other.  Not sure if this happens
5884                                  * in real life, but no harm in coding it up
5885                                  * anyway just in case.
5886                                  */
5887                                 btrfs_release_path(path);
5888                                 ret = check_extent_exists(root, new_start,
5889                                                           new_bytes);
5890                                 if (ret) {
5891                                         fprintf(stderr, "Right section didn't "
5892                                                 "have a record\n");
5893                                         break;
5894                                 }
5895                                 num_bytes = key.objectid - bytenr;
5896                                 goto again;
5897                         }
5898                         num_bytes = key.objectid - bytenr;
5899                 }
5900                 path->slots[0]++;
5901         }
5902         ret = 0;
5903
5904 out:
5905         if (num_bytes && !ret) {
5906                 fprintf(stderr, "There are no extents for csum range "
5907                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5908                 ret = 1;
5909         }
5910
5911         btrfs_free_path(path);
5912         return ret;
5913 }
5914
5915 static int check_csums(struct btrfs_root *root)
5916 {
5917         struct btrfs_path *path;
5918         struct extent_buffer *leaf;
5919         struct btrfs_key key;
5920         u64 offset = 0, num_bytes = 0;
5921         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5922         int errors = 0;
5923         int ret;
5924         u64 data_len;
5925         unsigned long leaf_offset;
5926
5927         root = root->fs_info->csum_root;
5928         if (!extent_buffer_uptodate(root->node)) {
5929                 fprintf(stderr, "No valid csum tree found\n");
5930                 return -ENOENT;
5931         }
5932
5933         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5934         key.type = BTRFS_EXTENT_CSUM_KEY;
5935         key.offset = 0;
5936
5937         path = btrfs_alloc_path();
5938         if (!path)
5939                 return -ENOMEM;
5940
5941         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5942         if (ret < 0) {
5943                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5944                 btrfs_free_path(path);
5945                 return ret;
5946         }
5947
5948         if (ret > 0 && path->slots[0])
5949                 path->slots[0]--;
5950         ret = 0;
5951
5952         while (1) {
5953                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5954                         ret = btrfs_next_leaf(root, path);
5955                         if (ret < 0) {
5956                                 fprintf(stderr, "Error going to next leaf "
5957                                         "%d\n", ret);
5958                                 break;
5959                         }
5960                         if (ret)
5961                                 break;
5962                 }
5963                 leaf = path->nodes[0];
5964
5965                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5966                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5967                         path->slots[0]++;
5968                         continue;
5969                 }
5970
5971                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5972                               csum_size) * root->sectorsize;
5973                 if (!check_data_csum)
5974                         goto skip_csum_check;
5975                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5976                 ret = check_extent_csums(root, key.offset, data_len,
5977                                          leaf_offset, leaf);
5978                 if (ret)
5979                         break;
5980 skip_csum_check:
5981                 if (!num_bytes) {
5982                         offset = key.offset;
5983                 } else if (key.offset != offset + num_bytes) {
5984                         ret = check_extent_exists(root, offset, num_bytes);
5985                         if (ret) {
5986                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5987                                         "there is no extent record\n",
5988                                         offset, offset+num_bytes);
5989                                 errors++;
5990                         }
5991                         offset = key.offset;
5992                         num_bytes = 0;
5993                 }
5994                 num_bytes += data_len;
5995                 path->slots[0]++;
5996         }
5997
5998         btrfs_free_path(path);
5999         return errors;
6000 }
6001
6002 static int is_dropped_key(struct btrfs_key *key,
6003                           struct btrfs_key *drop_key) {
6004         if (key->objectid < drop_key->objectid)
6005                 return 1;
6006         else if (key->objectid == drop_key->objectid) {
6007                 if (key->type < drop_key->type)
6008                         return 1;
6009                 else if (key->type == drop_key->type) {
6010                         if (key->offset < drop_key->offset)
6011                                 return 1;
6012                 }
6013         }
6014         return 0;
6015 }
6016
6017 /*
6018  * Here are the rules for FULL_BACKREF.
6019  *
6020  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6021  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6022  *      FULL_BACKREF set.
6023  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6024  *    if it happened after the relocation occurred since we'll have dropped the
6025  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6026  *    have no real way to know for sure.
6027  *
6028  * We process the blocks one root at a time, and we start from the lowest root
6029  * objectid and go to the highest.  So we can just lookup the owner backref for
6030  * the record and if we don't find it then we know it doesn't exist and we have
6031  * a FULL BACKREF.
6032  *
6033  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6034  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6035  * be set or not and then we can check later once we've gathered all the refs.
6036  */
6037 static int calc_extent_flag(struct btrfs_root *root,
6038                            struct cache_tree *extent_cache,
6039                            struct extent_buffer *buf,
6040                            struct root_item_record *ri,
6041                            u64 *flags)
6042 {
6043         struct extent_record *rec;
6044         struct cache_extent *cache;
6045         struct tree_backref *tback;
6046         u64 owner = 0;
6047
6048         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6049         /* we have added this extent before */
6050         if (!cache)
6051                 return -ENOENT;
6052
6053         rec = container_of(cache, struct extent_record, cache);
6054
6055         /*
6056          * Except file/reloc tree, we can not have
6057          * FULL BACKREF MODE
6058          */
6059         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6060                 goto normal;
6061         /*
6062          * root node
6063          */
6064         if (buf->start == ri->bytenr)
6065                 goto normal;
6066
6067         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6068                 goto full_backref;
6069
6070         owner = btrfs_header_owner(buf);
6071         if (owner == ri->objectid)
6072                 goto normal;
6073
6074         tback = find_tree_backref(rec, 0, owner);
6075         if (!tback)
6076                 goto full_backref;
6077 normal:
6078         *flags = 0;
6079         if (rec->flag_block_full_backref != FLAG_UNSET &&
6080             rec->flag_block_full_backref != 0)
6081                 rec->bad_full_backref = 1;
6082         return 0;
6083 full_backref:
6084         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6085         if (rec->flag_block_full_backref != FLAG_UNSET &&
6086             rec->flag_block_full_backref != 1)
6087                 rec->bad_full_backref = 1;
6088         return 0;
6089 }
6090
6091 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6092 {
6093         fprintf(stderr, "Invalid key type(");
6094         print_key_type(stderr, 0, key_type);
6095         fprintf(stderr, ") found in root(");
6096         print_objectid(stderr, rootid, 0);
6097         fprintf(stderr, ")\n");
6098 }
6099
6100 /*
6101  * Check if the key is valid with its extent buffer.
6102  *
6103  * This is a early check in case invalid key exists in a extent buffer
6104  * This is not comprehensive yet, but should prevent wrong key/item passed
6105  * further
6106  */
6107 static int check_type_with_root(u64 rootid, u8 key_type)
6108 {
6109         switch (key_type) {
6110         /* Only valid in chunk tree */
6111         case BTRFS_DEV_ITEM_KEY:
6112         case BTRFS_CHUNK_ITEM_KEY:
6113                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6114                         goto err;
6115                 break;
6116         /* valid in csum and log tree */
6117         case BTRFS_CSUM_TREE_OBJECTID:
6118                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6119                       is_fstree(rootid)))
6120                         goto err;
6121                 break;
6122         case BTRFS_EXTENT_ITEM_KEY:
6123         case BTRFS_METADATA_ITEM_KEY:
6124         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6125                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6126                         goto err;
6127                 break;
6128         case BTRFS_ROOT_ITEM_KEY:
6129                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6130                         goto err;
6131                 break;
6132         case BTRFS_DEV_EXTENT_KEY:
6133                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6134                         goto err;
6135                 break;
6136         }
6137         return 0;
6138 err:
6139         report_mismatch_key_root(key_type, rootid);
6140         return -EINVAL;
6141 }
6142
6143 static int run_next_block(struct btrfs_root *root,
6144                           struct block_info *bits,
6145                           int bits_nr,
6146                           u64 *last,
6147                           struct cache_tree *pending,
6148                           struct cache_tree *seen,
6149                           struct cache_tree *reada,
6150                           struct cache_tree *nodes,
6151                           struct cache_tree *extent_cache,
6152                           struct cache_tree *chunk_cache,
6153                           struct rb_root *dev_cache,
6154                           struct block_group_tree *block_group_cache,
6155                           struct device_extent_tree *dev_extent_cache,
6156                           struct root_item_record *ri)
6157 {
6158         struct extent_buffer *buf;
6159         struct extent_record *rec = NULL;
6160         u64 bytenr;
6161         u32 size;
6162         u64 parent;
6163         u64 owner;
6164         u64 flags;
6165         u64 ptr;
6166         u64 gen = 0;
6167         int ret = 0;
6168         int i;
6169         int nritems;
6170         struct btrfs_key key;
6171         struct cache_extent *cache;
6172         int reada_bits;
6173
6174         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6175                                     bits_nr, &reada_bits);
6176         if (nritems == 0)
6177                 return 1;
6178
6179         if (!reada_bits) {
6180                 for(i = 0; i < nritems; i++) {
6181                         ret = add_cache_extent(reada, bits[i].start,
6182                                                bits[i].size);
6183                         if (ret == -EEXIST)
6184                                 continue;
6185
6186                         /* fixme, get the parent transid */
6187                         readahead_tree_block(root, bits[i].start,
6188                                              bits[i].size, 0);
6189                 }
6190         }
6191         *last = bits[0].start;
6192         bytenr = bits[0].start;
6193         size = bits[0].size;
6194
6195         cache = lookup_cache_extent(pending, bytenr, size);
6196         if (cache) {
6197                 remove_cache_extent(pending, cache);
6198                 free(cache);
6199         }
6200         cache = lookup_cache_extent(reada, bytenr, size);
6201         if (cache) {
6202                 remove_cache_extent(reada, cache);
6203                 free(cache);
6204         }
6205         cache = lookup_cache_extent(nodes, bytenr, size);
6206         if (cache) {
6207                 remove_cache_extent(nodes, cache);
6208                 free(cache);
6209         }
6210         cache = lookup_cache_extent(extent_cache, bytenr, size);
6211         if (cache) {
6212                 rec = container_of(cache, struct extent_record, cache);
6213                 gen = rec->parent_generation;
6214         }
6215
6216         /* fixme, get the real parent transid */
6217         buf = read_tree_block(root, bytenr, size, gen);
6218         if (!extent_buffer_uptodate(buf)) {
6219                 record_bad_block_io(root->fs_info,
6220                                     extent_cache, bytenr, size);
6221                 goto out;
6222         }
6223
6224         nritems = btrfs_header_nritems(buf);
6225
6226         flags = 0;
6227         if (!init_extent_tree) {
6228                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6229                                        btrfs_header_level(buf), 1, NULL,
6230                                        &flags);
6231                 if (ret < 0) {
6232                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6233                         if (ret < 0) {
6234                                 fprintf(stderr, "Couldn't calc extent flags\n");
6235                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6236                         }
6237                 }
6238         } else {
6239                 flags = 0;
6240                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6241                 if (ret < 0) {
6242                         fprintf(stderr, "Couldn't calc extent flags\n");
6243                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6244                 }
6245         }
6246
6247         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6248                 if (ri != NULL &&
6249                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6250                     ri->objectid == btrfs_header_owner(buf)) {
6251                         /*
6252                          * Ok we got to this block from it's original owner and
6253                          * we have FULL_BACKREF set.  Relocation can leave
6254                          * converted blocks over so this is altogether possible,
6255                          * however it's not possible if the generation > the
6256                          * last snapshot, so check for this case.
6257                          */
6258                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6259                             btrfs_header_generation(buf) > ri->last_snapshot) {
6260                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6261                                 rec->bad_full_backref = 1;
6262                         }
6263                 }
6264         } else {
6265                 if (ri != NULL &&
6266                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6267                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6268                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6269                         rec->bad_full_backref = 1;
6270                 }
6271         }
6272
6273         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6274                 rec->flag_block_full_backref = 1;
6275                 parent = bytenr;
6276                 owner = 0;
6277         } else {
6278                 rec->flag_block_full_backref = 0;
6279                 parent = 0;
6280                 owner = btrfs_header_owner(buf);
6281         }
6282
6283         ret = check_block(root, extent_cache, buf, flags);
6284         if (ret)
6285                 goto out;
6286
6287         if (btrfs_is_leaf(buf)) {
6288                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6289                 for (i = 0; i < nritems; i++) {
6290                         struct btrfs_file_extent_item *fi;
6291                         btrfs_item_key_to_cpu(buf, &key, i);
6292                         /*
6293                          * Check key type against the leaf owner.
6294                          * Could filter quite a lot of early error if
6295                          * owner is correct
6296                          */
6297                         if (check_type_with_root(btrfs_header_owner(buf),
6298                                                  key.type)) {
6299                                 fprintf(stderr, "ignoring invalid key\n");
6300                                 continue;
6301                         }
6302                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6303                                 process_extent_item(root, extent_cache, buf,
6304                                                     i);
6305                                 continue;
6306                         }
6307                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6308                                 process_extent_item(root, extent_cache, buf,
6309                                                     i);
6310                                 continue;
6311                         }
6312                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6313                                 total_csum_bytes +=
6314                                         btrfs_item_size_nr(buf, i);
6315                                 continue;
6316                         }
6317                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6318                                 process_chunk_item(chunk_cache, &key, buf, i);
6319                                 continue;
6320                         }
6321                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6322                                 process_device_item(dev_cache, &key, buf, i);
6323                                 continue;
6324                         }
6325                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6326                                 process_block_group_item(block_group_cache,
6327                                         &key, buf, i);
6328                                 continue;
6329                         }
6330                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6331                                 process_device_extent_item(dev_extent_cache,
6332                                         &key, buf, i);
6333                                 continue;
6334
6335                         }
6336                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6337 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6338                                 process_extent_ref_v0(extent_cache, buf, i);
6339 #else
6340                                 BUG();
6341 #endif
6342                                 continue;
6343                         }
6344
6345                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6346                                 ret = add_tree_backref(extent_cache,
6347                                                 key.objectid, 0, key.offset, 0);
6348                                 if (ret < 0)
6349                                         error("add_tree_backref failed: %s",
6350                                               strerror(-ret));
6351                                 continue;
6352                         }
6353                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6354                                 ret = add_tree_backref(extent_cache,
6355                                                 key.objectid, key.offset, 0, 0);
6356                                 if (ret < 0)
6357                                         error("add_tree_backref failed: %s",
6358                                               strerror(-ret));
6359                                 continue;
6360                         }
6361                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6362                                 struct btrfs_extent_data_ref *ref;
6363                                 ref = btrfs_item_ptr(buf, i,
6364                                                 struct btrfs_extent_data_ref);
6365                                 add_data_backref(extent_cache,
6366                                         key.objectid, 0,
6367                                         btrfs_extent_data_ref_root(buf, ref),
6368                                         btrfs_extent_data_ref_objectid(buf,
6369                                                                        ref),
6370                                         btrfs_extent_data_ref_offset(buf, ref),
6371                                         btrfs_extent_data_ref_count(buf, ref),
6372                                         0, root->sectorsize);
6373                                 continue;
6374                         }
6375                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6376                                 struct btrfs_shared_data_ref *ref;
6377                                 ref = btrfs_item_ptr(buf, i,
6378                                                 struct btrfs_shared_data_ref);
6379                                 add_data_backref(extent_cache,
6380                                         key.objectid, key.offset, 0, 0, 0,
6381                                         btrfs_shared_data_ref_count(buf, ref),
6382                                         0, root->sectorsize);
6383                                 continue;
6384                         }
6385                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6386                                 struct bad_item *bad;
6387
6388                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6389                                         continue;
6390                                 if (!owner)
6391                                         continue;
6392                                 bad = malloc(sizeof(struct bad_item));
6393                                 if (!bad)
6394                                         continue;
6395                                 INIT_LIST_HEAD(&bad->list);
6396                                 memcpy(&bad->key, &key,
6397                                        sizeof(struct btrfs_key));
6398                                 bad->root_id = owner;
6399                                 list_add_tail(&bad->list, &delete_items);
6400                                 continue;
6401                         }
6402                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6403                                 continue;
6404                         fi = btrfs_item_ptr(buf, i,
6405                                             struct btrfs_file_extent_item);
6406                         if (btrfs_file_extent_type(buf, fi) ==
6407                             BTRFS_FILE_EXTENT_INLINE)
6408                                 continue;
6409                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6410                                 continue;
6411
6412                         data_bytes_allocated +=
6413                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6414                         if (data_bytes_allocated < root->sectorsize) {
6415                                 abort();
6416                         }
6417                         data_bytes_referenced +=
6418                                 btrfs_file_extent_num_bytes(buf, fi);
6419                         add_data_backref(extent_cache,
6420                                 btrfs_file_extent_disk_bytenr(buf, fi),
6421                                 parent, owner, key.objectid, key.offset -
6422                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6423                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6424                 }
6425         } else {
6426                 int level;
6427                 struct btrfs_key first_key;
6428
6429                 first_key.objectid = 0;
6430
6431                 if (nritems > 0)
6432                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6433                 level = btrfs_header_level(buf);
6434                 for (i = 0; i < nritems; i++) {
6435                         struct extent_record tmpl;
6436
6437                         ptr = btrfs_node_blockptr(buf, i);
6438                         size = root->nodesize;
6439                         btrfs_node_key_to_cpu(buf, &key, i);
6440                         if (ri != NULL) {
6441                                 if ((level == ri->drop_level)
6442                                     && is_dropped_key(&key, &ri->drop_key)) {
6443                                         continue;
6444                                 }
6445                         }
6446
6447                         memset(&tmpl, 0, sizeof(tmpl));
6448                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6449                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6450                         tmpl.start = ptr;
6451                         tmpl.nr = size;
6452                         tmpl.refs = 1;
6453                         tmpl.metadata = 1;
6454                         tmpl.max_size = size;
6455                         ret = add_extent_rec(extent_cache, &tmpl);
6456                         if (ret < 0)
6457                                 goto out;
6458
6459                         ret = add_tree_backref(extent_cache, ptr, parent,
6460                                         owner, 1);
6461                         if (ret < 0) {
6462                                 error("add_tree_backref failed: %s",
6463                                       strerror(-ret));
6464                                 continue;
6465                         }
6466
6467                         if (level > 1) {
6468                                 add_pending(nodes, seen, ptr, size);
6469                         } else {
6470                                 add_pending(pending, seen, ptr, size);
6471                         }
6472                 }
6473                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6474                                       nritems) * sizeof(struct btrfs_key_ptr);
6475         }
6476         total_btree_bytes += buf->len;
6477         if (fs_root_objectid(btrfs_header_owner(buf)))
6478                 total_fs_tree_bytes += buf->len;
6479         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6480                 total_extent_tree_bytes += buf->len;
6481         if (!found_old_backref &&
6482             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6483             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6484             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6485                 found_old_backref = 1;
6486 out:
6487         free_extent_buffer(buf);
6488         return ret;
6489 }
6490
6491 static int add_root_to_pending(struct extent_buffer *buf,
6492                                struct cache_tree *extent_cache,
6493                                struct cache_tree *pending,
6494                                struct cache_tree *seen,
6495                                struct cache_tree *nodes,
6496                                u64 objectid)
6497 {
6498         struct extent_record tmpl;
6499         int ret;
6500
6501         if (btrfs_header_level(buf) > 0)
6502                 add_pending(nodes, seen, buf->start, buf->len);
6503         else
6504                 add_pending(pending, seen, buf->start, buf->len);
6505
6506         memset(&tmpl, 0, sizeof(tmpl));
6507         tmpl.start = buf->start;
6508         tmpl.nr = buf->len;
6509         tmpl.is_root = 1;
6510         tmpl.refs = 1;
6511         tmpl.metadata = 1;
6512         tmpl.max_size = buf->len;
6513         add_extent_rec(extent_cache, &tmpl);
6514
6515         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6516             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6517                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6518                                 0, 1);
6519         else
6520                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6521                                 1);
6522         return ret;
6523 }
6524
6525 /* as we fix the tree, we might be deleting blocks that
6526  * we're tracking for repair.  This hook makes sure we
6527  * remove any backrefs for blocks as we are fixing them.
6528  */
6529 static int free_extent_hook(struct btrfs_trans_handle *trans,
6530                             struct btrfs_root *root,
6531                             u64 bytenr, u64 num_bytes, u64 parent,
6532                             u64 root_objectid, u64 owner, u64 offset,
6533                             int refs_to_drop)
6534 {
6535         struct extent_record *rec;
6536         struct cache_extent *cache;
6537         int is_data;
6538         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6539
6540         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6541         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6542         if (!cache)
6543                 return 0;
6544
6545         rec = container_of(cache, struct extent_record, cache);
6546         if (is_data) {
6547                 struct data_backref *back;
6548                 back = find_data_backref(rec, parent, root_objectid, owner,
6549                                          offset, 1, bytenr, num_bytes);
6550                 if (!back)
6551                         goto out;
6552                 if (back->node.found_ref) {
6553                         back->found_ref -= refs_to_drop;
6554                         if (rec->refs)
6555                                 rec->refs -= refs_to_drop;
6556                 }
6557                 if (back->node.found_extent_tree) {
6558                         back->num_refs -= refs_to_drop;
6559                         if (rec->extent_item_refs)
6560                                 rec->extent_item_refs -= refs_to_drop;
6561                 }
6562                 if (back->found_ref == 0)
6563                         back->node.found_ref = 0;
6564                 if (back->num_refs == 0)
6565                         back->node.found_extent_tree = 0;
6566
6567                 if (!back->node.found_extent_tree && back->node.found_ref) {
6568                         list_del(&back->node.list);
6569                         free(back);
6570                 }
6571         } else {
6572                 struct tree_backref *back;
6573                 back = find_tree_backref(rec, parent, root_objectid);
6574                 if (!back)
6575                         goto out;
6576                 if (back->node.found_ref) {
6577                         if (rec->refs)
6578                                 rec->refs--;
6579                         back->node.found_ref = 0;
6580                 }
6581                 if (back->node.found_extent_tree) {
6582                         if (rec->extent_item_refs)
6583                                 rec->extent_item_refs--;
6584                         back->node.found_extent_tree = 0;
6585                 }
6586                 if (!back->node.found_extent_tree && back->node.found_ref) {
6587                         list_del(&back->node.list);
6588                         free(back);
6589                 }
6590         }
6591         maybe_free_extent_rec(extent_cache, rec);
6592 out:
6593         return 0;
6594 }
6595
6596 static int delete_extent_records(struct btrfs_trans_handle *trans,
6597                                  struct btrfs_root *root,
6598                                  struct btrfs_path *path,
6599                                  u64 bytenr, u64 new_len)
6600 {
6601         struct btrfs_key key;
6602         struct btrfs_key found_key;
6603         struct extent_buffer *leaf;
6604         int ret;
6605         int slot;
6606
6607
6608         key.objectid = bytenr;
6609         key.type = (u8)-1;
6610         key.offset = (u64)-1;
6611
6612         while(1) {
6613                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6614                                         &key, path, 0, 1);
6615                 if (ret < 0)
6616                         break;
6617
6618                 if (ret > 0) {
6619                         ret = 0;
6620                         if (path->slots[0] == 0)
6621                                 break;
6622                         path->slots[0]--;
6623                 }
6624                 ret = 0;
6625
6626                 leaf = path->nodes[0];
6627                 slot = path->slots[0];
6628
6629                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6630                 if (found_key.objectid != bytenr)
6631                         break;
6632
6633                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6634                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6635                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6636                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6637                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6638                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6639                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6640                         btrfs_release_path(path);
6641                         if (found_key.type == 0) {
6642                                 if (found_key.offset == 0)
6643                                         break;
6644                                 key.offset = found_key.offset - 1;
6645                                 key.type = found_key.type;
6646                         }
6647                         key.type = found_key.type - 1;
6648                         key.offset = (u64)-1;
6649                         continue;
6650                 }
6651
6652                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6653                         found_key.objectid, found_key.type, found_key.offset);
6654
6655                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6656                 if (ret)
6657                         break;
6658                 btrfs_release_path(path);
6659
6660                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6661                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6662                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6663                                 found_key.offset : root->nodesize;
6664
6665                         ret = btrfs_update_block_group(trans, root, bytenr,
6666                                                        bytes, 0, 0);
6667                         if (ret)
6668                                 break;
6669                 }
6670         }
6671
6672         btrfs_release_path(path);
6673         return ret;
6674 }
6675
6676 /*
6677  * for a single backref, this will allocate a new extent
6678  * and add the backref to it.
6679  */
6680 static int record_extent(struct btrfs_trans_handle *trans,
6681                          struct btrfs_fs_info *info,
6682                          struct btrfs_path *path,
6683                          struct extent_record *rec,
6684                          struct extent_backref *back,
6685                          int allocated, u64 flags)
6686 {
6687         int ret;
6688         struct btrfs_root *extent_root = info->extent_root;
6689         struct extent_buffer *leaf;
6690         struct btrfs_key ins_key;
6691         struct btrfs_extent_item *ei;
6692         struct tree_backref *tback;
6693         struct data_backref *dback;
6694         struct btrfs_tree_block_info *bi;
6695
6696         if (!back->is_data)
6697                 rec->max_size = max_t(u64, rec->max_size,
6698                                     info->extent_root->nodesize);
6699
6700         if (!allocated) {
6701                 u32 item_size = sizeof(*ei);
6702
6703                 if (!back->is_data)
6704                         item_size += sizeof(*bi);
6705
6706                 ins_key.objectid = rec->start;
6707                 ins_key.offset = rec->max_size;
6708                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6709
6710                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6711                                         &ins_key, item_size);
6712                 if (ret)
6713                         goto fail;
6714
6715                 leaf = path->nodes[0];
6716                 ei = btrfs_item_ptr(leaf, path->slots[0],
6717                                     struct btrfs_extent_item);
6718
6719                 btrfs_set_extent_refs(leaf, ei, 0);
6720                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6721
6722                 if (back->is_data) {
6723                         btrfs_set_extent_flags(leaf, ei,
6724                                                BTRFS_EXTENT_FLAG_DATA);
6725                 } else {
6726                         struct btrfs_disk_key copy_key;;
6727
6728                         tback = to_tree_backref(back);
6729                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6730                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6731                                              sizeof(*bi));
6732
6733                         btrfs_set_disk_key_objectid(&copy_key,
6734                                                     rec->info_objectid);
6735                         btrfs_set_disk_key_type(&copy_key, 0);
6736                         btrfs_set_disk_key_offset(&copy_key, 0);
6737
6738                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6739                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6740
6741                         btrfs_set_extent_flags(leaf, ei,
6742                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6743                 }
6744
6745                 btrfs_mark_buffer_dirty(leaf);
6746                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6747                                                rec->max_size, 1, 0);
6748                 if (ret)
6749                         goto fail;
6750                 btrfs_release_path(path);
6751         }
6752
6753         if (back->is_data) {
6754                 u64 parent;
6755                 int i;
6756
6757                 dback = to_data_backref(back);
6758                 if (back->full_backref)
6759                         parent = dback->parent;
6760                 else
6761                         parent = 0;
6762
6763                 for (i = 0; i < dback->found_ref; i++) {
6764                         /* if parent != 0, we're doing a full backref
6765                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6766                          * just makes the backref allocator create a data
6767                          * backref
6768                          */
6769                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6770                                                    rec->start, rec->max_size,
6771                                                    parent,
6772                                                    dback->root,
6773                                                    parent ?
6774                                                    BTRFS_FIRST_FREE_OBJECTID :
6775                                                    dback->owner,
6776                                                    dback->offset);
6777                         if (ret)
6778                                 break;
6779                 }
6780                 fprintf(stderr, "adding new data backref"
6781                                 " on %llu %s %llu owner %llu"
6782                                 " offset %llu found %d\n",
6783                                 (unsigned long long)rec->start,
6784                                 back->full_backref ?
6785                                 "parent" : "root",
6786                                 back->full_backref ?
6787                                 (unsigned long long)parent :
6788                                 (unsigned long long)dback->root,
6789                                 (unsigned long long)dback->owner,
6790                                 (unsigned long long)dback->offset,
6791                                 dback->found_ref);
6792         } else {
6793                 u64 parent;
6794
6795                 tback = to_tree_backref(back);
6796                 if (back->full_backref)
6797                         parent = tback->parent;
6798                 else
6799                         parent = 0;
6800
6801                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6802                                            rec->start, rec->max_size,
6803                                            parent, tback->root, 0, 0);
6804                 fprintf(stderr, "adding new tree backref on "
6805                         "start %llu len %llu parent %llu root %llu\n",
6806                         rec->start, rec->max_size, parent, tback->root);
6807         }
6808 fail:
6809         btrfs_release_path(path);
6810         return ret;
6811 }
6812
6813 static struct extent_entry *find_entry(struct list_head *entries,
6814                                        u64 bytenr, u64 bytes)
6815 {
6816         struct extent_entry *entry = NULL;
6817
6818         list_for_each_entry(entry, entries, list) {
6819                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6820                         return entry;
6821         }
6822
6823         return NULL;
6824 }
6825
6826 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6827 {
6828         struct extent_entry *entry, *best = NULL, *prev = NULL;
6829
6830         list_for_each_entry(entry, entries, list) {
6831                 if (!prev) {
6832                         prev = entry;
6833                         continue;
6834                 }
6835
6836                 /*
6837                  * If there are as many broken entries as entries then we know
6838                  * not to trust this particular entry.
6839                  */
6840                 if (entry->broken == entry->count)
6841                         continue;
6842
6843                 /*
6844                  * If our current entry == best then we can't be sure our best
6845                  * is really the best, so we need to keep searching.
6846                  */
6847                 if (best && best->count == entry->count) {
6848                         prev = entry;
6849                         best = NULL;
6850                         continue;
6851                 }
6852
6853                 /* Prev == entry, not good enough, have to keep searching */
6854                 if (!prev->broken && prev->count == entry->count)
6855                         continue;
6856
6857                 if (!best)
6858                         best = (prev->count > entry->count) ? prev : entry;
6859                 else if (best->count < entry->count)
6860                         best = entry;
6861                 prev = entry;
6862         }
6863
6864         return best;
6865 }
6866
6867 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6868                       struct data_backref *dback, struct extent_entry *entry)
6869 {
6870         struct btrfs_trans_handle *trans;
6871         struct btrfs_root *root;
6872         struct btrfs_file_extent_item *fi;
6873         struct extent_buffer *leaf;
6874         struct btrfs_key key;
6875         u64 bytenr, bytes;
6876         int ret, err;
6877
6878         key.objectid = dback->root;
6879         key.type = BTRFS_ROOT_ITEM_KEY;
6880         key.offset = (u64)-1;
6881         root = btrfs_read_fs_root(info, &key);
6882         if (IS_ERR(root)) {
6883                 fprintf(stderr, "Couldn't find root for our ref\n");
6884                 return -EINVAL;
6885         }
6886
6887         /*
6888          * The backref points to the original offset of the extent if it was
6889          * split, so we need to search down to the offset we have and then walk
6890          * forward until we find the backref we're looking for.
6891          */
6892         key.objectid = dback->owner;
6893         key.type = BTRFS_EXTENT_DATA_KEY;
6894         key.offset = dback->offset;
6895         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6896         if (ret < 0) {
6897                 fprintf(stderr, "Error looking up ref %d\n", ret);
6898                 return ret;
6899         }
6900
6901         while (1) {
6902                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6903                         ret = btrfs_next_leaf(root, path);
6904                         if (ret) {
6905                                 fprintf(stderr, "Couldn't find our ref, next\n");
6906                                 return -EINVAL;
6907                         }
6908                 }
6909                 leaf = path->nodes[0];
6910                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6911                 if (key.objectid != dback->owner ||
6912                     key.type != BTRFS_EXTENT_DATA_KEY) {
6913                         fprintf(stderr, "Couldn't find our ref, search\n");
6914                         return -EINVAL;
6915                 }
6916                 fi = btrfs_item_ptr(leaf, path->slots[0],
6917                                     struct btrfs_file_extent_item);
6918                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6919                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6920
6921                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6922                         break;
6923                 path->slots[0]++;
6924         }
6925
6926         btrfs_release_path(path);
6927
6928         trans = btrfs_start_transaction(root, 1);
6929         if (IS_ERR(trans))
6930                 return PTR_ERR(trans);
6931
6932         /*
6933          * Ok we have the key of the file extent we want to fix, now we can cow
6934          * down to the thing and fix it.
6935          */
6936         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6937         if (ret < 0) {
6938                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6939                         key.objectid, key.type, key.offset, ret);
6940                 goto out;
6941         }
6942         if (ret > 0) {
6943                 fprintf(stderr, "Well that's odd, we just found this key "
6944                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6945                         key.offset);
6946                 ret = -EINVAL;
6947                 goto out;
6948         }
6949         leaf = path->nodes[0];
6950         fi = btrfs_item_ptr(leaf, path->slots[0],
6951                             struct btrfs_file_extent_item);
6952
6953         if (btrfs_file_extent_compression(leaf, fi) &&
6954             dback->disk_bytenr != entry->bytenr) {
6955                 fprintf(stderr, "Ref doesn't match the record start and is "
6956                         "compressed, please take a btrfs-image of this file "
6957                         "system and send it to a btrfs developer so they can "
6958                         "complete this functionality for bytenr %Lu\n",
6959                         dback->disk_bytenr);
6960                 ret = -EINVAL;
6961                 goto out;
6962         }
6963
6964         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6965                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6966         } else if (dback->disk_bytenr > entry->bytenr) {
6967                 u64 off_diff, offset;
6968
6969                 off_diff = dback->disk_bytenr - entry->bytenr;
6970                 offset = btrfs_file_extent_offset(leaf, fi);
6971                 if (dback->disk_bytenr + offset +
6972                     btrfs_file_extent_num_bytes(leaf, fi) >
6973                     entry->bytenr + entry->bytes) {
6974                         fprintf(stderr, "Ref is past the entry end, please "
6975                                 "take a btrfs-image of this file system and "
6976                                 "send it to a btrfs developer, ref %Lu\n",
6977                                 dback->disk_bytenr);
6978                         ret = -EINVAL;
6979                         goto out;
6980                 }
6981                 offset += off_diff;
6982                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6983                 btrfs_set_file_extent_offset(leaf, fi, offset);
6984         } else if (dback->disk_bytenr < entry->bytenr) {
6985                 u64 offset;
6986
6987                 offset = btrfs_file_extent_offset(leaf, fi);
6988                 if (dback->disk_bytenr + offset < entry->bytenr) {
6989                         fprintf(stderr, "Ref is before the entry start, please"
6990                                 " take a btrfs-image of this file system and "
6991                                 "send it to a btrfs developer, ref %Lu\n",
6992                                 dback->disk_bytenr);
6993                         ret = -EINVAL;
6994                         goto out;
6995                 }
6996
6997                 offset += dback->disk_bytenr;
6998                 offset -= entry->bytenr;
6999                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7000                 btrfs_set_file_extent_offset(leaf, fi, offset);
7001         }
7002
7003         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7004
7005         /*
7006          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7007          * only do this if we aren't using compression, otherwise it's a
7008          * trickier case.
7009          */
7010         if (!btrfs_file_extent_compression(leaf, fi))
7011                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7012         else
7013                 printf("ram bytes may be wrong?\n");
7014         btrfs_mark_buffer_dirty(leaf);
7015 out:
7016         err = btrfs_commit_transaction(trans, root);
7017         btrfs_release_path(path);
7018         return ret ? ret : err;
7019 }
7020
7021 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7022                            struct extent_record *rec)
7023 {
7024         struct extent_backref *back;
7025         struct data_backref *dback;
7026         struct extent_entry *entry, *best = NULL;
7027         LIST_HEAD(entries);
7028         int nr_entries = 0;
7029         int broken_entries = 0;
7030         int ret = 0;
7031         short mismatch = 0;
7032
7033         /*
7034          * Metadata is easy and the backrefs should always agree on bytenr and
7035          * size, if not we've got bigger issues.
7036          */
7037         if (rec->metadata)
7038                 return 0;
7039
7040         list_for_each_entry(back, &rec->backrefs, list) {
7041                 if (back->full_backref || !back->is_data)
7042                         continue;
7043
7044                 dback = to_data_backref(back);
7045
7046                 /*
7047                  * We only pay attention to backrefs that we found a real
7048                  * backref for.
7049                  */
7050                 if (dback->found_ref == 0)
7051                         continue;
7052
7053                 /*
7054                  * For now we only catch when the bytes don't match, not the
7055                  * bytenr.  We can easily do this at the same time, but I want
7056                  * to have a fs image to test on before we just add repair
7057                  * functionality willy-nilly so we know we won't screw up the
7058                  * repair.
7059                  */
7060
7061                 entry = find_entry(&entries, dback->disk_bytenr,
7062                                    dback->bytes);
7063                 if (!entry) {
7064                         entry = malloc(sizeof(struct extent_entry));
7065                         if (!entry) {
7066                                 ret = -ENOMEM;
7067                                 goto out;
7068                         }
7069                         memset(entry, 0, sizeof(*entry));
7070                         entry->bytenr = dback->disk_bytenr;
7071                         entry->bytes = dback->bytes;
7072                         list_add_tail(&entry->list, &entries);
7073                         nr_entries++;
7074                 }
7075
7076                 /*
7077                  * If we only have on entry we may think the entries agree when
7078                  * in reality they don't so we have to do some extra checking.
7079                  */
7080                 if (dback->disk_bytenr != rec->start ||
7081                     dback->bytes != rec->nr || back->broken)
7082                         mismatch = 1;
7083
7084                 if (back->broken) {
7085                         entry->broken++;
7086                         broken_entries++;
7087                 }
7088
7089                 entry->count++;
7090         }
7091
7092         /* Yay all the backrefs agree, carry on good sir */
7093         if (nr_entries <= 1 && !mismatch)
7094                 goto out;
7095
7096         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7097                 "%Lu\n", rec->start);
7098
7099         /*
7100          * First we want to see if the backrefs can agree amongst themselves who
7101          * is right, so figure out which one of the entries has the highest
7102          * count.
7103          */
7104         best = find_most_right_entry(&entries);
7105
7106         /*
7107          * Ok so we may have an even split between what the backrefs think, so
7108          * this is where we use the extent ref to see what it thinks.
7109          */
7110         if (!best) {
7111                 entry = find_entry(&entries, rec->start, rec->nr);
7112                 if (!entry && (!broken_entries || !rec->found_rec)) {
7113                         fprintf(stderr, "Backrefs don't agree with each other "
7114                                 "and extent record doesn't agree with anybody,"
7115                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7116                                 rec->start, rec->nr);
7117                         ret = -EINVAL;
7118                         goto out;
7119                 } else if (!entry) {
7120                         /*
7121                          * Ok our backrefs were broken, we'll assume this is the
7122                          * correct value and add an entry for this range.
7123                          */
7124                         entry = malloc(sizeof(struct extent_entry));
7125                         if (!entry) {
7126                                 ret = -ENOMEM;
7127                                 goto out;
7128                         }
7129                         memset(entry, 0, sizeof(*entry));
7130                         entry->bytenr = rec->start;
7131                         entry->bytes = rec->nr;
7132                         list_add_tail(&entry->list, &entries);
7133                         nr_entries++;
7134                 }
7135                 entry->count++;
7136                 best = find_most_right_entry(&entries);
7137                 if (!best) {
7138                         fprintf(stderr, "Backrefs and extent record evenly "
7139                                 "split on who is right, this is going to "
7140                                 "require user input to fix bytenr %Lu bytes "
7141                                 "%Lu\n", rec->start, rec->nr);
7142                         ret = -EINVAL;
7143                         goto out;
7144                 }
7145         }
7146
7147         /*
7148          * I don't think this can happen currently as we'll abort() if we catch
7149          * this case higher up, but in case somebody removes that we still can't
7150          * deal with it properly here yet, so just bail out of that's the case.
7151          */
7152         if (best->bytenr != rec->start) {
7153                 fprintf(stderr, "Extent start and backref starts don't match, "
7154                         "please use btrfs-image on this file system and send "
7155                         "it to a btrfs developer so they can make fsck fix "
7156                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7157                         rec->start, rec->nr);
7158                 ret = -EINVAL;
7159                 goto out;
7160         }
7161
7162         /*
7163          * Ok great we all agreed on an extent record, let's go find the real
7164          * references and fix up the ones that don't match.
7165          */
7166         list_for_each_entry(back, &rec->backrefs, list) {
7167                 if (back->full_backref || !back->is_data)
7168                         continue;
7169
7170                 dback = to_data_backref(back);
7171
7172                 /*
7173                  * Still ignoring backrefs that don't have a real ref attached
7174                  * to them.
7175                  */
7176                 if (dback->found_ref == 0)
7177                         continue;
7178
7179                 if (dback->bytes == best->bytes &&
7180                     dback->disk_bytenr == best->bytenr)
7181                         continue;
7182
7183                 ret = repair_ref(info, path, dback, best);
7184                 if (ret)
7185                         goto out;
7186         }
7187
7188         /*
7189          * Ok we messed with the actual refs, which means we need to drop our
7190          * entire cache and go back and rescan.  I know this is a huge pain and
7191          * adds a lot of extra work, but it's the only way to be safe.  Once all
7192          * the backrefs agree we may not need to do anything to the extent
7193          * record itself.
7194          */
7195         ret = -EAGAIN;
7196 out:
7197         while (!list_empty(&entries)) {
7198                 entry = list_entry(entries.next, struct extent_entry, list);
7199                 list_del_init(&entry->list);
7200                 free(entry);
7201         }
7202         return ret;
7203 }
7204
7205 static int process_duplicates(struct btrfs_root *root,
7206                               struct cache_tree *extent_cache,
7207                               struct extent_record *rec)
7208 {
7209         struct extent_record *good, *tmp;
7210         struct cache_extent *cache;
7211         int ret;
7212
7213         /*
7214          * If we found a extent record for this extent then return, or if we
7215          * have more than one duplicate we are likely going to need to delete
7216          * something.
7217          */
7218         if (rec->found_rec || rec->num_duplicates > 1)
7219                 return 0;
7220
7221         /* Shouldn't happen but just in case */
7222         BUG_ON(!rec->num_duplicates);
7223
7224         /*
7225          * So this happens if we end up with a backref that doesn't match the
7226          * actual extent entry.  So either the backref is bad or the extent
7227          * entry is bad.  Either way we want to have the extent_record actually
7228          * reflect what we found in the extent_tree, so we need to take the
7229          * duplicate out and use that as the extent_record since the only way we
7230          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7231          */
7232         remove_cache_extent(extent_cache, &rec->cache);
7233
7234         good = to_extent_record(rec->dups.next);
7235         list_del_init(&good->list);
7236         INIT_LIST_HEAD(&good->backrefs);
7237         INIT_LIST_HEAD(&good->dups);
7238         good->cache.start = good->start;
7239         good->cache.size = good->nr;
7240         good->content_checked = 0;
7241         good->owner_ref_checked = 0;
7242         good->num_duplicates = 0;
7243         good->refs = rec->refs;
7244         list_splice_init(&rec->backrefs, &good->backrefs);
7245         while (1) {
7246                 cache = lookup_cache_extent(extent_cache, good->start,
7247                                             good->nr);
7248                 if (!cache)
7249                         break;
7250                 tmp = container_of(cache, struct extent_record, cache);
7251
7252                 /*
7253                  * If we find another overlapping extent and it's found_rec is
7254                  * set then it's a duplicate and we need to try and delete
7255                  * something.
7256                  */
7257                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7258                         if (list_empty(&good->list))
7259                                 list_add_tail(&good->list,
7260                                               &duplicate_extents);
7261                         good->num_duplicates += tmp->num_duplicates + 1;
7262                         list_splice_init(&tmp->dups, &good->dups);
7263                         list_del_init(&tmp->list);
7264                         list_add_tail(&tmp->list, &good->dups);
7265                         remove_cache_extent(extent_cache, &tmp->cache);
7266                         continue;
7267                 }
7268
7269                 /*
7270                  * Ok we have another non extent item backed extent rec, so lets
7271                  * just add it to this extent and carry on like we did above.
7272                  */
7273                 good->refs += tmp->refs;
7274                 list_splice_init(&tmp->backrefs, &good->backrefs);
7275                 remove_cache_extent(extent_cache, &tmp->cache);
7276                 free(tmp);
7277         }
7278         ret = insert_cache_extent(extent_cache, &good->cache);
7279         BUG_ON(ret);
7280         free(rec);
7281         return good->num_duplicates ? 0 : 1;
7282 }
7283
7284 static int delete_duplicate_records(struct btrfs_root *root,
7285                                     struct extent_record *rec)
7286 {
7287         struct btrfs_trans_handle *trans;
7288         LIST_HEAD(delete_list);
7289         struct btrfs_path *path;
7290         struct extent_record *tmp, *good, *n;
7291         int nr_del = 0;
7292         int ret = 0, err;
7293         struct btrfs_key key;
7294
7295         path = btrfs_alloc_path();
7296         if (!path) {
7297                 ret = -ENOMEM;
7298                 goto out;
7299         }
7300
7301         good = rec;
7302         /* Find the record that covers all of the duplicates. */
7303         list_for_each_entry(tmp, &rec->dups, list) {
7304                 if (good->start < tmp->start)
7305                         continue;
7306                 if (good->nr > tmp->nr)
7307                         continue;
7308
7309                 if (tmp->start + tmp->nr < good->start + good->nr) {
7310                         fprintf(stderr, "Ok we have overlapping extents that "
7311                                 "aren't completely covered by each other, this "
7312                                 "is going to require more careful thought.  "
7313                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7314                                 tmp->start, tmp->nr, good->start, good->nr);
7315                         abort();
7316                 }
7317                 good = tmp;
7318         }
7319
7320         if (good != rec)
7321                 list_add_tail(&rec->list, &delete_list);
7322
7323         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7324                 if (tmp == good)
7325                         continue;
7326                 list_move_tail(&tmp->list, &delete_list);
7327         }
7328
7329         root = root->fs_info->extent_root;
7330         trans = btrfs_start_transaction(root, 1);
7331         if (IS_ERR(trans)) {
7332                 ret = PTR_ERR(trans);
7333                 goto out;
7334         }
7335
7336         list_for_each_entry(tmp, &delete_list, list) {
7337                 if (tmp->found_rec == 0)
7338                         continue;
7339                 key.objectid = tmp->start;
7340                 key.type = BTRFS_EXTENT_ITEM_KEY;
7341                 key.offset = tmp->nr;
7342
7343                 /* Shouldn't happen but just in case */
7344                 if (tmp->metadata) {
7345                         fprintf(stderr, "Well this shouldn't happen, extent "
7346                                 "record overlaps but is metadata? "
7347                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7348                         abort();
7349                 }
7350
7351                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7352                 if (ret) {
7353                         if (ret > 0)
7354                                 ret = -EINVAL;
7355                         break;
7356                 }
7357                 ret = btrfs_del_item(trans, root, path);
7358                 if (ret)
7359                         break;
7360                 btrfs_release_path(path);
7361                 nr_del++;
7362         }
7363         err = btrfs_commit_transaction(trans, root);
7364         if (err && !ret)
7365                 ret = err;
7366 out:
7367         while (!list_empty(&delete_list)) {
7368                 tmp = to_extent_record(delete_list.next);
7369                 list_del_init(&tmp->list);
7370                 if (tmp == rec)
7371                         continue;
7372                 free(tmp);
7373         }
7374
7375         while (!list_empty(&rec->dups)) {
7376                 tmp = to_extent_record(rec->dups.next);
7377                 list_del_init(&tmp->list);
7378                 free(tmp);
7379         }
7380
7381         btrfs_free_path(path);
7382
7383         if (!ret && !nr_del)
7384                 rec->num_duplicates = 0;
7385
7386         return ret ? ret : nr_del;
7387 }
7388
7389 static int find_possible_backrefs(struct btrfs_fs_info *info,
7390                                   struct btrfs_path *path,
7391                                   struct cache_tree *extent_cache,
7392                                   struct extent_record *rec)
7393 {
7394         struct btrfs_root *root;
7395         struct extent_backref *back;
7396         struct data_backref *dback;
7397         struct cache_extent *cache;
7398         struct btrfs_file_extent_item *fi;
7399         struct btrfs_key key;
7400         u64 bytenr, bytes;
7401         int ret;
7402
7403         list_for_each_entry(back, &rec->backrefs, list) {
7404                 /* Don't care about full backrefs (poor unloved backrefs) */
7405                 if (back->full_backref || !back->is_data)
7406                         continue;
7407
7408                 dback = to_data_backref(back);
7409
7410                 /* We found this one, we don't need to do a lookup */
7411                 if (dback->found_ref)
7412                         continue;
7413
7414                 key.objectid = dback->root;
7415                 key.type = BTRFS_ROOT_ITEM_KEY;
7416                 key.offset = (u64)-1;
7417
7418                 root = btrfs_read_fs_root(info, &key);
7419
7420                 /* No root, definitely a bad ref, skip */
7421                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7422                         continue;
7423                 /* Other err, exit */
7424                 if (IS_ERR(root))
7425                         return PTR_ERR(root);
7426
7427                 key.objectid = dback->owner;
7428                 key.type = BTRFS_EXTENT_DATA_KEY;
7429                 key.offset = dback->offset;
7430                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7431                 if (ret) {
7432                         btrfs_release_path(path);
7433                         if (ret < 0)
7434                                 return ret;
7435                         /* Didn't find it, we can carry on */
7436                         ret = 0;
7437                         continue;
7438                 }
7439
7440                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7441                                     struct btrfs_file_extent_item);
7442                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7443                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7444                 btrfs_release_path(path);
7445                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7446                 if (cache) {
7447                         struct extent_record *tmp;
7448                         tmp = container_of(cache, struct extent_record, cache);
7449
7450                         /*
7451                          * If we found an extent record for the bytenr for this
7452                          * particular backref then we can't add it to our
7453                          * current extent record.  We only want to add backrefs
7454                          * that don't have a corresponding extent item in the
7455                          * extent tree since they likely belong to this record
7456                          * and we need to fix it if it doesn't match bytenrs.
7457                          */
7458                         if  (tmp->found_rec)
7459                                 continue;
7460                 }
7461
7462                 dback->found_ref += 1;
7463                 dback->disk_bytenr = bytenr;
7464                 dback->bytes = bytes;
7465
7466                 /*
7467                  * Set this so the verify backref code knows not to trust the
7468                  * values in this backref.
7469                  */
7470                 back->broken = 1;
7471         }
7472
7473         return 0;
7474 }
7475
7476 /*
7477  * Record orphan data ref into corresponding root.
7478  *
7479  * Return 0 if the extent item contains data ref and recorded.
7480  * Return 1 if the extent item contains no useful data ref
7481  *   On that case, it may contains only shared_dataref or metadata backref
7482  *   or the file extent exists(this should be handled by the extent bytenr
7483  *   recovery routine)
7484  * Return <0 if something goes wrong.
7485  */
7486 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7487                                       struct extent_record *rec)
7488 {
7489         struct btrfs_key key;
7490         struct btrfs_root *dest_root;
7491         struct extent_backref *back;
7492         struct data_backref *dback;
7493         struct orphan_data_extent *orphan;
7494         struct btrfs_path *path;
7495         int recorded_data_ref = 0;
7496         int ret = 0;
7497
7498         if (rec->metadata)
7499                 return 1;
7500         path = btrfs_alloc_path();
7501         if (!path)
7502                 return -ENOMEM;
7503         list_for_each_entry(back, &rec->backrefs, list) {
7504                 if (back->full_backref || !back->is_data ||
7505                     !back->found_extent_tree)
7506                         continue;
7507                 dback = to_data_backref(back);
7508                 if (dback->found_ref)
7509                         continue;
7510                 key.objectid = dback->root;
7511                 key.type = BTRFS_ROOT_ITEM_KEY;
7512                 key.offset = (u64)-1;
7513
7514                 dest_root = btrfs_read_fs_root(fs_info, &key);
7515
7516                 /* For non-exist root we just skip it */
7517                 if (IS_ERR(dest_root) || !dest_root)
7518                         continue;
7519
7520                 key.objectid = dback->owner;
7521                 key.type = BTRFS_EXTENT_DATA_KEY;
7522                 key.offset = dback->offset;
7523
7524                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7525                 btrfs_release_path(path);
7526                 /*
7527                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7528                  * we need to record it for inode/file extent rebuild.
7529                  * For ret > 0, we record it only for file extent rebuild.
7530                  * For ret == 0, the file extent exists but only bytenr
7531                  * mismatch, let the original bytenr fix routine to handle,
7532                  * don't record it.
7533                  */
7534                 if (ret == 0)
7535                         continue;
7536                 ret = 0;
7537                 orphan = malloc(sizeof(*orphan));
7538                 if (!orphan) {
7539                         ret = -ENOMEM;
7540                         goto out;
7541                 }
7542                 INIT_LIST_HEAD(&orphan->list);
7543                 orphan->root = dback->root;
7544                 orphan->objectid = dback->owner;
7545                 orphan->offset = dback->offset;
7546                 orphan->disk_bytenr = rec->cache.start;
7547                 orphan->disk_len = rec->cache.size;
7548                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7549                 recorded_data_ref = 1;
7550         }
7551 out:
7552         btrfs_free_path(path);
7553         if (!ret)
7554                 return !recorded_data_ref;
7555         else
7556                 return ret;
7557 }
7558
7559 /*
7560  * when an incorrect extent item is found, this will delete
7561  * all of the existing entries for it and recreate them
7562  * based on what the tree scan found.
7563  */
7564 static int fixup_extent_refs(struct btrfs_fs_info *info,
7565                              struct cache_tree *extent_cache,
7566                              struct extent_record *rec)
7567 {
7568         struct btrfs_trans_handle *trans = NULL;
7569         int ret;
7570         struct btrfs_path *path;
7571         struct list_head *cur = rec->backrefs.next;
7572         struct cache_extent *cache;
7573         struct extent_backref *back;
7574         int allocated = 0;
7575         u64 flags = 0;
7576
7577         if (rec->flag_block_full_backref)
7578                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7579
7580         path = btrfs_alloc_path();
7581         if (!path)
7582                 return -ENOMEM;
7583
7584         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7585                 /*
7586                  * Sometimes the backrefs themselves are so broken they don't
7587                  * get attached to any meaningful rec, so first go back and
7588                  * check any of our backrefs that we couldn't find and throw
7589                  * them into the list if we find the backref so that
7590                  * verify_backrefs can figure out what to do.
7591                  */
7592                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7593                 if (ret < 0)
7594                         goto out;
7595         }
7596
7597         /* step one, make sure all of the backrefs agree */
7598         ret = verify_backrefs(info, path, rec);
7599         if (ret < 0)
7600                 goto out;
7601
7602         trans = btrfs_start_transaction(info->extent_root, 1);
7603         if (IS_ERR(trans)) {
7604                 ret = PTR_ERR(trans);
7605                 goto out;
7606         }
7607
7608         /* step two, delete all the existing records */
7609         ret = delete_extent_records(trans, info->extent_root, path,
7610                                     rec->start, rec->max_size);
7611
7612         if (ret < 0)
7613                 goto out;
7614
7615         /* was this block corrupt?  If so, don't add references to it */
7616         cache = lookup_cache_extent(info->corrupt_blocks,
7617                                     rec->start, rec->max_size);
7618         if (cache) {
7619                 ret = 0;
7620                 goto out;
7621         }
7622
7623         /* step three, recreate all the refs we did find */
7624         while(cur != &rec->backrefs) {
7625                 back = to_extent_backref(cur);
7626                 cur = cur->next;
7627
7628                 /*
7629                  * if we didn't find any references, don't create a
7630                  * new extent record
7631                  */
7632                 if (!back->found_ref)
7633                         continue;
7634
7635                 rec->bad_full_backref = 0;
7636                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7637                 allocated = 1;
7638
7639                 if (ret)
7640                         goto out;
7641         }
7642 out:
7643         if (trans) {
7644                 int err = btrfs_commit_transaction(trans, info->extent_root);
7645                 if (!ret)
7646                         ret = err;
7647         }
7648
7649         btrfs_free_path(path);
7650         return ret;
7651 }
7652
7653 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7654                               struct extent_record *rec)
7655 {
7656         struct btrfs_trans_handle *trans;
7657         struct btrfs_root *root = fs_info->extent_root;
7658         struct btrfs_path *path;
7659         struct btrfs_extent_item *ei;
7660         struct btrfs_key key;
7661         u64 flags;
7662         int ret = 0;
7663
7664         key.objectid = rec->start;
7665         if (rec->metadata) {
7666                 key.type = BTRFS_METADATA_ITEM_KEY;
7667                 key.offset = rec->info_level;
7668         } else {
7669                 key.type = BTRFS_EXTENT_ITEM_KEY;
7670                 key.offset = rec->max_size;
7671         }
7672
7673         path = btrfs_alloc_path();
7674         if (!path)
7675                 return -ENOMEM;
7676
7677         trans = btrfs_start_transaction(root, 0);
7678         if (IS_ERR(trans)) {
7679                 btrfs_free_path(path);
7680                 return PTR_ERR(trans);
7681         }
7682
7683         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7684         if (ret < 0) {
7685                 btrfs_free_path(path);
7686                 btrfs_commit_transaction(trans, root);
7687                 return ret;
7688         } else if (ret) {
7689                 fprintf(stderr, "Didn't find extent for %llu\n",
7690                         (unsigned long long)rec->start);
7691                 btrfs_free_path(path);
7692                 btrfs_commit_transaction(trans, root);
7693                 return -ENOENT;
7694         }
7695
7696         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7697                             struct btrfs_extent_item);
7698         flags = btrfs_extent_flags(path->nodes[0], ei);
7699         if (rec->flag_block_full_backref) {
7700                 fprintf(stderr, "setting full backref on %llu\n",
7701                         (unsigned long long)key.objectid);
7702                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7703         } else {
7704                 fprintf(stderr, "clearing full backref on %llu\n",
7705                         (unsigned long long)key.objectid);
7706                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7707         }
7708         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7709         btrfs_mark_buffer_dirty(path->nodes[0]);
7710         btrfs_free_path(path);
7711         return btrfs_commit_transaction(trans, root);
7712 }
7713
7714 /* right now we only prune from the extent allocation tree */
7715 static int prune_one_block(struct btrfs_trans_handle *trans,
7716                            struct btrfs_fs_info *info,
7717                            struct btrfs_corrupt_block *corrupt)
7718 {
7719         int ret;
7720         struct btrfs_path path;
7721         struct extent_buffer *eb;
7722         u64 found;
7723         int slot;
7724         int nritems;
7725         int level = corrupt->level + 1;
7726
7727         btrfs_init_path(&path);
7728 again:
7729         /* we want to stop at the parent to our busted block */
7730         path.lowest_level = level;
7731
7732         ret = btrfs_search_slot(trans, info->extent_root,
7733                                 &corrupt->key, &path, -1, 1);
7734
7735         if (ret < 0)
7736                 goto out;
7737
7738         eb = path.nodes[level];
7739         if (!eb) {
7740                 ret = -ENOENT;
7741                 goto out;
7742         }
7743
7744         /*
7745          * hopefully the search gave us the block we want to prune,
7746          * lets try that first
7747          */
7748         slot = path.slots[level];
7749         found =  btrfs_node_blockptr(eb, slot);
7750         if (found == corrupt->cache.start)
7751                 goto del_ptr;
7752
7753         nritems = btrfs_header_nritems(eb);
7754
7755         /* the search failed, lets scan this node and hope we find it */
7756         for (slot = 0; slot < nritems; slot++) {
7757                 found =  btrfs_node_blockptr(eb, slot);
7758                 if (found == corrupt->cache.start)
7759                         goto del_ptr;
7760         }
7761         /*
7762          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7763          * to this block
7764          */
7765         if (eb == info->extent_root->node) {
7766                 ret = -ENOENT;
7767                 goto out;
7768         } else {
7769                 level++;
7770                 btrfs_release_path(&path);
7771                 goto again;
7772         }
7773
7774 del_ptr:
7775         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7776         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7777
7778 out:
7779         btrfs_release_path(&path);
7780         return ret;
7781 }
7782
7783 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7784 {
7785         struct btrfs_trans_handle *trans = NULL;
7786         struct cache_extent *cache;
7787         struct btrfs_corrupt_block *corrupt;
7788
7789         while (1) {
7790                 cache = search_cache_extent(info->corrupt_blocks, 0);
7791                 if (!cache)
7792                         break;
7793                 if (!trans) {
7794                         trans = btrfs_start_transaction(info->extent_root, 1);
7795                         if (IS_ERR(trans))
7796                                 return PTR_ERR(trans);
7797                 }
7798                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7799                 prune_one_block(trans, info, corrupt);
7800                 remove_cache_extent(info->corrupt_blocks, cache);
7801         }
7802         if (trans)
7803                 return btrfs_commit_transaction(trans, info->extent_root);
7804         return 0;
7805 }
7806
7807 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7808 {
7809         struct btrfs_block_group_cache *cache;
7810         u64 start, end;
7811         int ret;
7812
7813         while (1) {
7814                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7815                                             &start, &end, EXTENT_DIRTY);
7816                 if (ret)
7817                         break;
7818                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7819                                    GFP_NOFS);
7820         }
7821
7822         start = 0;
7823         while (1) {
7824                 cache = btrfs_lookup_first_block_group(fs_info, start);
7825                 if (!cache)
7826                         break;
7827                 if (cache->cached)
7828                         cache->cached = 0;
7829                 start = cache->key.objectid + cache->key.offset;
7830         }
7831 }
7832
7833 static int check_extent_refs(struct btrfs_root *root,
7834                              struct cache_tree *extent_cache)
7835 {
7836         struct extent_record *rec;
7837         struct cache_extent *cache;
7838         int err = 0;
7839         int ret = 0;
7840         int fixed = 0;
7841         int had_dups = 0;
7842         int recorded = 0;
7843
7844         if (repair) {
7845                 /*
7846                  * if we're doing a repair, we have to make sure
7847                  * we don't allocate from the problem extents.
7848                  * In the worst case, this will be all the
7849                  * extents in the FS
7850                  */
7851                 cache = search_cache_extent(extent_cache, 0);
7852                 while(cache) {
7853                         rec = container_of(cache, struct extent_record, cache);
7854                         set_extent_dirty(root->fs_info->excluded_extents,
7855                                          rec->start,
7856                                          rec->start + rec->max_size - 1,
7857                                          GFP_NOFS);
7858                         cache = next_cache_extent(cache);
7859                 }
7860
7861                 /* pin down all the corrupted blocks too */
7862                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7863                 while(cache) {
7864                         set_extent_dirty(root->fs_info->excluded_extents,
7865                                          cache->start,
7866                                          cache->start + cache->size - 1,
7867                                          GFP_NOFS);
7868                         cache = next_cache_extent(cache);
7869                 }
7870                 prune_corrupt_blocks(root->fs_info);
7871                 reset_cached_block_groups(root->fs_info);
7872         }
7873
7874         reset_cached_block_groups(root->fs_info);
7875
7876         /*
7877          * We need to delete any duplicate entries we find first otherwise we
7878          * could mess up the extent tree when we have backrefs that actually
7879          * belong to a different extent item and not the weird duplicate one.
7880          */
7881         while (repair && !list_empty(&duplicate_extents)) {
7882                 rec = to_extent_record(duplicate_extents.next);
7883                 list_del_init(&rec->list);
7884
7885                 /* Sometimes we can find a backref before we find an actual
7886                  * extent, so we need to process it a little bit to see if there
7887                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7888                  * if this is a backref screwup.  If we need to delete stuff
7889                  * process_duplicates() will return 0, otherwise it will return
7890                  * 1 and we
7891                  */
7892                 if (process_duplicates(root, extent_cache, rec))
7893                         continue;
7894                 ret = delete_duplicate_records(root, rec);
7895                 if (ret < 0)
7896                         return ret;
7897                 /*
7898                  * delete_duplicate_records will return the number of entries
7899                  * deleted, so if it's greater than 0 then we know we actually
7900                  * did something and we need to remove.
7901                  */
7902                 if (ret)
7903                         had_dups = 1;
7904         }
7905
7906         if (had_dups)
7907                 return -EAGAIN;
7908
7909         while(1) {
7910                 int cur_err = 0;
7911
7912                 fixed = 0;
7913                 recorded = 0;
7914                 cache = search_cache_extent(extent_cache, 0);
7915                 if (!cache)
7916                         break;
7917                 rec = container_of(cache, struct extent_record, cache);
7918                 if (rec->num_duplicates) {
7919                         fprintf(stderr, "extent item %llu has multiple extent "
7920                                 "items\n", (unsigned long long)rec->start);
7921                         err = 1;
7922                         cur_err = 1;
7923                 }
7924
7925                 if (rec->refs != rec->extent_item_refs) {
7926                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7927                                 (unsigned long long)rec->start,
7928                                 (unsigned long long)rec->nr);
7929                         fprintf(stderr, "extent item %llu, found %llu\n",
7930                                 (unsigned long long)rec->extent_item_refs,
7931                                 (unsigned long long)rec->refs);
7932                         ret = record_orphan_data_extents(root->fs_info, rec);
7933                         if (ret < 0)
7934                                 goto repair_abort;
7935                         if (ret == 0) {
7936                                 recorded = 1;
7937                         } else {
7938                                 /*
7939                                  * we can't use the extent to repair file
7940                                  * extent, let the fallback method handle it.
7941                                  */
7942                                 if (!fixed && repair) {
7943                                         ret = fixup_extent_refs(
7944                                                         root->fs_info,
7945                                                         extent_cache, rec);
7946                                         if (ret)
7947                                                 goto repair_abort;
7948                                         fixed = 1;
7949                                 }
7950                         }
7951                         err = 1;
7952                         cur_err = 1;
7953                 }
7954                 if (all_backpointers_checked(rec, 1)) {
7955                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7956                                 (unsigned long long)rec->start,
7957                                 (unsigned long long)rec->nr);
7958
7959                         if (!fixed && !recorded && repair) {
7960                                 ret = fixup_extent_refs(root->fs_info,
7961                                                         extent_cache, rec);
7962                                 if (ret)
7963                                         goto repair_abort;
7964                                 fixed = 1;
7965                         }
7966                         cur_err = 1;
7967                         err = 1;
7968                 }
7969                 if (!rec->owner_ref_checked) {
7970                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7971                                 (unsigned long long)rec->start,
7972                                 (unsigned long long)rec->nr);
7973                         if (!fixed && !recorded && repair) {
7974                                 ret = fixup_extent_refs(root->fs_info,
7975                                                         extent_cache, rec);
7976                                 if (ret)
7977                                         goto repair_abort;
7978                                 fixed = 1;
7979                         }
7980                         err = 1;
7981                         cur_err = 1;
7982                 }
7983                 if (rec->bad_full_backref) {
7984                         fprintf(stderr, "bad full backref, on [%llu]\n",
7985                                 (unsigned long long)rec->start);
7986                         if (repair) {
7987                                 ret = fixup_extent_flags(root->fs_info, rec);
7988                                 if (ret)
7989                                         goto repair_abort;
7990                                 fixed = 1;
7991                         }
7992                         err = 1;
7993                         cur_err = 1;
7994                 }
7995                 /*
7996                  * Although it's not a extent ref's problem, we reuse this
7997                  * routine for error reporting.
7998                  * No repair function yet.
7999                  */
8000                 if (rec->crossing_stripes) {
8001                         fprintf(stderr,
8002                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8003                                 rec->start, rec->start + rec->max_size);
8004                         err = 1;
8005                         cur_err = 1;
8006                 }
8007
8008                 if (rec->wrong_chunk_type) {
8009                         fprintf(stderr,
8010                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8011                                 rec->start, rec->start + rec->max_size);
8012                         err = 1;
8013                         cur_err = 1;
8014                 }
8015
8016                 remove_cache_extent(extent_cache, cache);
8017                 free_all_extent_backrefs(rec);
8018                 if (!init_extent_tree && repair && (!cur_err || fixed))
8019                         clear_extent_dirty(root->fs_info->excluded_extents,
8020                                            rec->start,
8021                                            rec->start + rec->max_size - 1,
8022                                            GFP_NOFS);
8023                 free(rec);
8024         }
8025 repair_abort:
8026         if (repair) {
8027                 if (ret && ret != -EAGAIN) {
8028                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8029                         exit(1);
8030                 } else if (!ret) {
8031                         struct btrfs_trans_handle *trans;
8032
8033                         root = root->fs_info->extent_root;
8034                         trans = btrfs_start_transaction(root, 1);
8035                         if (IS_ERR(trans)) {
8036                                 ret = PTR_ERR(trans);
8037                                 goto repair_abort;
8038                         }
8039
8040                         btrfs_fix_block_accounting(trans, root);
8041                         ret = btrfs_commit_transaction(trans, root);
8042                         if (ret)
8043                                 goto repair_abort;
8044                 }
8045                 if (err)
8046                         fprintf(stderr, "repaired damaged extent references\n");
8047                 return ret;
8048         }
8049         return err;
8050 }
8051
8052 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8053 {
8054         u64 stripe_size;
8055
8056         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8057                 stripe_size = length;
8058                 stripe_size /= num_stripes;
8059         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8060                 stripe_size = length * 2;
8061                 stripe_size /= num_stripes;
8062         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8063                 stripe_size = length;
8064                 stripe_size /= (num_stripes - 1);
8065         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8066                 stripe_size = length;
8067                 stripe_size /= (num_stripes - 2);
8068         } else {
8069                 stripe_size = length;
8070         }
8071         return stripe_size;
8072 }
8073
8074 /*
8075  * Check the chunk with its block group/dev list ref:
8076  * Return 0 if all refs seems valid.
8077  * Return 1 if part of refs seems valid, need later check for rebuild ref
8078  * like missing block group and needs to search extent tree to rebuild them.
8079  * Return -1 if essential refs are missing and unable to rebuild.
8080  */
8081 static int check_chunk_refs(struct chunk_record *chunk_rec,
8082                             struct block_group_tree *block_group_cache,
8083                             struct device_extent_tree *dev_extent_cache,
8084                             int silent)
8085 {
8086         struct cache_extent *block_group_item;
8087         struct block_group_record *block_group_rec;
8088         struct cache_extent *dev_extent_item;
8089         struct device_extent_record *dev_extent_rec;
8090         u64 devid;
8091         u64 offset;
8092         u64 length;
8093         int metadump_v2 = 0;
8094         int i;
8095         int ret = 0;
8096
8097         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8098                                                chunk_rec->offset,
8099                                                chunk_rec->length);
8100         if (block_group_item) {
8101                 block_group_rec = container_of(block_group_item,
8102                                                struct block_group_record,
8103                                                cache);
8104                 if (chunk_rec->length != block_group_rec->offset ||
8105                     chunk_rec->offset != block_group_rec->objectid ||
8106                     (!metadump_v2 &&
8107                      chunk_rec->type_flags != block_group_rec->flags)) {
8108                         if (!silent)
8109                                 fprintf(stderr,
8110                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8111                                         chunk_rec->objectid,
8112                                         chunk_rec->type,
8113                                         chunk_rec->offset,
8114                                         chunk_rec->length,
8115                                         chunk_rec->offset,
8116                                         chunk_rec->type_flags,
8117                                         block_group_rec->objectid,
8118                                         block_group_rec->type,
8119                                         block_group_rec->offset,
8120                                         block_group_rec->offset,
8121                                         block_group_rec->objectid,
8122                                         block_group_rec->flags);
8123                         ret = -1;
8124                 } else {
8125                         list_del_init(&block_group_rec->list);
8126                         chunk_rec->bg_rec = block_group_rec;
8127                 }
8128         } else {
8129                 if (!silent)
8130                         fprintf(stderr,
8131                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8132                                 chunk_rec->objectid,
8133                                 chunk_rec->type,
8134                                 chunk_rec->offset,
8135                                 chunk_rec->length,
8136                                 chunk_rec->offset,
8137                                 chunk_rec->type_flags);
8138                 ret = 1;
8139         }
8140
8141         if (metadump_v2)
8142                 return ret;
8143
8144         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8145                                     chunk_rec->num_stripes);
8146         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8147                 devid = chunk_rec->stripes[i].devid;
8148                 offset = chunk_rec->stripes[i].offset;
8149                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8150                                                        devid, offset, length);
8151                 if (dev_extent_item) {
8152                         dev_extent_rec = container_of(dev_extent_item,
8153                                                 struct device_extent_record,
8154                                                 cache);
8155                         if (dev_extent_rec->objectid != devid ||
8156                             dev_extent_rec->offset != offset ||
8157                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8158                             dev_extent_rec->length != length) {
8159                                 if (!silent)
8160                                         fprintf(stderr,
8161                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8162                                                 chunk_rec->objectid,
8163                                                 chunk_rec->type,
8164                                                 chunk_rec->offset,
8165                                                 chunk_rec->stripes[i].devid,
8166                                                 chunk_rec->stripes[i].offset,
8167                                                 dev_extent_rec->objectid,
8168                                                 dev_extent_rec->offset,
8169                                                 dev_extent_rec->length);
8170                                 ret = -1;
8171                         } else {
8172                                 list_move(&dev_extent_rec->chunk_list,
8173                                           &chunk_rec->dextents);
8174                         }
8175                 } else {
8176                         if (!silent)
8177                                 fprintf(stderr,
8178                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8179                                         chunk_rec->objectid,
8180                                         chunk_rec->type,
8181                                         chunk_rec->offset,
8182                                         chunk_rec->stripes[i].devid,
8183                                         chunk_rec->stripes[i].offset);
8184                         ret = -1;
8185                 }
8186         }
8187         return ret;
8188 }
8189
8190 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8191 int check_chunks(struct cache_tree *chunk_cache,
8192                  struct block_group_tree *block_group_cache,
8193                  struct device_extent_tree *dev_extent_cache,
8194                  struct list_head *good, struct list_head *bad,
8195                  struct list_head *rebuild, int silent)
8196 {
8197         struct cache_extent *chunk_item;
8198         struct chunk_record *chunk_rec;
8199         struct block_group_record *bg_rec;
8200         struct device_extent_record *dext_rec;
8201         int err;
8202         int ret = 0;
8203
8204         chunk_item = first_cache_extent(chunk_cache);
8205         while (chunk_item) {
8206                 chunk_rec = container_of(chunk_item, struct chunk_record,
8207                                          cache);
8208                 err = check_chunk_refs(chunk_rec, block_group_cache,
8209                                        dev_extent_cache, silent);
8210                 if (err < 0)
8211                         ret = err;
8212                 if (err == 0 && good)
8213                         list_add_tail(&chunk_rec->list, good);
8214                 if (err > 0 && rebuild)
8215                         list_add_tail(&chunk_rec->list, rebuild);
8216                 if (err < 0 && bad)
8217                         list_add_tail(&chunk_rec->list, bad);
8218                 chunk_item = next_cache_extent(chunk_item);
8219         }
8220
8221         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8222                 if (!silent)
8223                         fprintf(stderr,
8224                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8225                                 bg_rec->objectid,
8226                                 bg_rec->offset,
8227                                 bg_rec->flags);
8228                 if (!ret)
8229                         ret = 1;
8230         }
8231
8232         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8233                             chunk_list) {
8234                 if (!silent)
8235                         fprintf(stderr,
8236                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8237                                 dext_rec->objectid,
8238                                 dext_rec->offset,
8239                                 dext_rec->length);
8240                 if (!ret)
8241                         ret = 1;
8242         }
8243         return ret;
8244 }
8245
8246
8247 static int check_device_used(struct device_record *dev_rec,
8248                              struct device_extent_tree *dext_cache)
8249 {
8250         struct cache_extent *cache;
8251         struct device_extent_record *dev_extent_rec;
8252         u64 total_byte = 0;
8253
8254         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8255         while (cache) {
8256                 dev_extent_rec = container_of(cache,
8257                                               struct device_extent_record,
8258                                               cache);
8259                 if (dev_extent_rec->objectid != dev_rec->devid)
8260                         break;
8261
8262                 list_del_init(&dev_extent_rec->device_list);
8263                 total_byte += dev_extent_rec->length;
8264                 cache = next_cache_extent(cache);
8265         }
8266
8267         if (total_byte != dev_rec->byte_used) {
8268                 fprintf(stderr,
8269                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8270                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8271                         dev_rec->type, dev_rec->offset);
8272                 return -1;
8273         } else {
8274                 return 0;
8275         }
8276 }
8277
8278 /* check btrfs_dev_item -> btrfs_dev_extent */
8279 static int check_devices(struct rb_root *dev_cache,
8280                          struct device_extent_tree *dev_extent_cache)
8281 {
8282         struct rb_node *dev_node;
8283         struct device_record *dev_rec;
8284         struct device_extent_record *dext_rec;
8285         int err;
8286         int ret = 0;
8287
8288         dev_node = rb_first(dev_cache);
8289         while (dev_node) {
8290                 dev_rec = container_of(dev_node, struct device_record, node);
8291                 err = check_device_used(dev_rec, dev_extent_cache);
8292                 if (err)
8293                         ret = err;
8294
8295                 dev_node = rb_next(dev_node);
8296         }
8297         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8298                             device_list) {
8299                 fprintf(stderr,
8300                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8301                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8302                 if (!ret)
8303                         ret = 1;
8304         }
8305         return ret;
8306 }
8307
8308 static int add_root_item_to_list(struct list_head *head,
8309                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8310                                   u8 level, u8 drop_level,
8311                                   int level_size, struct btrfs_key *drop_key)
8312 {
8313
8314         struct root_item_record *ri_rec;
8315         ri_rec = malloc(sizeof(*ri_rec));
8316         if (!ri_rec)
8317                 return -ENOMEM;
8318         ri_rec->bytenr = bytenr;
8319         ri_rec->objectid = objectid;
8320         ri_rec->level = level;
8321         ri_rec->level_size = level_size;
8322         ri_rec->drop_level = drop_level;
8323         ri_rec->last_snapshot = last_snapshot;
8324         if (drop_key)
8325                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8326         list_add_tail(&ri_rec->list, head);
8327
8328         return 0;
8329 }
8330
8331 static void free_root_item_list(struct list_head *list)
8332 {
8333         struct root_item_record *ri_rec;
8334
8335         while (!list_empty(list)) {
8336                 ri_rec = list_first_entry(list, struct root_item_record,
8337                                           list);
8338                 list_del_init(&ri_rec->list);
8339                 free(ri_rec);
8340         }
8341 }
8342
8343 static int deal_root_from_list(struct list_head *list,
8344                                struct btrfs_root *root,
8345                                struct block_info *bits,
8346                                int bits_nr,
8347                                struct cache_tree *pending,
8348                                struct cache_tree *seen,
8349                                struct cache_tree *reada,
8350                                struct cache_tree *nodes,
8351                                struct cache_tree *extent_cache,
8352                                struct cache_tree *chunk_cache,
8353                                struct rb_root *dev_cache,
8354                                struct block_group_tree *block_group_cache,
8355                                struct device_extent_tree *dev_extent_cache)
8356 {
8357         int ret = 0;
8358         u64 last;
8359
8360         while (!list_empty(list)) {
8361                 struct root_item_record *rec;
8362                 struct extent_buffer *buf;
8363                 rec = list_entry(list->next,
8364                                  struct root_item_record, list);
8365                 last = 0;
8366                 buf = read_tree_block(root->fs_info->tree_root,
8367                                       rec->bytenr, rec->level_size, 0);
8368                 if (!extent_buffer_uptodate(buf)) {
8369                         free_extent_buffer(buf);
8370                         ret = -EIO;
8371                         break;
8372                 }
8373                 ret = add_root_to_pending(buf, extent_cache, pending,
8374                                     seen, nodes, rec->objectid);
8375                 if (ret < 0)
8376                         break;
8377                 /*
8378                  * To rebuild extent tree, we need deal with snapshot
8379                  * one by one, otherwise we deal with node firstly which
8380                  * can maximize readahead.
8381                  */
8382                 while (1) {
8383                         ret = run_next_block(root, bits, bits_nr, &last,
8384                                              pending, seen, reada, nodes,
8385                                              extent_cache, chunk_cache,
8386                                              dev_cache, block_group_cache,
8387                                              dev_extent_cache, rec);
8388                         if (ret != 0)
8389                                 break;
8390                 }
8391                 free_extent_buffer(buf);
8392                 list_del(&rec->list);
8393                 free(rec);
8394                 if (ret < 0)
8395                         break;
8396         }
8397         while (ret >= 0) {
8398                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8399                                      reada, nodes, extent_cache, chunk_cache,
8400                                      dev_cache, block_group_cache,
8401                                      dev_extent_cache, NULL);
8402                 if (ret != 0) {
8403                         if (ret > 0)
8404                                 ret = 0;
8405                         break;
8406                 }
8407         }
8408         return ret;
8409 }
8410
8411 static int check_chunks_and_extents(struct btrfs_root *root)
8412 {
8413         struct rb_root dev_cache;
8414         struct cache_tree chunk_cache;
8415         struct block_group_tree block_group_cache;
8416         struct device_extent_tree dev_extent_cache;
8417         struct cache_tree extent_cache;
8418         struct cache_tree seen;
8419         struct cache_tree pending;
8420         struct cache_tree reada;
8421         struct cache_tree nodes;
8422         struct extent_io_tree excluded_extents;
8423         struct cache_tree corrupt_blocks;
8424         struct btrfs_path path;
8425         struct btrfs_key key;
8426         struct btrfs_key found_key;
8427         int ret, err = 0;
8428         struct block_info *bits;
8429         int bits_nr;
8430         struct extent_buffer *leaf;
8431         int slot;
8432         struct btrfs_root_item ri;
8433         struct list_head dropping_trees;
8434         struct list_head normal_trees;
8435         struct btrfs_root *root1;
8436         u64 objectid;
8437         u32 level_size;
8438         u8 level;
8439
8440         dev_cache = RB_ROOT;
8441         cache_tree_init(&chunk_cache);
8442         block_group_tree_init(&block_group_cache);
8443         device_extent_tree_init(&dev_extent_cache);
8444
8445         cache_tree_init(&extent_cache);
8446         cache_tree_init(&seen);
8447         cache_tree_init(&pending);
8448         cache_tree_init(&nodes);
8449         cache_tree_init(&reada);
8450         cache_tree_init(&corrupt_blocks);
8451         extent_io_tree_init(&excluded_extents);
8452         INIT_LIST_HEAD(&dropping_trees);
8453         INIT_LIST_HEAD(&normal_trees);
8454
8455         if (repair) {
8456                 root->fs_info->excluded_extents = &excluded_extents;
8457                 root->fs_info->fsck_extent_cache = &extent_cache;
8458                 root->fs_info->free_extent_hook = free_extent_hook;
8459                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8460         }
8461
8462         bits_nr = 1024;
8463         bits = malloc(bits_nr * sizeof(struct block_info));
8464         if (!bits) {
8465                 perror("malloc");
8466                 exit(1);
8467         }
8468
8469         if (ctx.progress_enabled) {
8470                 ctx.tp = TASK_EXTENTS;
8471                 task_start(ctx.info);
8472         }
8473
8474 again:
8475         root1 = root->fs_info->tree_root;
8476         level = btrfs_header_level(root1->node);
8477         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8478                                     root1->node->start, 0, level, 0,
8479                                     root1->nodesize, NULL);
8480         if (ret < 0)
8481                 goto out;
8482         root1 = root->fs_info->chunk_root;
8483         level = btrfs_header_level(root1->node);
8484         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8485                                     root1->node->start, 0, level, 0,
8486                                     root1->nodesize, NULL);
8487         if (ret < 0)
8488                 goto out;
8489         btrfs_init_path(&path);
8490         key.offset = 0;
8491         key.objectid = 0;
8492         key.type = BTRFS_ROOT_ITEM_KEY;
8493         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8494                                         &key, &path, 0, 0);
8495         if (ret < 0)
8496                 goto out;
8497         while(1) {
8498                 leaf = path.nodes[0];
8499                 slot = path.slots[0];
8500                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8501                         ret = btrfs_next_leaf(root, &path);
8502                         if (ret != 0)
8503                                 break;
8504                         leaf = path.nodes[0];
8505                         slot = path.slots[0];
8506                 }
8507                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8508                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8509                         unsigned long offset;
8510                         u64 last_snapshot;
8511
8512                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8513                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8514                         last_snapshot = btrfs_root_last_snapshot(&ri);
8515                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8516                                 level = btrfs_root_level(&ri);
8517                                 level_size = root->nodesize;
8518                                 ret = add_root_item_to_list(&normal_trees,
8519                                                 found_key.objectid,
8520                                                 btrfs_root_bytenr(&ri),
8521                                                 last_snapshot, level,
8522                                                 0, level_size, NULL);
8523                                 if (ret < 0)
8524                                         goto out;
8525                         } else {
8526                                 level = btrfs_root_level(&ri);
8527                                 level_size = root->nodesize;
8528                                 objectid = found_key.objectid;
8529                                 btrfs_disk_key_to_cpu(&found_key,
8530                                                       &ri.drop_progress);
8531                                 ret = add_root_item_to_list(&dropping_trees,
8532                                                 objectid,
8533                                                 btrfs_root_bytenr(&ri),
8534                                                 last_snapshot, level,
8535                                                 ri.drop_level,
8536                                                 level_size, &found_key);
8537                                 if (ret < 0)
8538                                         goto out;
8539                         }
8540                 }
8541                 path.slots[0]++;
8542         }
8543         btrfs_release_path(&path);
8544
8545         /*
8546          * check_block can return -EAGAIN if it fixes something, please keep
8547          * this in mind when dealing with return values from these functions, if
8548          * we get -EAGAIN we want to fall through and restart the loop.
8549          */
8550         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8551                                   &seen, &reada, &nodes, &extent_cache,
8552                                   &chunk_cache, &dev_cache, &block_group_cache,
8553                                   &dev_extent_cache);
8554         if (ret < 0) {
8555                 if (ret == -EAGAIN)
8556                         goto loop;
8557                 goto out;
8558         }
8559         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8560                                   &pending, &seen, &reada, &nodes,
8561                                   &extent_cache, &chunk_cache, &dev_cache,
8562                                   &block_group_cache, &dev_extent_cache);
8563         if (ret < 0) {
8564                 if (ret == -EAGAIN)
8565                         goto loop;
8566                 goto out;
8567         }
8568
8569         ret = check_chunks(&chunk_cache, &block_group_cache,
8570                            &dev_extent_cache, NULL, NULL, NULL, 0);
8571         if (ret) {
8572                 if (ret == -EAGAIN)
8573                         goto loop;
8574                 err = ret;
8575         }
8576
8577         ret = check_extent_refs(root, &extent_cache);
8578         if (ret < 0) {
8579                 if (ret == -EAGAIN)
8580                         goto loop;
8581                 goto out;
8582         }
8583
8584         ret = check_devices(&dev_cache, &dev_extent_cache);
8585         if (ret && err)
8586                 ret = err;
8587
8588 out:
8589         task_stop(ctx.info);
8590         if (repair) {
8591                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8592                 extent_io_tree_cleanup(&excluded_extents);
8593                 root->fs_info->fsck_extent_cache = NULL;
8594                 root->fs_info->free_extent_hook = NULL;
8595                 root->fs_info->corrupt_blocks = NULL;
8596                 root->fs_info->excluded_extents = NULL;
8597         }
8598         free(bits);
8599         free_chunk_cache_tree(&chunk_cache);
8600         free_device_cache_tree(&dev_cache);
8601         free_block_group_tree(&block_group_cache);
8602         free_device_extent_tree(&dev_extent_cache);
8603         free_extent_cache_tree(&seen);
8604         free_extent_cache_tree(&pending);
8605         free_extent_cache_tree(&reada);
8606         free_extent_cache_tree(&nodes);
8607         return ret;
8608 loop:
8609         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8610         free_extent_cache_tree(&seen);
8611         free_extent_cache_tree(&pending);
8612         free_extent_cache_tree(&reada);
8613         free_extent_cache_tree(&nodes);
8614         free_chunk_cache_tree(&chunk_cache);
8615         free_block_group_tree(&block_group_cache);
8616         free_device_cache_tree(&dev_cache);
8617         free_device_extent_tree(&dev_extent_cache);
8618         free_extent_record_cache(root->fs_info, &extent_cache);
8619         free_root_item_list(&normal_trees);
8620         free_root_item_list(&dropping_trees);
8621         extent_io_tree_cleanup(&excluded_extents);
8622         goto again;
8623 }
8624
8625 /*
8626  * Check backrefs of a tree block given by @bytenr or @eb.
8627  *
8628  * @root:       the root containing the @bytenr or @eb
8629  * @eb:         tree block extent buffer, can be NULL
8630  * @bytenr:     bytenr of the tree block to search
8631  * @level:      tree level of the tree block
8632  * @owner:      owner of the tree block
8633  *
8634  * Return >0 for any error found and output error message
8635  * Return 0 for no error found
8636  */
8637 static int check_tree_block_ref(struct btrfs_root *root,
8638                                 struct extent_buffer *eb, u64 bytenr,
8639                                 int level, u64 owner)
8640 {
8641         struct btrfs_key key;
8642         struct btrfs_root *extent_root = root->fs_info->extent_root;
8643         struct btrfs_path path;
8644         struct btrfs_extent_item *ei;
8645         struct btrfs_extent_inline_ref *iref;
8646         struct extent_buffer *leaf;
8647         unsigned long end;
8648         unsigned long ptr;
8649         int slot;
8650         int skinny_level;
8651         int type;
8652         u32 nodesize = root->nodesize;
8653         u32 item_size;
8654         u64 offset;
8655         int found_ref = 0;
8656         int err = 0;
8657         int ret;
8658
8659         btrfs_init_path(&path);
8660         key.objectid = bytenr;
8661         if (btrfs_fs_incompat(root->fs_info,
8662                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8663                 key.type = BTRFS_METADATA_ITEM_KEY;
8664         else
8665                 key.type = BTRFS_EXTENT_ITEM_KEY;
8666         key.offset = (u64)-1;
8667
8668         /* Search for the backref in extent tree */
8669         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8670         if (ret < 0) {
8671                 err |= BACKREF_MISSING;
8672                 goto out;
8673         }
8674         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8675         if (ret) {
8676                 err |= BACKREF_MISSING;
8677                 goto out;
8678         }
8679
8680         leaf = path.nodes[0];
8681         slot = path.slots[0];
8682         btrfs_item_key_to_cpu(leaf, &key, slot);
8683
8684         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8685
8686         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8687                 skinny_level = (int)key.offset;
8688                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8689         } else {
8690                 struct btrfs_tree_block_info *info;
8691
8692                 info = (struct btrfs_tree_block_info *)(ei + 1);
8693                 skinny_level = btrfs_tree_block_level(leaf, info);
8694                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8695         }
8696
8697         if (eb) {
8698                 u64 header_gen;
8699                 u64 extent_gen;
8700
8701                 if (!(btrfs_extent_flags(leaf, ei) &
8702                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8703                         error(
8704                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8705                                 key.objectid, nodesize,
8706                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8707                         err = BACKREF_MISMATCH;
8708                 }
8709                 header_gen = btrfs_header_generation(eb);
8710                 extent_gen = btrfs_extent_generation(leaf, ei);
8711                 if (header_gen != extent_gen) {
8712                         error(
8713         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8714                                 key.objectid, nodesize, header_gen,
8715                                 extent_gen);
8716                         err = BACKREF_MISMATCH;
8717                 }
8718                 if (level != skinny_level) {
8719                         error(
8720                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8721                                 key.objectid, nodesize, level, skinny_level);
8722                         err = BACKREF_MISMATCH;
8723                 }
8724                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8725                         error(
8726                         "extent[%llu %u] is referred by other roots than %llu",
8727                                 key.objectid, nodesize, root->objectid);
8728                         err = BACKREF_MISMATCH;
8729                 }
8730         }
8731
8732         /*
8733          * Iterate the extent/metadata item to find the exact backref
8734          */
8735         item_size = btrfs_item_size_nr(leaf, slot);
8736         ptr = (unsigned long)iref;
8737         end = (unsigned long)ei + item_size;
8738         while (ptr < end) {
8739                 iref = (struct btrfs_extent_inline_ref *)ptr;
8740                 type = btrfs_extent_inline_ref_type(leaf, iref);
8741                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8742
8743                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8744                         (offset == root->objectid || offset == owner)) {
8745                         found_ref = 1;
8746                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8747                         /* Check if the backref points to valid referencer */
8748                         found_ref = !check_tree_block_ref(root, NULL, offset,
8749                                                           level + 1, owner);
8750                 }
8751
8752                 if (found_ref)
8753                         break;
8754                 ptr += btrfs_extent_inline_ref_size(type);
8755         }
8756
8757         /*
8758          * Inlined extent item doesn't have what we need, check
8759          * TREE_BLOCK_REF_KEY
8760          */
8761         if (!found_ref) {
8762                 btrfs_release_path(&path);
8763                 key.objectid = bytenr;
8764                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8765                 key.offset = root->objectid;
8766
8767                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8768                 if (!ret)
8769                         found_ref = 1;
8770         }
8771         if (!found_ref)
8772                 err |= BACKREF_MISSING;
8773 out:
8774         btrfs_release_path(&path);
8775         if (eb && (err & BACKREF_MISSING))
8776                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8777                         bytenr, nodesize, owner, level);
8778         return err;
8779 }
8780
8781 /*
8782  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8783  *
8784  * Return >0 any error found and output error message
8785  * Return 0 for no error found
8786  */
8787 static int check_extent_data_item(struct btrfs_root *root,
8788                                   struct extent_buffer *eb, int slot)
8789 {
8790         struct btrfs_file_extent_item *fi;
8791         struct btrfs_path path;
8792         struct btrfs_root *extent_root = root->fs_info->extent_root;
8793         struct btrfs_key fi_key;
8794         struct btrfs_key dbref_key;
8795         struct extent_buffer *leaf;
8796         struct btrfs_extent_item *ei;
8797         struct btrfs_extent_inline_ref *iref;
8798         struct btrfs_extent_data_ref *dref;
8799         u64 owner;
8800         u64 file_extent_gen;
8801         u64 disk_bytenr;
8802         u64 disk_num_bytes;
8803         u64 extent_num_bytes;
8804         u64 extent_flags;
8805         u64 extent_gen;
8806         u32 item_size;
8807         unsigned long end;
8808         unsigned long ptr;
8809         int type;
8810         u64 ref_root;
8811         int found_dbackref = 0;
8812         int err = 0;
8813         int ret;
8814
8815         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8816         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8817         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8818
8819         /* Nothing to check for hole and inline data extents */
8820         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8821             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8822                 return 0;
8823
8824         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8825         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8826         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8827
8828         /* Check unaligned disk_num_bytes and num_bytes */
8829         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8830                 error(
8831 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8832                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8833                         root->sectorsize);
8834                 err |= BYTES_UNALIGNED;
8835         } else {
8836                 data_bytes_allocated += disk_num_bytes;
8837         }
8838         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8839                 error(
8840 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8841                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8842                         root->sectorsize);
8843                 err |= BYTES_UNALIGNED;
8844         } else {
8845                 data_bytes_referenced += extent_num_bytes;
8846         }
8847         owner = btrfs_header_owner(eb);
8848
8849         /* Check the extent item of the file extent in extent tree */
8850         btrfs_init_path(&path);
8851         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8852         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8853         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8854
8855         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8856         if (ret) {
8857                 err |= BACKREF_MISSING;
8858                 goto error;
8859         }
8860
8861         leaf = path.nodes[0];
8862         slot = path.slots[0];
8863         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8864
8865         extent_flags = btrfs_extent_flags(leaf, ei);
8866         extent_gen = btrfs_extent_generation(leaf, ei);
8867
8868         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8869                 error(
8870                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8871                     disk_bytenr, disk_num_bytes,
8872                     BTRFS_EXTENT_FLAG_DATA);
8873                 err |= BACKREF_MISMATCH;
8874         }
8875
8876         if (file_extent_gen < extent_gen) {
8877                 error(
8878 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8879                         disk_bytenr, disk_num_bytes, file_extent_gen,
8880                         extent_gen);
8881                 err |= BACKREF_MISMATCH;
8882         }
8883
8884         /* Check data backref inside that extent item */
8885         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8886         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8887         ptr = (unsigned long)iref;
8888         end = (unsigned long)ei + item_size;
8889         while (ptr < end) {
8890                 iref = (struct btrfs_extent_inline_ref *)ptr;
8891                 type = btrfs_extent_inline_ref_type(leaf, iref);
8892                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8893
8894                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8895                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8896                         if (ref_root == owner || ref_root == root->objectid)
8897                                 found_dbackref = 1;
8898                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8899                         found_dbackref = !check_tree_block_ref(root, NULL,
8900                                 btrfs_extent_inline_ref_offset(leaf, iref),
8901                                 0, owner);
8902                 }
8903
8904                 if (found_dbackref)
8905                         break;
8906                 ptr += btrfs_extent_inline_ref_size(type);
8907         }
8908
8909         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8910         if (!found_dbackref) {
8911                 btrfs_release_path(&path);
8912
8913                 btrfs_init_path(&path);
8914                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8915                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8916                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8917                                 fi_key.objectid, fi_key.offset);
8918
8919                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8920                                         &dbref_key, &path, 0, 0);
8921                 if (!ret)
8922                         found_dbackref = 1;
8923         }
8924
8925         if (!found_dbackref)
8926                 err |= BACKREF_MISSING;
8927 error:
8928         btrfs_release_path(&path);
8929         if (err & BACKREF_MISSING) {
8930                 error("data extent[%llu %llu] backref lost",
8931                       disk_bytenr, disk_num_bytes);
8932         }
8933         return err;
8934 }
8935
8936 /*
8937  * Get real tree block level for the case like shared block
8938  * Return >= 0 as tree level
8939  * Return <0 for error
8940  */
8941 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8942 {
8943         struct extent_buffer *eb;
8944         struct btrfs_path path;
8945         struct btrfs_key key;
8946         struct btrfs_extent_item *ei;
8947         u64 flags;
8948         u64 transid;
8949         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8950         u8 backref_level;
8951         u8 header_level;
8952         int ret;
8953
8954         /* Search extent tree for extent generation and level */
8955         key.objectid = bytenr;
8956         key.type = BTRFS_METADATA_ITEM_KEY;
8957         key.offset = (u64)-1;
8958
8959         btrfs_init_path(&path);
8960         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8961         if (ret < 0)
8962                 goto release_out;
8963         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8964         if (ret < 0)
8965                 goto release_out;
8966         if (ret > 0) {
8967                 ret = -ENOENT;
8968                 goto release_out;
8969         }
8970
8971         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8972         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8973                             struct btrfs_extent_item);
8974         flags = btrfs_extent_flags(path.nodes[0], ei);
8975         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8976                 ret = -ENOENT;
8977                 goto release_out;
8978         }
8979
8980         /* Get transid for later read_tree_block() check */
8981         transid = btrfs_extent_generation(path.nodes[0], ei);
8982
8983         /* Get backref level as one source */
8984         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8985                 backref_level = key.offset;
8986         } else {
8987                 struct btrfs_tree_block_info *info;
8988
8989                 info = (struct btrfs_tree_block_info *)(ei + 1);
8990                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8991         }
8992         btrfs_release_path(&path);
8993
8994         /* Get level from tree block as an alternative source */
8995         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8996         if (!extent_buffer_uptodate(eb)) {
8997                 free_extent_buffer(eb);
8998                 return -EIO;
8999         }
9000         header_level = btrfs_header_level(eb);
9001         free_extent_buffer(eb);
9002
9003         if (header_level != backref_level)
9004                 return -EIO;
9005         return header_level;
9006
9007 release_out:
9008         btrfs_release_path(&path);
9009         return ret;
9010 }
9011
9012 /*
9013  * Check if a tree block backref is valid (points to a valid tree block)
9014  * if level == -1, level will be resolved
9015  * Return >0 for any error found and print error message
9016  */
9017 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9018                                     u64 bytenr, int level)
9019 {
9020         struct btrfs_root *root;
9021         struct btrfs_key key;
9022         struct btrfs_path path;
9023         struct extent_buffer *eb;
9024         struct extent_buffer *node;
9025         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9026         int err = 0;
9027         int ret;
9028
9029         /* Query level for level == -1 special case */
9030         if (level == -1)
9031                 level = query_tree_block_level(fs_info, bytenr);
9032         if (level < 0) {
9033                 err |= REFERENCER_MISSING;
9034                 goto out;
9035         }
9036
9037         key.objectid = root_id;
9038         key.type = BTRFS_ROOT_ITEM_KEY;
9039         key.offset = (u64)-1;
9040
9041         root = btrfs_read_fs_root(fs_info, &key);
9042         if (IS_ERR(root)) {
9043                 err |= REFERENCER_MISSING;
9044                 goto out;
9045         }
9046
9047         /* Read out the tree block to get item/node key */
9048         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9049         if (!extent_buffer_uptodate(eb)) {
9050                 err |= REFERENCER_MISSING;
9051                 free_extent_buffer(eb);
9052                 goto out;
9053         }
9054
9055         /* Empty tree, no need to check key */
9056         if (!btrfs_header_nritems(eb) && !level) {
9057                 free_extent_buffer(eb);
9058                 goto out;
9059         }
9060
9061         if (level)
9062                 btrfs_node_key_to_cpu(eb, &key, 0);
9063         else
9064                 btrfs_item_key_to_cpu(eb, &key, 0);
9065
9066         free_extent_buffer(eb);
9067
9068         btrfs_init_path(&path);
9069         path.lowest_level = level;
9070         /* Search with the first key, to ensure we can reach it */
9071         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9072         if (ret < 0) {
9073                 err |= REFERENCER_MISSING;
9074                 goto release_out;
9075         }
9076
9077         node = path.nodes[level];
9078         if (btrfs_header_bytenr(node) != bytenr) {
9079                 error(
9080         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9081                         bytenr, nodesize, bytenr,
9082                         btrfs_header_bytenr(node));
9083                 err |= REFERENCER_MISMATCH;
9084         }
9085         if (btrfs_header_level(node) != level) {
9086                 error(
9087         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9088                         bytenr, nodesize, level,
9089                         btrfs_header_level(node));
9090                 err |= REFERENCER_MISMATCH;
9091         }
9092
9093 release_out:
9094         btrfs_release_path(&path);
9095 out:
9096         if (err & REFERENCER_MISSING) {
9097                 if (level < 0)
9098                         error("extent [%llu %d] lost referencer (owner: %llu)",
9099                                 bytenr, nodesize, root_id);
9100                 else
9101                         error(
9102                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9103                                 bytenr, nodesize, root_id, level);
9104         }
9105
9106         return err;
9107 }
9108
9109 /*
9110  * Check referencer for shared block backref
9111  * If level == -1, this function will resolve the level.
9112  */
9113 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9114                                      u64 parent, u64 bytenr, int level)
9115 {
9116         struct extent_buffer *eb;
9117         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9118         u32 nr;
9119         int found_parent = 0;
9120         int i;
9121
9122         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9123         if (!extent_buffer_uptodate(eb))
9124                 goto out;
9125
9126         if (level == -1)
9127                 level = query_tree_block_level(fs_info, bytenr);
9128         if (level < 0)
9129                 goto out;
9130
9131         if (level + 1 != btrfs_header_level(eb))
9132                 goto out;
9133
9134         nr = btrfs_header_nritems(eb);
9135         for (i = 0; i < nr; i++) {
9136                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9137                         found_parent = 1;
9138                         break;
9139                 }
9140         }
9141 out:
9142         free_extent_buffer(eb);
9143         if (!found_parent) {
9144                 error(
9145         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9146                         bytenr, nodesize, parent, level);
9147                 return REFERENCER_MISSING;
9148         }
9149         return 0;
9150 }
9151
9152 /*
9153  * Check referencer for normal (inlined) data ref
9154  * If len == 0, it will be resolved by searching in extent tree
9155  */
9156 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9157                                      u64 root_id, u64 objectid, u64 offset,
9158                                      u64 bytenr, u64 len, u32 count)
9159 {
9160         struct btrfs_root *root;
9161         struct btrfs_root *extent_root = fs_info->extent_root;
9162         struct btrfs_key key;
9163         struct btrfs_path path;
9164         struct extent_buffer *leaf;
9165         struct btrfs_file_extent_item *fi;
9166         u32 found_count = 0;
9167         int slot;
9168         int ret = 0;
9169
9170         if (!len) {
9171                 key.objectid = bytenr;
9172                 key.type = BTRFS_EXTENT_ITEM_KEY;
9173                 key.offset = (u64)-1;
9174
9175                 btrfs_init_path(&path);
9176                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9177                 if (ret < 0)
9178                         goto out;
9179                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9180                 if (ret)
9181                         goto out;
9182                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9183                 if (key.objectid != bytenr ||
9184                     key.type != BTRFS_EXTENT_ITEM_KEY)
9185                         goto out;
9186                 len = key.offset;
9187                 btrfs_release_path(&path);
9188         }
9189         key.objectid = root_id;
9190         key.type = BTRFS_ROOT_ITEM_KEY;
9191         key.offset = (u64)-1;
9192         btrfs_init_path(&path);
9193
9194         root = btrfs_read_fs_root(fs_info, &key);
9195         if (IS_ERR(root))
9196                 goto out;
9197
9198         key.objectid = objectid;
9199         key.type = BTRFS_EXTENT_DATA_KEY;
9200         /*
9201          * It can be nasty as data backref offset is
9202          * file offset - file extent offset, which is smaller or
9203          * equal to original backref offset.  The only special case is
9204          * overflow.  So we need to special check and do further search.
9205          */
9206         key.offset = offset & (1ULL << 63) ? 0 : offset;
9207
9208         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9209         if (ret < 0)
9210                 goto out;
9211
9212         /*
9213          * Search afterwards to get correct one
9214          * NOTE: As we must do a comprehensive check on the data backref to
9215          * make sure the dref count also matches, we must iterate all file
9216          * extents for that inode.
9217          */
9218         while (1) {
9219                 leaf = path.nodes[0];
9220                 slot = path.slots[0];
9221
9222                 btrfs_item_key_to_cpu(leaf, &key, slot);
9223                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9224                         break;
9225                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9226                 /*
9227                  * Except normal disk bytenr and disk num bytes, we still
9228                  * need to do extra check on dbackref offset as
9229                  * dbackref offset = file_offset - file_extent_offset
9230                  */
9231                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9232                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9233                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9234                     offset)
9235                         found_count++;
9236
9237                 ret = btrfs_next_item(root, &path);
9238                 if (ret)
9239                         break;
9240         }
9241 out:
9242         btrfs_release_path(&path);
9243         if (found_count != count) {
9244                 error(
9245 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9246                         bytenr, len, root_id, objectid, offset, count, found_count);
9247                 return REFERENCER_MISSING;
9248         }
9249         return 0;
9250 }
9251
9252 /*
9253  * Check if the referencer of a shared data backref exists
9254  */
9255 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9256                                      u64 parent, u64 bytenr)
9257 {
9258         struct extent_buffer *eb;
9259         struct btrfs_key key;
9260         struct btrfs_file_extent_item *fi;
9261         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9262         u32 nr;
9263         int found_parent = 0;
9264         int i;
9265
9266         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9267         if (!extent_buffer_uptodate(eb))
9268                 goto out;
9269
9270         nr = btrfs_header_nritems(eb);
9271         for (i = 0; i < nr; i++) {
9272                 btrfs_item_key_to_cpu(eb, &key, i);
9273                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9274                         continue;
9275
9276                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9277                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9278                         continue;
9279
9280                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9281                         found_parent = 1;
9282                         break;
9283                 }
9284         }
9285
9286 out:
9287         free_extent_buffer(eb);
9288         if (!found_parent) {
9289                 error("shared extent %llu referencer lost (parent: %llu)",
9290                         bytenr, parent);
9291                 return REFERENCER_MISSING;
9292         }
9293         return 0;
9294 }
9295
9296 /*
9297  * This function will check a given extent item, including its backref and
9298  * itself (like crossing stripe boundary and type)
9299  *
9300  * Since we don't use extent_record anymore, introduce new error bit
9301  */
9302 static int check_extent_item(struct btrfs_fs_info *fs_info,
9303                              struct extent_buffer *eb, int slot)
9304 {
9305         struct btrfs_extent_item *ei;
9306         struct btrfs_extent_inline_ref *iref;
9307         struct btrfs_extent_data_ref *dref;
9308         unsigned long end;
9309         unsigned long ptr;
9310         int type;
9311         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9312         u32 item_size = btrfs_item_size_nr(eb, slot);
9313         u64 flags;
9314         u64 offset;
9315         int metadata = 0;
9316         int level;
9317         struct btrfs_key key;
9318         int ret;
9319         int err = 0;
9320
9321         btrfs_item_key_to_cpu(eb, &key, slot);
9322         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9323                 bytes_used += key.offset;
9324         else
9325                 bytes_used += nodesize;
9326
9327         if (item_size < sizeof(*ei)) {
9328                 /*
9329                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9330                  * old thing when on disk format is still un-determined.
9331                  * No need to care about it anymore
9332                  */
9333                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9334                 return -ENOTTY;
9335         }
9336
9337         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9338         flags = btrfs_extent_flags(eb, ei);
9339
9340         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9341                 metadata = 1;
9342         if (metadata && check_crossing_stripes(global_info, key.objectid,
9343                                                eb->len)) {
9344                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9345                       key.objectid, key.objectid + nodesize);
9346                 err |= CROSSING_STRIPE_BOUNDARY;
9347         }
9348
9349         ptr = (unsigned long)(ei + 1);
9350
9351         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9352                 /* Old EXTENT_ITEM metadata */
9353                 struct btrfs_tree_block_info *info;
9354
9355                 info = (struct btrfs_tree_block_info *)ptr;
9356                 level = btrfs_tree_block_level(eb, info);
9357                 ptr += sizeof(struct btrfs_tree_block_info);
9358         } else {
9359                 /* New METADATA_ITEM */
9360                 level = key.offset;
9361         }
9362         end = (unsigned long)ei + item_size;
9363
9364         if (ptr >= end) {
9365                 err |= ITEM_SIZE_MISMATCH;
9366                 goto out;
9367         }
9368
9369         /* Now check every backref in this extent item */
9370 next:
9371         iref = (struct btrfs_extent_inline_ref *)ptr;
9372         type = btrfs_extent_inline_ref_type(eb, iref);
9373         offset = btrfs_extent_inline_ref_offset(eb, iref);
9374         switch (type) {
9375         case BTRFS_TREE_BLOCK_REF_KEY:
9376                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9377                                                level);
9378                 err |= ret;
9379                 break;
9380         case BTRFS_SHARED_BLOCK_REF_KEY:
9381                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9382                                                  level);
9383                 err |= ret;
9384                 break;
9385         case BTRFS_EXTENT_DATA_REF_KEY:
9386                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9387                 ret = check_extent_data_backref(fs_info,
9388                                 btrfs_extent_data_ref_root(eb, dref),
9389                                 btrfs_extent_data_ref_objectid(eb, dref),
9390                                 btrfs_extent_data_ref_offset(eb, dref),
9391                                 key.objectid, key.offset,
9392                                 btrfs_extent_data_ref_count(eb, dref));
9393                 err |= ret;
9394                 break;
9395         case BTRFS_SHARED_DATA_REF_KEY:
9396                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9397                 err |= ret;
9398                 break;
9399         default:
9400                 error("extent[%llu %d %llu] has unknown ref type: %d",
9401                         key.objectid, key.type, key.offset, type);
9402                 err |= UNKNOWN_TYPE;
9403                 goto out;
9404         }
9405
9406         ptr += btrfs_extent_inline_ref_size(type);
9407         if (ptr < end)
9408                 goto next;
9409
9410 out:
9411         return err;
9412 }
9413
9414 /*
9415  * Check if a dev extent item is referred correctly by its chunk
9416  */
9417 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9418                                  struct extent_buffer *eb, int slot)
9419 {
9420         struct btrfs_root *chunk_root = fs_info->chunk_root;
9421         struct btrfs_dev_extent *ptr;
9422         struct btrfs_path path;
9423         struct btrfs_key chunk_key;
9424         struct btrfs_key devext_key;
9425         struct btrfs_chunk *chunk;
9426         struct extent_buffer *l;
9427         int num_stripes;
9428         u64 length;
9429         int i;
9430         int found_chunk = 0;
9431         int ret;
9432
9433         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9434         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9435         length = btrfs_dev_extent_length(eb, ptr);
9436
9437         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9438         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9439         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9440
9441         btrfs_init_path(&path);
9442         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9443         if (ret)
9444                 goto out;
9445
9446         l = path.nodes[0];
9447         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9448         if (btrfs_chunk_length(l, chunk) != length)
9449                 goto out;
9450
9451         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9452         for (i = 0; i < num_stripes; i++) {
9453                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9454                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9455
9456                 if (devid == devext_key.objectid &&
9457                     offset == devext_key.offset) {
9458                         found_chunk = 1;
9459                         break;
9460                 }
9461         }
9462 out:
9463         btrfs_release_path(&path);
9464         if (!found_chunk) {
9465                 error(
9466                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9467                         devext_key.objectid, devext_key.offset, length);
9468                 return REFERENCER_MISSING;
9469         }
9470         return 0;
9471 }
9472
9473 /*
9474  * Check if the used space is correct with the dev item
9475  */
9476 static int check_dev_item(struct btrfs_fs_info *fs_info,
9477                           struct extent_buffer *eb, int slot)
9478 {
9479         struct btrfs_root *dev_root = fs_info->dev_root;
9480         struct btrfs_dev_item *dev_item;
9481         struct btrfs_path path;
9482         struct btrfs_key key;
9483         struct btrfs_dev_extent *ptr;
9484         u64 dev_id;
9485         u64 used;
9486         u64 total = 0;
9487         int ret;
9488
9489         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9490         dev_id = btrfs_device_id(eb, dev_item);
9491         used = btrfs_device_bytes_used(eb, dev_item);
9492
9493         key.objectid = dev_id;
9494         key.type = BTRFS_DEV_EXTENT_KEY;
9495         key.offset = 0;
9496
9497         btrfs_init_path(&path);
9498         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9499         if (ret < 0) {
9500                 btrfs_item_key_to_cpu(eb, &key, slot);
9501                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9502                         key.objectid, key.type, key.offset);
9503                 btrfs_release_path(&path);
9504                 return REFERENCER_MISSING;
9505         }
9506
9507         /* Iterate dev_extents to calculate the used space of a device */
9508         while (1) {
9509                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9510
9511                 if (key.objectid > dev_id)
9512                         break;
9513                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9514                         goto next;
9515
9516                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9517                                      struct btrfs_dev_extent);
9518                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9519 next:
9520                 ret = btrfs_next_item(dev_root, &path);
9521                 if (ret)
9522                         break;
9523         }
9524         btrfs_release_path(&path);
9525
9526         if (used != total) {
9527                 btrfs_item_key_to_cpu(eb, &key, slot);
9528                 error(
9529 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9530                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9531                         BTRFS_DEV_EXTENT_KEY, dev_id);
9532                 return ACCOUNTING_MISMATCH;
9533         }
9534         return 0;
9535 }
9536
9537 /*
9538  * Check a block group item with its referener (chunk) and its used space
9539  * with extent/metadata item
9540  */
9541 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9542                                   struct extent_buffer *eb, int slot)
9543 {
9544         struct btrfs_root *extent_root = fs_info->extent_root;
9545         struct btrfs_root *chunk_root = fs_info->chunk_root;
9546         struct btrfs_block_group_item *bi;
9547         struct btrfs_block_group_item bg_item;
9548         struct btrfs_path path;
9549         struct btrfs_key bg_key;
9550         struct btrfs_key chunk_key;
9551         struct btrfs_key extent_key;
9552         struct btrfs_chunk *chunk;
9553         struct extent_buffer *leaf;
9554         struct btrfs_extent_item *ei;
9555         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9556         u64 flags;
9557         u64 bg_flags;
9558         u64 used;
9559         u64 total = 0;
9560         int ret;
9561         int err = 0;
9562
9563         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9564         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9565         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9566         used = btrfs_block_group_used(&bg_item);
9567         bg_flags = btrfs_block_group_flags(&bg_item);
9568
9569         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9570         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9571         chunk_key.offset = bg_key.objectid;
9572
9573         btrfs_init_path(&path);
9574         /* Search for the referencer chunk */
9575         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9576         if (ret) {
9577                 error(
9578                 "block group[%llu %llu] did not find the related chunk item",
9579                         bg_key.objectid, bg_key.offset);
9580                 err |= REFERENCER_MISSING;
9581         } else {
9582                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9583                                         struct btrfs_chunk);
9584                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9585                                                 bg_key.offset) {
9586                         error(
9587         "block group[%llu %llu] related chunk item length does not match",
9588                                 bg_key.objectid, bg_key.offset);
9589                         err |= REFERENCER_MISMATCH;
9590                 }
9591         }
9592         btrfs_release_path(&path);
9593
9594         /* Search from the block group bytenr */
9595         extent_key.objectid = bg_key.objectid;
9596         extent_key.type = 0;
9597         extent_key.offset = 0;
9598
9599         btrfs_init_path(&path);
9600         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9601         if (ret < 0)
9602                 goto out;
9603
9604         /* Iterate extent tree to account used space */
9605         while (1) {
9606                 leaf = path.nodes[0];
9607                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9608                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9609                         break;
9610
9611                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9612                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9613                         goto next;
9614                 if (extent_key.objectid < bg_key.objectid)
9615                         goto next;
9616
9617                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9618                         total += nodesize;
9619                 else
9620                         total += extent_key.offset;
9621
9622                 ei = btrfs_item_ptr(leaf, path.slots[0],
9623                                     struct btrfs_extent_item);
9624                 flags = btrfs_extent_flags(leaf, ei);
9625                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9626                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9627                                 error(
9628                         "bad extent[%llu, %llu) type mismatch with chunk",
9629                                         extent_key.objectid,
9630                                         extent_key.objectid + extent_key.offset);
9631                                 err |= CHUNK_TYPE_MISMATCH;
9632                         }
9633                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9634                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9635                                     BTRFS_BLOCK_GROUP_METADATA))) {
9636                                 error(
9637                         "bad extent[%llu, %llu) type mismatch with chunk",
9638                                         extent_key.objectid,
9639                                         extent_key.objectid + nodesize);
9640                                 err |= CHUNK_TYPE_MISMATCH;
9641                         }
9642                 }
9643 next:
9644                 ret = btrfs_next_item(extent_root, &path);
9645                 if (ret)
9646                         break;
9647         }
9648
9649 out:
9650         btrfs_release_path(&path);
9651
9652         if (total != used) {
9653                 error(
9654                 "block group[%llu %llu] used %llu but extent items used %llu",
9655                         bg_key.objectid, bg_key.offset, used, total);
9656                 err |= ACCOUNTING_MISMATCH;
9657         }
9658         return err;
9659 }
9660
9661 /*
9662  * Check a chunk item.
9663  * Including checking all referred dev_extents and block group
9664  */
9665 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9666                             struct extent_buffer *eb, int slot)
9667 {
9668         struct btrfs_root *extent_root = fs_info->extent_root;
9669         struct btrfs_root *dev_root = fs_info->dev_root;
9670         struct btrfs_path path;
9671         struct btrfs_key chunk_key;
9672         struct btrfs_key bg_key;
9673         struct btrfs_key devext_key;
9674         struct btrfs_chunk *chunk;
9675         struct extent_buffer *leaf;
9676         struct btrfs_block_group_item *bi;
9677         struct btrfs_block_group_item bg_item;
9678         struct btrfs_dev_extent *ptr;
9679         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9680         u64 length;
9681         u64 chunk_end;
9682         u64 type;
9683         u64 profile;
9684         int num_stripes;
9685         u64 offset;
9686         u64 objectid;
9687         int i;
9688         int ret;
9689         int err = 0;
9690
9691         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9692         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9693         length = btrfs_chunk_length(eb, chunk);
9694         chunk_end = chunk_key.offset + length;
9695         if (!IS_ALIGNED(length, sectorsize)) {
9696                 error("chunk[%llu %llu) not aligned to %u",
9697                         chunk_key.offset, chunk_end, sectorsize);
9698                 err |= BYTES_UNALIGNED;
9699                 goto out;
9700         }
9701
9702         type = btrfs_chunk_type(eb, chunk);
9703         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9704         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9705                 error("chunk[%llu %llu) has no chunk type",
9706                         chunk_key.offset, chunk_end);
9707                 err |= UNKNOWN_TYPE;
9708         }
9709         if (profile && (profile & (profile - 1))) {
9710                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9711                         chunk_key.offset, chunk_end, profile);
9712                 err |= UNKNOWN_TYPE;
9713         }
9714
9715         bg_key.objectid = chunk_key.offset;
9716         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9717         bg_key.offset = length;
9718
9719         btrfs_init_path(&path);
9720         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9721         if (ret) {
9722                 error(
9723                 "chunk[%llu %llu) did not find the related block group item",
9724                         chunk_key.offset, chunk_end);
9725                 err |= REFERENCER_MISSING;
9726         } else{
9727                 leaf = path.nodes[0];
9728                 bi = btrfs_item_ptr(leaf, path.slots[0],
9729                                     struct btrfs_block_group_item);
9730                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9731                                    sizeof(bg_item));
9732                 if (btrfs_block_group_flags(&bg_item) != type) {
9733                         error(
9734 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9735                                 chunk_key.offset, chunk_end, type,
9736                                 btrfs_block_group_flags(&bg_item));
9737                         err |= REFERENCER_MISSING;
9738                 }
9739         }
9740
9741         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9742         for (i = 0; i < num_stripes; i++) {
9743                 btrfs_release_path(&path);
9744                 btrfs_init_path(&path);
9745                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9746                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9747                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9748
9749                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9750                                         0, 0);
9751                 if (ret)
9752                         goto not_match_dev;
9753
9754                 leaf = path.nodes[0];
9755                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9756                                      struct btrfs_dev_extent);
9757                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9758                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9759                 if (objectid != chunk_key.objectid ||
9760                     offset != chunk_key.offset ||
9761                     btrfs_dev_extent_length(leaf, ptr) != length)
9762                         goto not_match_dev;
9763                 continue;
9764 not_match_dev:
9765                 err |= BACKREF_MISSING;
9766                 error(
9767                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9768                         chunk_key.objectid, chunk_end, i);
9769                 continue;
9770         }
9771         btrfs_release_path(&path);
9772 out:
9773         return err;
9774 }
9775
9776 /*
9777  * Main entry function to check known items and update related accounting info
9778  */
9779 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9780 {
9781         struct btrfs_fs_info *fs_info = root->fs_info;
9782         struct btrfs_key key;
9783         int slot = 0;
9784         int type;
9785         struct btrfs_extent_data_ref *dref;
9786         int ret;
9787         int err = 0;
9788
9789 next:
9790         btrfs_item_key_to_cpu(eb, &key, slot);
9791         type = key.type;
9792
9793         switch (type) {
9794         case BTRFS_EXTENT_DATA_KEY:
9795                 ret = check_extent_data_item(root, eb, slot);
9796                 err |= ret;
9797                 break;
9798         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9799                 ret = check_block_group_item(fs_info, eb, slot);
9800                 err |= ret;
9801                 break;
9802         case BTRFS_DEV_ITEM_KEY:
9803                 ret = check_dev_item(fs_info, eb, slot);
9804                 err |= ret;
9805                 break;
9806         case BTRFS_CHUNK_ITEM_KEY:
9807                 ret = check_chunk_item(fs_info, eb, slot);
9808                 err |= ret;
9809                 break;
9810         case BTRFS_DEV_EXTENT_KEY:
9811                 ret = check_dev_extent_item(fs_info, eb, slot);
9812                 err |= ret;
9813                 break;
9814         case BTRFS_EXTENT_ITEM_KEY:
9815         case BTRFS_METADATA_ITEM_KEY:
9816                 ret = check_extent_item(fs_info, eb, slot);
9817                 err |= ret;
9818                 break;
9819         case BTRFS_EXTENT_CSUM_KEY:
9820                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9821                 break;
9822         case BTRFS_TREE_BLOCK_REF_KEY:
9823                 ret = check_tree_block_backref(fs_info, key.offset,
9824                                                key.objectid, -1);
9825                 err |= ret;
9826                 break;
9827         case BTRFS_EXTENT_DATA_REF_KEY:
9828                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9829                 ret = check_extent_data_backref(fs_info,
9830                                 btrfs_extent_data_ref_root(eb, dref),
9831                                 btrfs_extent_data_ref_objectid(eb, dref),
9832                                 btrfs_extent_data_ref_offset(eb, dref),
9833                                 key.objectid, 0,
9834                                 btrfs_extent_data_ref_count(eb, dref));
9835                 err |= ret;
9836                 break;
9837         case BTRFS_SHARED_BLOCK_REF_KEY:
9838                 ret = check_shared_block_backref(fs_info, key.offset,
9839                                                  key.objectid, -1);
9840                 err |= ret;
9841                 break;
9842         case BTRFS_SHARED_DATA_REF_KEY:
9843                 ret = check_shared_data_backref(fs_info, key.offset,
9844                                                 key.objectid);
9845                 err |= ret;
9846                 break;
9847         default:
9848                 break;
9849         }
9850
9851         if (++slot < btrfs_header_nritems(eb))
9852                 goto next;
9853
9854         return err;
9855 }
9856
9857 /*
9858  * Helper function for later fs/subvol tree check.  To determine if a tree
9859  * block should be checked.
9860  * This function will ensure only the direct referencer with lowest rootid to
9861  * check a fs/subvolume tree block.
9862  *
9863  * Backref check at extent tree would detect errors like missing subvolume
9864  * tree, so we can do aggressive check to reduce duplicated checks.
9865  */
9866 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9867 {
9868         struct btrfs_root *extent_root = root->fs_info->extent_root;
9869         struct btrfs_key key;
9870         struct btrfs_path path;
9871         struct extent_buffer *leaf;
9872         int slot;
9873         struct btrfs_extent_item *ei;
9874         unsigned long ptr;
9875         unsigned long end;
9876         int type;
9877         u32 item_size;
9878         u64 offset;
9879         struct btrfs_extent_inline_ref *iref;
9880         int ret;
9881
9882         btrfs_init_path(&path);
9883         key.objectid = btrfs_header_bytenr(eb);
9884         key.type = BTRFS_METADATA_ITEM_KEY;
9885         key.offset = (u64)-1;
9886
9887         /*
9888          * Any failure in backref resolving means we can't determine
9889          * whom the tree block belongs to.
9890          * So in that case, we need to check that tree block
9891          */
9892         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9893         if (ret < 0)
9894                 goto need_check;
9895
9896         ret = btrfs_previous_extent_item(extent_root, &path,
9897                                          btrfs_header_bytenr(eb));
9898         if (ret)
9899                 goto need_check;
9900
9901         leaf = path.nodes[0];
9902         slot = path.slots[0];
9903         btrfs_item_key_to_cpu(leaf, &key, slot);
9904         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9905
9906         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9907                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9908         } else {
9909                 struct btrfs_tree_block_info *info;
9910
9911                 info = (struct btrfs_tree_block_info *)(ei + 1);
9912                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9913         }
9914
9915         item_size = btrfs_item_size_nr(leaf, slot);
9916         ptr = (unsigned long)iref;
9917         end = (unsigned long)ei + item_size;
9918         while (ptr < end) {
9919                 iref = (struct btrfs_extent_inline_ref *)ptr;
9920                 type = btrfs_extent_inline_ref_type(leaf, iref);
9921                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9922
9923                 /*
9924                  * We only check the tree block if current root is
9925                  * the lowest referencer of it.
9926                  */
9927                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9928                     offset < root->objectid) {
9929                         btrfs_release_path(&path);
9930                         return 0;
9931                 }
9932
9933                 ptr += btrfs_extent_inline_ref_size(type);
9934         }
9935         /*
9936          * Normally we should also check keyed tree block ref, but that may be
9937          * very time consuming.  Inlined ref should already make us skip a lot
9938          * of refs now.  So skip search keyed tree block ref.
9939          */
9940
9941 need_check:
9942         btrfs_release_path(&path);
9943         return 1;
9944 }
9945
9946 /*
9947  * Traversal function for tree block. We will do:
9948  * 1) Skip shared fs/subvolume tree blocks
9949  * 2) Update related bytes accounting
9950  * 3) Pre-order traversal
9951  */
9952 static int traverse_tree_block(struct btrfs_root *root,
9953                                 struct extent_buffer *node)
9954 {
9955         struct extent_buffer *eb;
9956         struct btrfs_key key;
9957         struct btrfs_key drop_key;
9958         int level;
9959         u64 nr;
9960         int i;
9961         int err = 0;
9962         int ret;
9963
9964         /*
9965          * Skip shared fs/subvolume tree block, in that case they will
9966          * be checked by referencer with lowest rootid
9967          */
9968         if (is_fstree(root->objectid) && !should_check(root, node))
9969                 return 0;
9970
9971         /* Update bytes accounting */
9972         total_btree_bytes += node->len;
9973         if (fs_root_objectid(btrfs_header_owner(node)))
9974                 total_fs_tree_bytes += node->len;
9975         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9976                 total_extent_tree_bytes += node->len;
9977         if (!found_old_backref &&
9978             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9979             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9980             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9981                 found_old_backref = 1;
9982
9983         /* pre-order tranversal, check itself first */
9984         level = btrfs_header_level(node);
9985         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9986                                    btrfs_header_level(node),
9987                                    btrfs_header_owner(node));
9988         err |= ret;
9989         if (err)
9990                 error(
9991         "check %s failed root %llu bytenr %llu level %d, force continue check",
9992                         level ? "node":"leaf", root->objectid,
9993                         btrfs_header_bytenr(node), btrfs_header_level(node));
9994
9995         if (!level) {
9996                 btree_space_waste += btrfs_leaf_free_space(root, node);
9997                 ret = check_leaf_items(root, node);
9998                 err |= ret;
9999                 return err;
10000         }
10001
10002         nr = btrfs_header_nritems(node);
10003         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10004         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10005                 sizeof(struct btrfs_key_ptr);
10006
10007         /* Then check all its children */
10008         for (i = 0; i < nr; i++) {
10009                 u64 blocknr = btrfs_node_blockptr(node, i);
10010
10011                 btrfs_node_key_to_cpu(node, &key, i);
10012                 if (level == root->root_item.drop_level &&
10013                     is_dropped_key(&key, &drop_key))
10014                         continue;
10015
10016                 /*
10017                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10018                  * to call the function itself.
10019                  */
10020                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10021                 if (extent_buffer_uptodate(eb)) {
10022                         ret = traverse_tree_block(root, eb);
10023                         err |= ret;
10024                 }
10025                 free_extent_buffer(eb);
10026         }
10027
10028         return err;
10029 }
10030
10031 /*
10032  * Low memory usage version check_chunks_and_extents.
10033  */
10034 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10035 {
10036         struct btrfs_path path;
10037         struct btrfs_key key;
10038         struct btrfs_root *root1;
10039         struct btrfs_root *cur_root;
10040         int err = 0;
10041         int ret;
10042
10043         root1 = root->fs_info->chunk_root;
10044         ret = traverse_tree_block(root1, root1->node);
10045         err |= ret;
10046
10047         root1 = root->fs_info->tree_root;
10048         ret = traverse_tree_block(root1, root1->node);
10049         err |= ret;
10050
10051         btrfs_init_path(&path);
10052         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10053         key.offset = 0;
10054         key.type = BTRFS_ROOT_ITEM_KEY;
10055
10056         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10057         if (ret) {
10058                 error("cannot find extent treet in tree_root");
10059                 goto out;
10060         }
10061
10062         while (1) {
10063                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10064                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10065                         goto next;
10066                 key.offset = (u64)-1;
10067
10068                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10069                 if (IS_ERR(cur_root) || !cur_root) {
10070                         error("failed to read tree: %lld", key.objectid);
10071                         goto next;
10072                 }
10073
10074                 ret = traverse_tree_block(cur_root, cur_root->node);
10075                 err |= ret;
10076
10077 next:
10078                 ret = btrfs_next_item(root1, &path);
10079                 if (ret)
10080                         goto out;
10081         }
10082
10083 out:
10084         btrfs_release_path(&path);
10085         return err;
10086 }
10087
10088 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10089                            struct btrfs_root *root, int overwrite)
10090 {
10091         struct extent_buffer *c;
10092         struct extent_buffer *old = root->node;
10093         int level;
10094         int ret;
10095         struct btrfs_disk_key disk_key = {0,0,0};
10096
10097         level = 0;
10098
10099         if (overwrite) {
10100                 c = old;
10101                 extent_buffer_get(c);
10102                 goto init;
10103         }
10104         c = btrfs_alloc_free_block(trans, root,
10105                                    root->nodesize,
10106                                    root->root_key.objectid,
10107                                    &disk_key, level, 0, 0);
10108         if (IS_ERR(c)) {
10109                 c = old;
10110                 extent_buffer_get(c);
10111                 overwrite = 1;
10112         }
10113 init:
10114         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10115         btrfs_set_header_level(c, level);
10116         btrfs_set_header_bytenr(c, c->start);
10117         btrfs_set_header_generation(c, trans->transid);
10118         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10119         btrfs_set_header_owner(c, root->root_key.objectid);
10120
10121         write_extent_buffer(c, root->fs_info->fsid,
10122                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10123
10124         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10125                             btrfs_header_chunk_tree_uuid(c),
10126                             BTRFS_UUID_SIZE);
10127
10128         btrfs_mark_buffer_dirty(c);
10129         /*
10130          * this case can happen in the following case:
10131          *
10132          * 1.overwrite previous root.
10133          *
10134          * 2.reinit reloc data root, this is because we skip pin
10135          * down reloc data tree before which means we can allocate
10136          * same block bytenr here.
10137          */
10138         if (old->start == c->start) {
10139                 btrfs_set_root_generation(&root->root_item,
10140                                           trans->transid);
10141                 root->root_item.level = btrfs_header_level(root->node);
10142                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10143                                         &root->root_key, &root->root_item);
10144                 if (ret) {
10145                         free_extent_buffer(c);
10146                         return ret;
10147                 }
10148         }
10149         free_extent_buffer(old);
10150         root->node = c;
10151         add_root_to_dirty_list(root);
10152         return 0;
10153 }
10154
10155 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10156                                 struct extent_buffer *eb, int tree_root)
10157 {
10158         struct extent_buffer *tmp;
10159         struct btrfs_root_item *ri;
10160         struct btrfs_key key;
10161         u64 bytenr;
10162         u32 nodesize;
10163         int level = btrfs_header_level(eb);
10164         int nritems;
10165         int ret;
10166         int i;
10167
10168         /*
10169          * If we have pinned this block before, don't pin it again.
10170          * This can not only avoid forever loop with broken filesystem
10171          * but also give us some speedups.
10172          */
10173         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10174                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10175                 return 0;
10176
10177         btrfs_pin_extent(fs_info, eb->start, eb->len);
10178
10179         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10180         nritems = btrfs_header_nritems(eb);
10181         for (i = 0; i < nritems; i++) {
10182                 if (level == 0) {
10183                         btrfs_item_key_to_cpu(eb, &key, i);
10184                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10185                                 continue;
10186                         /* Skip the extent root and reloc roots */
10187                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10188                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10189                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10190                                 continue;
10191                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10192                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10193
10194                         /*
10195                          * If at any point we start needing the real root we
10196                          * will have to build a stump root for the root we are
10197                          * in, but for now this doesn't actually use the root so
10198                          * just pass in extent_root.
10199                          */
10200                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10201                                               nodesize, 0);
10202                         if (!extent_buffer_uptodate(tmp)) {
10203                                 fprintf(stderr, "Error reading root block\n");
10204                                 return -EIO;
10205                         }
10206                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10207                         free_extent_buffer(tmp);
10208                         if (ret)
10209                                 return ret;
10210                 } else {
10211                         bytenr = btrfs_node_blockptr(eb, i);
10212
10213                         /* If we aren't the tree root don't read the block */
10214                         if (level == 1 && !tree_root) {
10215                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10216                                 continue;
10217                         }
10218
10219                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10220                                               nodesize, 0);
10221                         if (!extent_buffer_uptodate(tmp)) {
10222                                 fprintf(stderr, "Error reading tree block\n");
10223                                 return -EIO;
10224                         }
10225                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10226                         free_extent_buffer(tmp);
10227                         if (ret)
10228                                 return ret;
10229                 }
10230         }
10231
10232         return 0;
10233 }
10234
10235 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10236 {
10237         int ret;
10238
10239         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10240         if (ret)
10241                 return ret;
10242
10243         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10244 }
10245
10246 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10247 {
10248         struct btrfs_block_group_cache *cache;
10249         struct btrfs_path *path;
10250         struct extent_buffer *leaf;
10251         struct btrfs_chunk *chunk;
10252         struct btrfs_key key;
10253         int ret;
10254         u64 start;
10255
10256         path = btrfs_alloc_path();
10257         if (!path)
10258                 return -ENOMEM;
10259
10260         key.objectid = 0;
10261         key.type = BTRFS_CHUNK_ITEM_KEY;
10262         key.offset = 0;
10263
10264         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10265         if (ret < 0) {
10266                 btrfs_free_path(path);
10267                 return ret;
10268         }
10269
10270         /*
10271          * We do this in case the block groups were screwed up and had alloc
10272          * bits that aren't actually set on the chunks.  This happens with
10273          * restored images every time and could happen in real life I guess.
10274          */
10275         fs_info->avail_data_alloc_bits = 0;
10276         fs_info->avail_metadata_alloc_bits = 0;
10277         fs_info->avail_system_alloc_bits = 0;
10278
10279         /* First we need to create the in-memory block groups */
10280         while (1) {
10281                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10282                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10283                         if (ret < 0) {
10284                                 btrfs_free_path(path);
10285                                 return ret;
10286                         }
10287                         if (ret) {
10288                                 ret = 0;
10289                                 break;
10290                         }
10291                 }
10292                 leaf = path->nodes[0];
10293                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10294                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10295                         path->slots[0]++;
10296                         continue;
10297                 }
10298
10299                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10300                                        struct btrfs_chunk);
10301                 btrfs_add_block_group(fs_info, 0,
10302                                       btrfs_chunk_type(leaf, chunk),
10303                                       key.objectid, key.offset,
10304                                       btrfs_chunk_length(leaf, chunk));
10305                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10306                                  key.offset + btrfs_chunk_length(leaf, chunk),
10307                                  GFP_NOFS);
10308                 path->slots[0]++;
10309         }
10310         start = 0;
10311         while (1) {
10312                 cache = btrfs_lookup_first_block_group(fs_info, start);
10313                 if (!cache)
10314                         break;
10315                 cache->cached = 1;
10316                 start = cache->key.objectid + cache->key.offset;
10317         }
10318
10319         btrfs_free_path(path);
10320         return 0;
10321 }
10322
10323 static int reset_balance(struct btrfs_trans_handle *trans,
10324                          struct btrfs_fs_info *fs_info)
10325 {
10326         struct btrfs_root *root = fs_info->tree_root;
10327         struct btrfs_path *path;
10328         struct extent_buffer *leaf;
10329         struct btrfs_key key;
10330         int del_slot, del_nr = 0;
10331         int ret;
10332         int found = 0;
10333
10334         path = btrfs_alloc_path();
10335         if (!path)
10336                 return -ENOMEM;
10337
10338         key.objectid = BTRFS_BALANCE_OBJECTID;
10339         key.type = BTRFS_BALANCE_ITEM_KEY;
10340         key.offset = 0;
10341
10342         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10343         if (ret) {
10344                 if (ret > 0)
10345                         ret = 0;
10346                 if (!ret)
10347                         goto reinit_data_reloc;
10348                 else
10349                         goto out;
10350         }
10351
10352         ret = btrfs_del_item(trans, root, path);
10353         if (ret)
10354                 goto out;
10355         btrfs_release_path(path);
10356
10357         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10358         key.type = BTRFS_ROOT_ITEM_KEY;
10359         key.offset = 0;
10360
10361         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10362         if (ret < 0)
10363                 goto out;
10364         while (1) {
10365                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10366                         if (!found)
10367                                 break;
10368
10369                         if (del_nr) {
10370                                 ret = btrfs_del_items(trans, root, path,
10371                                                       del_slot, del_nr);
10372                                 del_nr = 0;
10373                                 if (ret)
10374                                         goto out;
10375                         }
10376                         key.offset++;
10377                         btrfs_release_path(path);
10378
10379                         found = 0;
10380                         ret = btrfs_search_slot(trans, root, &key, path,
10381                                                 -1, 1);
10382                         if (ret < 0)
10383                                 goto out;
10384                         continue;
10385                 }
10386                 found = 1;
10387                 leaf = path->nodes[0];
10388                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10389                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10390                         break;
10391                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10392                         path->slots[0]++;
10393                         continue;
10394                 }
10395                 if (!del_nr) {
10396                         del_slot = path->slots[0];
10397                         del_nr = 1;
10398                 } else {
10399                         del_nr++;
10400                 }
10401                 path->slots[0]++;
10402         }
10403
10404         if (del_nr) {
10405                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10406                 if (ret)
10407                         goto out;
10408         }
10409         btrfs_release_path(path);
10410
10411 reinit_data_reloc:
10412         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10413         key.type = BTRFS_ROOT_ITEM_KEY;
10414         key.offset = (u64)-1;
10415         root = btrfs_read_fs_root(fs_info, &key);
10416         if (IS_ERR(root)) {
10417                 fprintf(stderr, "Error reading data reloc tree\n");
10418                 ret = PTR_ERR(root);
10419                 goto out;
10420         }
10421         record_root_in_trans(trans, root);
10422         ret = btrfs_fsck_reinit_root(trans, root, 0);
10423         if (ret)
10424                 goto out;
10425         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10426 out:
10427         btrfs_free_path(path);
10428         return ret;
10429 }
10430
10431 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10432                               struct btrfs_fs_info *fs_info)
10433 {
10434         u64 start = 0;
10435         int ret;
10436
10437         /*
10438          * The only reason we don't do this is because right now we're just
10439          * walking the trees we find and pinning down their bytes, we don't look
10440          * at any of the leaves.  In order to do mixed groups we'd have to check
10441          * the leaves of any fs roots and pin down the bytes for any file
10442          * extents we find.  Not hard but why do it if we don't have to?
10443          */
10444         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10445                 fprintf(stderr, "We don't support re-initing the extent tree "
10446                         "for mixed block groups yet, please notify a btrfs "
10447                         "developer you want to do this so they can add this "
10448                         "functionality.\n");
10449                 return -EINVAL;
10450         }
10451
10452         /*
10453          * first we need to walk all of the trees except the extent tree and pin
10454          * down the bytes that are in use so we don't overwrite any existing
10455          * metadata.
10456          */
10457         ret = pin_metadata_blocks(fs_info);
10458         if (ret) {
10459                 fprintf(stderr, "error pinning down used bytes\n");
10460                 return ret;
10461         }
10462
10463         /*
10464          * Need to drop all the block groups since we're going to recreate all
10465          * of them again.
10466          */
10467         btrfs_free_block_groups(fs_info);
10468         ret = reset_block_groups(fs_info);
10469         if (ret) {
10470                 fprintf(stderr, "error resetting the block groups\n");
10471                 return ret;
10472         }
10473
10474         /* Ok we can allocate now, reinit the extent root */
10475         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10476         if (ret) {
10477                 fprintf(stderr, "extent root initialization failed\n");
10478                 /*
10479                  * When the transaction code is updated we should end the
10480                  * transaction, but for now progs only knows about commit so
10481                  * just return an error.
10482                  */
10483                 return ret;
10484         }
10485
10486         /*
10487          * Now we have all the in-memory block groups setup so we can make
10488          * allocations properly, and the metadata we care about is safe since we
10489          * pinned all of it above.
10490          */
10491         while (1) {
10492                 struct btrfs_block_group_cache *cache;
10493
10494                 cache = btrfs_lookup_first_block_group(fs_info, start);
10495                 if (!cache)
10496                         break;
10497                 start = cache->key.objectid + cache->key.offset;
10498                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10499                                         &cache->key, &cache->item,
10500                                         sizeof(cache->item));
10501                 if (ret) {
10502                         fprintf(stderr, "Error adding block group\n");
10503                         return ret;
10504                 }
10505                 btrfs_extent_post_op(trans, fs_info->extent_root);
10506         }
10507
10508         ret = reset_balance(trans, fs_info);
10509         if (ret)
10510                 fprintf(stderr, "error resetting the pending balance\n");
10511
10512         return ret;
10513 }
10514
10515 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10516 {
10517         struct btrfs_path *path;
10518         struct btrfs_trans_handle *trans;
10519         struct btrfs_key key;
10520         int ret;
10521
10522         printf("Recowing metadata block %llu\n", eb->start);
10523         key.objectid = btrfs_header_owner(eb);
10524         key.type = BTRFS_ROOT_ITEM_KEY;
10525         key.offset = (u64)-1;
10526
10527         root = btrfs_read_fs_root(root->fs_info, &key);
10528         if (IS_ERR(root)) {
10529                 fprintf(stderr, "Couldn't find owner root %llu\n",
10530                         key.objectid);
10531                 return PTR_ERR(root);
10532         }
10533
10534         path = btrfs_alloc_path();
10535         if (!path)
10536                 return -ENOMEM;
10537
10538         trans = btrfs_start_transaction(root, 1);
10539         if (IS_ERR(trans)) {
10540                 btrfs_free_path(path);
10541                 return PTR_ERR(trans);
10542         }
10543
10544         path->lowest_level = btrfs_header_level(eb);
10545         if (path->lowest_level)
10546                 btrfs_node_key_to_cpu(eb, &key, 0);
10547         else
10548                 btrfs_item_key_to_cpu(eb, &key, 0);
10549
10550         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10551         btrfs_commit_transaction(trans, root);
10552         btrfs_free_path(path);
10553         return ret;
10554 }
10555
10556 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10557 {
10558         struct btrfs_path *path;
10559         struct btrfs_trans_handle *trans;
10560         struct btrfs_key key;
10561         int ret;
10562
10563         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10564                bad->key.type, bad->key.offset);
10565         key.objectid = bad->root_id;
10566         key.type = BTRFS_ROOT_ITEM_KEY;
10567         key.offset = (u64)-1;
10568
10569         root = btrfs_read_fs_root(root->fs_info, &key);
10570         if (IS_ERR(root)) {
10571                 fprintf(stderr, "Couldn't find owner root %llu\n",
10572                         key.objectid);
10573                 return PTR_ERR(root);
10574         }
10575
10576         path = btrfs_alloc_path();
10577         if (!path)
10578                 return -ENOMEM;
10579
10580         trans = btrfs_start_transaction(root, 1);
10581         if (IS_ERR(trans)) {
10582                 btrfs_free_path(path);
10583                 return PTR_ERR(trans);
10584         }
10585
10586         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10587         if (ret) {
10588                 if (ret > 0)
10589                         ret = 0;
10590                 goto out;
10591         }
10592         ret = btrfs_del_item(trans, root, path);
10593 out:
10594         btrfs_commit_transaction(trans, root);
10595         btrfs_free_path(path);
10596         return ret;
10597 }
10598
10599 static int zero_log_tree(struct btrfs_root *root)
10600 {
10601         struct btrfs_trans_handle *trans;
10602         int ret;
10603
10604         trans = btrfs_start_transaction(root, 1);
10605         if (IS_ERR(trans)) {
10606                 ret = PTR_ERR(trans);
10607                 return ret;
10608         }
10609         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10610         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10611         ret = btrfs_commit_transaction(trans, root);
10612         return ret;
10613 }
10614
10615 static int populate_csum(struct btrfs_trans_handle *trans,
10616                          struct btrfs_root *csum_root, char *buf, u64 start,
10617                          u64 len)
10618 {
10619         u64 offset = 0;
10620         u64 sectorsize;
10621         int ret = 0;
10622
10623         while (offset < len) {
10624                 sectorsize = csum_root->sectorsize;
10625                 ret = read_extent_data(csum_root, buf, start + offset,
10626                                        &sectorsize, 0);
10627                 if (ret)
10628                         break;
10629                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10630                                             start + offset, buf, sectorsize);
10631                 if (ret)
10632                         break;
10633                 offset += sectorsize;
10634         }
10635         return ret;
10636 }
10637
10638 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10639                                       struct btrfs_root *csum_root,
10640                                       struct btrfs_root *cur_root)
10641 {
10642         struct btrfs_path *path;
10643         struct btrfs_key key;
10644         struct extent_buffer *node;
10645         struct btrfs_file_extent_item *fi;
10646         char *buf = NULL;
10647         u64 start = 0;
10648         u64 len = 0;
10649         int slot = 0;
10650         int ret = 0;
10651
10652         path = btrfs_alloc_path();
10653         if (!path)
10654                 return -ENOMEM;
10655         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10656         if (!buf) {
10657                 ret = -ENOMEM;
10658                 goto out;
10659         }
10660
10661         key.objectid = 0;
10662         key.offset = 0;
10663         key.type = 0;
10664
10665         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10666         if (ret < 0)
10667                 goto out;
10668         /* Iterate all regular file extents and fill its csum */
10669         while (1) {
10670                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10671
10672                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10673                         goto next;
10674                 node = path->nodes[0];
10675                 slot = path->slots[0];
10676                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10677                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10678                         goto next;
10679                 start = btrfs_file_extent_disk_bytenr(node, fi);
10680                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10681
10682                 ret = populate_csum(trans, csum_root, buf, start, len);
10683                 if (ret == -EEXIST)
10684                         ret = 0;
10685                 if (ret < 0)
10686                         goto out;
10687 next:
10688                 /*
10689                  * TODO: if next leaf is corrupted, jump to nearest next valid
10690                  * leaf.
10691                  */
10692                 ret = btrfs_next_item(cur_root, path);
10693                 if (ret < 0)
10694                         goto out;
10695                 if (ret > 0) {
10696                         ret = 0;
10697                         goto out;
10698                 }
10699         }
10700
10701 out:
10702         btrfs_free_path(path);
10703         free(buf);
10704         return ret;
10705 }
10706
10707 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10708                                   struct btrfs_root *csum_root)
10709 {
10710         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10711         struct btrfs_path *path;
10712         struct btrfs_root *tree_root = fs_info->tree_root;
10713         struct btrfs_root *cur_root;
10714         struct extent_buffer *node;
10715         struct btrfs_key key;
10716         int slot = 0;
10717         int ret = 0;
10718
10719         path = btrfs_alloc_path();
10720         if (!path)
10721                 return -ENOMEM;
10722
10723         key.objectid = BTRFS_FS_TREE_OBJECTID;
10724         key.offset = 0;
10725         key.type = BTRFS_ROOT_ITEM_KEY;
10726
10727         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10728         if (ret < 0)
10729                 goto out;
10730         if (ret > 0) {
10731                 ret = -ENOENT;
10732                 goto out;
10733         }
10734
10735         while (1) {
10736                 node = path->nodes[0];
10737                 slot = path->slots[0];
10738                 btrfs_item_key_to_cpu(node, &key, slot);
10739                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10740                         goto out;
10741                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10742                         goto next;
10743                 if (!is_fstree(key.objectid))
10744                         goto next;
10745                 key.offset = (u64)-1;
10746
10747                 cur_root = btrfs_read_fs_root(fs_info, &key);
10748                 if (IS_ERR(cur_root) || !cur_root) {
10749                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10750                                 key.objectid);
10751                         goto out;
10752                 }
10753                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10754                                 cur_root);
10755                 if (ret < 0)
10756                         goto out;
10757 next:
10758                 ret = btrfs_next_item(tree_root, path);
10759                 if (ret > 0) {
10760                         ret = 0;
10761                         goto out;
10762                 }
10763                 if (ret < 0)
10764                         goto out;
10765         }
10766
10767 out:
10768         btrfs_free_path(path);
10769         return ret;
10770 }
10771
10772 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10773                                       struct btrfs_root *csum_root)
10774 {
10775         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10776         struct btrfs_path *path;
10777         struct btrfs_extent_item *ei;
10778         struct extent_buffer *leaf;
10779         char *buf;
10780         struct btrfs_key key;
10781         int ret;
10782
10783         path = btrfs_alloc_path();
10784         if (!path)
10785                 return -ENOMEM;
10786
10787         key.objectid = 0;
10788         key.type = BTRFS_EXTENT_ITEM_KEY;
10789         key.offset = 0;
10790
10791         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10792         if (ret < 0) {
10793                 btrfs_free_path(path);
10794                 return ret;
10795         }
10796
10797         buf = malloc(csum_root->sectorsize);
10798         if (!buf) {
10799                 btrfs_free_path(path);
10800                 return -ENOMEM;
10801         }
10802
10803         while (1) {
10804                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10805                         ret = btrfs_next_leaf(extent_root, path);
10806                         if (ret < 0)
10807                                 break;
10808                         if (ret) {
10809                                 ret = 0;
10810                                 break;
10811                         }
10812                 }
10813                 leaf = path->nodes[0];
10814
10815                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10816                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10817                         path->slots[0]++;
10818                         continue;
10819                 }
10820
10821                 ei = btrfs_item_ptr(leaf, path->slots[0],
10822                                     struct btrfs_extent_item);
10823                 if (!(btrfs_extent_flags(leaf, ei) &
10824                       BTRFS_EXTENT_FLAG_DATA)) {
10825                         path->slots[0]++;
10826                         continue;
10827                 }
10828
10829                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10830                                     key.offset);
10831                 if (ret)
10832                         break;
10833                 path->slots[0]++;
10834         }
10835
10836         btrfs_free_path(path);
10837         free(buf);
10838         return ret;
10839 }
10840
/*
 * Recompute the data checksums and insert them into the csum tree.
 *
 * The extent tree is the default source of data extents.  After an extent
 * tree init all extent info has been wiped, so the extent tree can't be
 * relied on; in that case the caller sets @search_fs_tree and the fs/subvolume
 * trees are walked instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
10857
10858 static void free_roots_info_cache(void)
10859 {
10860         if (!roots_info_cache)
10861                 return;
10862
10863         while (!cache_tree_empty(roots_info_cache)) {
10864                 struct cache_extent *entry;
10865                 struct root_item_info *rii;
10866
10867                 entry = first_cache_extent(roots_info_cache);
10868                 if (!entry)
10869                         break;
10870                 remove_cache_extent(roots_info_cache, entry);
10871                 rii = container_of(entry, struct root_item_info, cache_extent);
10872                 free(rii);
10873         }
10874
10875         free(roots_info_cache);
10876         roots_info_cache = NULL;
10877 }
10878
10879 static int build_roots_info_cache(struct btrfs_fs_info *info)
10880 {
10881         int ret = 0;
10882         struct btrfs_key key;
10883         struct extent_buffer *leaf;
10884         struct btrfs_path *path;
10885
10886         if (!roots_info_cache) {
10887                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10888                 if (!roots_info_cache)
10889                         return -ENOMEM;
10890                 cache_tree_init(roots_info_cache);
10891         }
10892
10893         path = btrfs_alloc_path();
10894         if (!path)
10895                 return -ENOMEM;
10896
10897         key.objectid = 0;
10898         key.type = BTRFS_EXTENT_ITEM_KEY;
10899         key.offset = 0;
10900
10901         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10902         if (ret < 0)
10903                 goto out;
10904         leaf = path->nodes[0];
10905
10906         while (1) {
10907                 struct btrfs_key found_key;
10908                 struct btrfs_extent_item *ei;
10909                 struct btrfs_extent_inline_ref *iref;
10910                 int slot = path->slots[0];
10911                 int type;
10912                 u64 flags;
10913                 u64 root_id;
10914                 u8 level;
10915                 struct cache_extent *entry;
10916                 struct root_item_info *rii;
10917
10918                 if (slot >= btrfs_header_nritems(leaf)) {
10919                         ret = btrfs_next_leaf(info->extent_root, path);
10920                         if (ret < 0) {
10921                                 break;
10922                         } else if (ret) {
10923                                 ret = 0;
10924                                 break;
10925                         }
10926                         leaf = path->nodes[0];
10927                         slot = path->slots[0];
10928                 }
10929
10930                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10931
10932                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10933                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10934                         goto next;
10935
10936                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10937                 flags = btrfs_extent_flags(leaf, ei);
10938
10939                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10940                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10941                         goto next;
10942
10943                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10944                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10945                         level = found_key.offset;
10946                 } else {
10947                         struct btrfs_tree_block_info *binfo;
10948
10949                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10950                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10951                         level = btrfs_tree_block_level(leaf, binfo);
10952                 }
10953
10954                 /*
10955                  * For a root extent, it must be of the following type and the
10956                  * first (and only one) iref in the item.
10957                  */
10958                 type = btrfs_extent_inline_ref_type(leaf, iref);
10959                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10960                         goto next;
10961
10962                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10963                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10964                 if (!entry) {
10965                         rii = malloc(sizeof(struct root_item_info));
10966                         if (!rii) {
10967                                 ret = -ENOMEM;
10968                                 goto out;
10969                         }
10970                         rii->cache_extent.start = root_id;
10971                         rii->cache_extent.size = 1;
10972                         rii->level = (u8)-1;
10973                         entry = &rii->cache_extent;
10974                         ret = insert_cache_extent(roots_info_cache, entry);
10975                         ASSERT(ret == 0);
10976                 } else {
10977                         rii = container_of(entry, struct root_item_info,
10978                                            cache_extent);
10979                 }
10980
10981                 ASSERT(rii->cache_extent.start == root_id);
10982                 ASSERT(rii->cache_extent.size == 1);
10983
10984                 if (level > rii->level || rii->level == (u8)-1) {
10985                         rii->level = level;
10986                         rii->bytenr = found_key.objectid;
10987                         rii->gen = btrfs_extent_generation(leaf, ei);
10988                         rii->node_count = 1;
10989                 } else if (level == rii->level) {
10990                         rii->node_count++;
10991                 }
10992 next:
10993                 path->slots[0]++;
10994         }
10995
10996 out:
10997         btrfs_free_path(path);
10998
10999         return ret;
11000 }
11001
11002 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11003                                   struct btrfs_path *path,
11004                                   const struct btrfs_key *root_key,
11005                                   const int read_only_mode)
11006 {
11007         const u64 root_id = root_key->objectid;
11008         struct cache_extent *entry;
11009         struct root_item_info *rii;
11010         struct btrfs_root_item ri;
11011         unsigned long offset;
11012
11013         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11014         if (!entry) {
11015                 fprintf(stderr,
11016                         "Error: could not find extent items for root %llu\n",
11017                         root_key->objectid);
11018                 return -ENOENT;
11019         }
11020
11021         rii = container_of(entry, struct root_item_info, cache_extent);
11022         ASSERT(rii->cache_extent.start == root_id);
11023         ASSERT(rii->cache_extent.size == 1);
11024
11025         if (rii->node_count != 1) {
11026                 fprintf(stderr,
11027                         "Error: could not find btree root extent for root %llu\n",
11028                         root_id);
11029                 return -ENOENT;
11030         }
11031
11032         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11033         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11034
11035         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11036             btrfs_root_level(&ri) != rii->level ||
11037             btrfs_root_generation(&ri) != rii->gen) {
11038
11039                 /*
11040                  * If we're in repair mode but our caller told us to not update
11041                  * the root item, i.e. just check if it needs to be updated, don't
11042                  * print this message, since the caller will call us again shortly
11043                  * for the same root item without read only mode (the caller will
11044                  * open a transaction first).
11045                  */
11046                 if (!(read_only_mode && repair))
11047                         fprintf(stderr,
11048                                 "%sroot item for root %llu,"
11049                                 " current bytenr %llu, current gen %llu, current level %u,"
11050                                 " new bytenr %llu, new gen %llu, new level %u\n",
11051                                 (read_only_mode ? "" : "fixing "),
11052                                 root_id,
11053                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11054                                 btrfs_root_level(&ri),
11055                                 rii->bytenr, rii->gen, rii->level);
11056
11057                 if (btrfs_root_generation(&ri) > rii->gen) {
11058                         fprintf(stderr,
11059                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11060                                 root_id, btrfs_root_generation(&ri), rii->gen);
11061                         return -EINVAL;
11062                 }
11063
11064                 if (!read_only_mode) {
11065                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11066                         btrfs_set_root_level(&ri, rii->level);
11067                         btrfs_set_root_generation(&ri, rii->gen);
11068                         write_extent_buffer(path->nodes[0], &ri,
11069                                             offset, sizeof(ri));
11070                 }
11071
11072                 return 1;
11073         }
11074
11075         return 0;
11076 }
11077
11078 /*
11079  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11080  * caused read-only snapshots to be corrupted if they were created at a moment
11081  * when the source subvolume/snapshot had orphan items. The issue was that the
11082  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11083  * node instead of the post orphan cleanup root node.
11084  * So this function, and its callees, just detects and fixes those cases. Even
11085  * though the regression was for read-only snapshots, this function applies to
11086  * any snapshot/subvolume root.
11087  * This must be run before any other repair code - not doing it so, makes other
11088  * repair code delete or modify backrefs in the extent tree for example, which
11089  * will result in an inconsistent fs after repairing the root items.
11090  */
11091 static int repair_root_items(struct btrfs_fs_info *info)
11092 {
11093         struct btrfs_path *path = NULL;
11094         struct btrfs_key key;
11095         struct extent_buffer *leaf;
11096         struct btrfs_trans_handle *trans = NULL;
11097         int ret = 0;
11098         int bad_roots = 0;
11099         int need_trans = 0;
11100
11101         ret = build_roots_info_cache(info);
11102         if (ret)
11103                 goto out;
11104
11105         path = btrfs_alloc_path();
11106         if (!path) {
11107                 ret = -ENOMEM;
11108                 goto out;
11109         }
11110
11111         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11112         key.type = BTRFS_ROOT_ITEM_KEY;
11113         key.offset = 0;
11114
11115 again:
11116         /*
11117          * Avoid opening and committing transactions if a leaf doesn't have
11118          * any root items that need to be fixed, so that we avoid rotating
11119          * backup roots unnecessarily.
11120          */
11121         if (need_trans) {
11122                 trans = btrfs_start_transaction(info->tree_root, 1);
11123                 if (IS_ERR(trans)) {
11124                         ret = PTR_ERR(trans);
11125                         goto out;
11126                 }
11127         }
11128
11129         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11130                                 0, trans ? 1 : 0);
11131         if (ret < 0)
11132                 goto out;
11133         leaf = path->nodes[0];
11134
11135         while (1) {
11136                 struct btrfs_key found_key;
11137
11138                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11139                         int no_more_keys = find_next_key(path, &key);
11140
11141                         btrfs_release_path(path);
11142                         if (trans) {
11143                                 ret = btrfs_commit_transaction(trans,
11144                                                                info->tree_root);
11145                                 trans = NULL;
11146                                 if (ret < 0)
11147                                         goto out;
11148                         }
11149                         need_trans = 0;
11150                         if (no_more_keys)
11151                                 break;
11152                         goto again;
11153                 }
11154
11155                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11156
11157                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11158                         goto next;
11159                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11160                         goto next;
11161
11162                 ret = maybe_repair_root_item(info, path, &found_key,
11163                                              trans ? 0 : 1);
11164                 if (ret < 0)
11165                         goto out;
11166                 if (ret) {
11167                         if (!trans && repair) {
11168                                 need_trans = 1;
11169                                 key = found_key;
11170                                 btrfs_release_path(path);
11171                                 goto again;
11172                         }
11173                         bad_roots++;
11174                 }
11175 next:
11176                 path->slots[0]++;
11177         }
11178         ret = 0;
11179 out:
11180         free_roots_info_cache();
11181         btrfs_free_path(path);
11182         if (trans)
11183                 btrfs_commit_transaction(trans, info->tree_root);
11184         if (ret < 0)
11185                 return ret;
11186
11187         return bad_roots;
11188 }
11189
11190 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11191 {
11192         struct btrfs_trans_handle *trans;
11193         struct btrfs_block_group_cache *bg_cache;
11194         u64 current = 0;
11195         int ret = 0;
11196
11197         /* Clear all free space cache inodes and its extent data */
11198         while (1) {
11199                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11200                 if (!bg_cache)
11201                         break;
11202                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11203                 if (ret < 0)
11204                         return ret;
11205                 current = bg_cache->key.objectid + bg_cache->key.offset;
11206         }
11207
11208         /* Don't forget to set cache_generation to -1 */
11209         trans = btrfs_start_transaction(fs_info->tree_root, 0);
11210         if (IS_ERR(trans)) {
11211                 error("failed to update super block cache generation");
11212                 return PTR_ERR(trans);
11213         }
11214         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11215         btrfs_commit_transaction(trans, fs_info->tree_root);
11216
11217         return ret;
11218 }
11219
/*
 * Help text of "btrfs check" as a NULL-terminated array of lines: the first
 * entry is the synopsis, followed by the description, an empty separator
 * line and one or more lines per option.
 */
const char *const cmd_check_usage[] = {
	/* Synopsis */
	"btrfs check [options] <device>",

	/* Description */
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	"                            NOTE: v1 support implemented",

	/* Terminator */
	NULL
};
11251
11252 int cmd_check(int argc, char **argv)
11253 {
11254         struct cache_tree root_cache;
11255         struct btrfs_root *root;
11256         struct btrfs_fs_info *info;
11257         u64 bytenr = 0;
11258         u64 subvolid = 0;
11259         u64 tree_root_bytenr = 0;
11260         u64 chunk_root_bytenr = 0;
11261         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11262         int ret;
11263         u64 num;
11264         int init_csum_tree = 0;
11265         int readonly = 0;
11266         int clear_space_cache = 0;
11267         int qgroup_report = 0;
11268         int qgroups_repaired = 0;
11269         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11270
11271         while(1) {
11272                 int c;
11273                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11274                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11275                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11276                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11277                 static const struct option long_options[] = {
11278                         { "super", required_argument, NULL, 's' },
11279                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11280                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11281                         { "init-csum-tree", no_argument, NULL,
11282                                 GETOPT_VAL_INIT_CSUM },
11283                         { "init-extent-tree", no_argument, NULL,
11284                                 GETOPT_VAL_INIT_EXTENT },
11285                         { "check-data-csum", no_argument, NULL,
11286                                 GETOPT_VAL_CHECK_CSUM },
11287                         { "backup", no_argument, NULL, 'b' },
11288                         { "subvol-extents", required_argument, NULL, 'E' },
11289                         { "qgroup-report", no_argument, NULL, 'Q' },
11290                         { "tree-root", required_argument, NULL, 'r' },
11291                         { "chunk-root", required_argument, NULL,
11292                                 GETOPT_VAL_CHUNK_TREE },
11293                         { "progress", no_argument, NULL, 'p' },
11294                         { "mode", required_argument, NULL,
11295                                 GETOPT_VAL_MODE },
11296                         { "clear-space-cache", required_argument, NULL,
11297                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
11298                         { NULL, 0, NULL, 0}
11299                 };
11300
11301                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11302                 if (c < 0)
11303                         break;
11304                 switch(c) {
11305                         case 'a': /* ignored */ break;
11306                         case 'b':
11307                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11308                                 break;
11309                         case 's':
11310                                 num = arg_strtou64(optarg);
11311                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11312                                         error(
11313                                         "super mirror should be less than %d",
11314                                                 BTRFS_SUPER_MIRROR_MAX);
11315                                         exit(1);
11316                                 }
11317                                 bytenr = btrfs_sb_offset(((int)num));
11318                                 printf("using SB copy %llu, bytenr %llu\n", num,
11319                                        (unsigned long long)bytenr);
11320                                 break;
11321                         case 'Q':
11322                                 qgroup_report = 1;
11323                                 break;
11324                         case 'E':
11325                                 subvolid = arg_strtou64(optarg);
11326                                 break;
11327                         case 'r':
11328                                 tree_root_bytenr = arg_strtou64(optarg);
11329                                 break;
11330                         case GETOPT_VAL_CHUNK_TREE:
11331                                 chunk_root_bytenr = arg_strtou64(optarg);
11332                                 break;
11333                         case 'p':
11334                                 ctx.progress_enabled = true;
11335                                 break;
11336                         case '?':
11337                         case 'h':
11338                                 usage(cmd_check_usage);
11339                         case GETOPT_VAL_REPAIR:
11340                                 printf("enabling repair mode\n");
11341                                 repair = 1;
11342                                 ctree_flags |= OPEN_CTREE_WRITES;
11343                                 break;
11344                         case GETOPT_VAL_READONLY:
11345                                 readonly = 1;
11346                                 break;
11347                         case GETOPT_VAL_INIT_CSUM:
11348                                 printf("Creating a new CRC tree\n");
11349                                 init_csum_tree = 1;
11350                                 repair = 1;
11351                                 ctree_flags |= OPEN_CTREE_WRITES;
11352                                 break;
11353                         case GETOPT_VAL_INIT_EXTENT:
11354                                 init_extent_tree = 1;
11355                                 ctree_flags |= (OPEN_CTREE_WRITES |
11356                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11357                                 repair = 1;
11358                                 break;
11359                         case GETOPT_VAL_CHECK_CSUM:
11360                                 check_data_csum = 1;
11361                                 break;
11362                         case GETOPT_VAL_MODE:
11363                                 check_mode = parse_check_mode(optarg);
11364                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11365                                         error("unknown mode: %s", optarg);
11366                                         exit(1);
11367                                 }
11368                                 break;
11369                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
11370                                 if (strcmp(optarg, "v1") != 0) {
11371                                         error(
11372                         "only v1 support implmented, unrecognized value %s",
11373                         optarg);
11374                                         exit(1);
11375                                 }
11376                                 clear_space_cache = 1;
11377                                 ctree_flags |= OPEN_CTREE_WRITES;
11378                                 break;
11379                 }
11380         }
11381
11382         if (check_argc_exact(argc - optind, 1))
11383                 usage(cmd_check_usage);
11384
11385         if (ctx.progress_enabled) {
11386                 ctx.tp = TASK_NOTHING;
11387                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11388         }
11389
11390         /* This check is the only reason for --readonly to exist */
11391         if (readonly && repair) {
11392                 error("repair options are not compatible with --readonly");
11393                 exit(1);
11394         }
11395
11396         /*
11397          * Not supported yet
11398          */
11399         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11400                 error("low memory mode doesn't support repair yet");
11401                 exit(1);
11402         }
11403
11404         radix_tree_init();
11405         cache_tree_init(&root_cache);
11406
11407         if((ret = check_mounted(argv[optind])) < 0) {
11408                 error("could not check mount status: %s", strerror(-ret));
11409                 goto err_out;
11410         } else if(ret) {
11411                 error("%s is currently mounted, aborting", argv[optind]);
11412                 ret = -EBUSY;
11413                 goto err_out;
11414         }
11415
11416         /* only allow partial opening under repair mode */
11417         if (repair)
11418                 ctree_flags |= OPEN_CTREE_PARTIAL;
11419
11420         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11421                                   chunk_root_bytenr, ctree_flags);
11422         if (!info) {
11423                 error("cannot open file system");
11424                 ret = -EIO;
11425                 goto err_out;
11426         }
11427
11428         global_info = info;
11429         root = info->fs_root;
11430         if (clear_space_cache) {
11431                 if (btrfs_fs_compat_ro(info,
11432                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11433                         error(
11434                         "free space cache v2 detected, clearing not implemented");
11435                         ret = 1;
11436                         goto close_out;
11437                 }
11438                 printf("Clearing free space cache\n");
11439                 ret = clear_free_space_cache(info);
11440                 if (ret) {
11441                         error("failed to clear free space cache");
11442                         ret = 1;
11443                 } else {
11444                         printf("Free space cache cleared\n");
11445                 }
11446                 goto close_out;
11447         }
11448
11449         /*
11450          * repair mode will force us to commit transaction which
11451          * will make us fail to load log tree when mounting.
11452          */
11453         if (repair && btrfs_super_log_root(info->super_copy)) {
11454                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11455                 if (!ret) {
11456                         ret = 1;
11457                         goto close_out;
11458                 }
11459                 ret = zero_log_tree(root);
11460                 if (ret) {
11461                         error("failed to zero log tree: %d", ret);
11462                         goto close_out;
11463                 }
11464         }
11465
11466         uuid_unparse(info->super_copy->fsid, uuidbuf);
11467         if (qgroup_report) {
11468                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11469                        uuidbuf);
11470                 ret = qgroup_verify_all(info);
11471                 if (ret == 0)
11472                         report_qgroups(1);
11473                 goto close_out;
11474         }
11475         if (subvolid) {
11476                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11477                        subvolid, argv[optind], uuidbuf);
11478                 ret = print_extent_state(info, subvolid);
11479                 goto close_out;
11480         }
11481         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11482
11483         if (!extent_buffer_uptodate(info->tree_root->node) ||
11484             !extent_buffer_uptodate(info->dev_root->node) ||
11485             !extent_buffer_uptodate(info->chunk_root->node)) {
11486                 error("critical roots corrupted, unable to check the filesystem");
11487                 ret = -EIO;
11488                 goto close_out;
11489         }
11490
11491         if (init_extent_tree || init_csum_tree) {
11492                 struct btrfs_trans_handle *trans;
11493
11494                 trans = btrfs_start_transaction(info->extent_root, 0);
11495                 if (IS_ERR(trans)) {
11496                         error("error starting transaction");
11497                         ret = PTR_ERR(trans);
11498                         goto close_out;
11499                 }
11500
11501                 if (init_extent_tree) {
11502                         printf("Creating a new extent tree\n");
11503                         ret = reinit_extent_tree(trans, info);
11504                         if (ret)
11505                                 goto close_out;
11506                 }
11507
11508                 if (init_csum_tree) {
11509                         printf("Reinitialize checksum tree\n");
11510                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11511                         if (ret) {
11512                                 error("checksum tree initialization failed: %d",
11513                                                 ret);
11514                                 ret = -EIO;
11515                                 goto close_out;
11516                         }
11517
11518                         ret = fill_csum_tree(trans, info->csum_root,
11519                                              init_extent_tree);
11520                         if (ret) {
11521                                 error("checksum tree refilling failed: %d", ret);
11522                                 return -EIO;
11523                         }
11524                 }
11525                 /*
11526                  * Ok now we commit and run the normal fsck, which will add
11527                  * extent entries for all of the items it finds.
11528                  */
11529                 ret = btrfs_commit_transaction(trans, info->extent_root);
11530                 if (ret)
11531                         goto close_out;
11532         }
11533         if (!extent_buffer_uptodate(info->extent_root->node)) {
11534                 error("critical: extent_root, unable to check the filesystem");
11535                 ret = -EIO;
11536                 goto close_out;
11537         }
11538         if (!extent_buffer_uptodate(info->csum_root->node)) {
11539                 error("critical: csum_root, unable to check the filesystem");
11540                 ret = -EIO;
11541                 goto close_out;
11542         }
11543
11544         if (!ctx.progress_enabled)
11545                 printf("checking extents");
11546         if (check_mode == CHECK_MODE_LOWMEM)
11547                 ret = check_chunks_and_extents_v2(root);
11548         else
11549                 ret = check_chunks_and_extents(root);
11550         if (ret)
11551                 printf("Errors found in extent allocation tree or chunk allocation");
11552
11553         ret = repair_root_items(info);
11554         if (ret < 0)
11555                 goto close_out;
11556         if (repair) {
11557                 fprintf(stderr, "Fixed %d roots.\n", ret);
11558                 ret = 0;
11559         } else if (ret > 0) {
11560                 fprintf(stderr,
11561                        "Found %d roots with an outdated root item.\n",
11562                        ret);
11563                 fprintf(stderr,
11564                         "Please run a filesystem check with the option --repair to fix them.\n");
11565                 ret = 1;
11566                 goto close_out;
11567         }
11568
11569         if (!ctx.progress_enabled) {
11570                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11571                         fprintf(stderr, "checking free space tree\n");
11572                 else
11573                         fprintf(stderr, "checking free space cache\n");
11574         }
11575         ret = check_space_cache(root);
11576         if (ret)
11577                 goto out;
11578
11579         /*
11580          * We used to have to have these hole extents in between our real
11581          * extents so if we don't have this flag set we need to make sure there
11582          * are no gaps in the file extents for inodes, otherwise we can just
11583          * ignore it when this happens.
11584          */
11585         no_holes = btrfs_fs_incompat(root->fs_info,
11586                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11587         if (!ctx.progress_enabled)
11588                 fprintf(stderr, "checking fs roots\n");
11589         ret = check_fs_roots(root, &root_cache);
11590         if (ret)
11591                 goto out;
11592
11593         fprintf(stderr, "checking csums\n");
11594         ret = check_csums(root);
11595         if (ret)
11596                 goto out;
11597
11598         fprintf(stderr, "checking root refs\n");
11599         ret = check_root_refs(root, &root_cache);
11600         if (ret)
11601                 goto out;
11602
11603         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11604                 struct extent_buffer *eb;
11605
11606                 eb = list_first_entry(&root->fs_info->recow_ebs,
11607                                       struct extent_buffer, recow);
11608                 list_del_init(&eb->recow);
11609                 ret = recow_extent_buffer(root, eb);
11610                 if (ret)
11611                         break;
11612         }
11613
11614         while (!list_empty(&delete_items)) {
11615                 struct bad_item *bad;
11616
11617                 bad = list_first_entry(&delete_items, struct bad_item, list);
11618                 list_del_init(&bad->list);
11619                 if (repair)
11620                         ret = delete_bad_item(root, bad);
11621                 free(bad);
11622         }
11623
11624         if (info->quota_enabled) {
11625                 int err;
11626                 fprintf(stderr, "checking quota groups\n");
11627                 err = qgroup_verify_all(info);
11628                 if (err)
11629                         goto out;
11630                 report_qgroups(0);
11631                 err = repair_qgroups(info, &qgroups_repaired);
11632                 if (err)
11633                         goto out;
11634         }
11635
11636         if (!list_empty(&root->fs_info->recow_ebs)) {
11637                 error("transid errors in file system");
11638                 ret = 1;
11639         }
11640 out:
11641         /* Don't override original ret */
11642         if (!ret && qgroups_repaired)
11643                 ret = qgroups_repaired;
11644
11645         if (found_old_backref) { /*
11646                  * there was a disk format change when mixed
11647                  * backref was in testing tree. The old format
11648                  * existed about one week.
11649                  */
11650                 printf("\n * Found old mixed backref format. "
11651                        "The old format is not supported! *"
11652                        "\n * Please mount the FS in readonly mode, "
11653                        "backup data and re-format the FS. *\n\n");
11654                 ret = 1;
11655         }
11656         printf("found %llu bytes used err is %d\n",
11657                (unsigned long long)bytes_used, ret);
11658         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11659         printf("total tree bytes: %llu\n",
11660                (unsigned long long)total_btree_bytes);
11661         printf("total fs tree bytes: %llu\n",
11662                (unsigned long long)total_fs_tree_bytes);
11663         printf("total extent tree bytes: %llu\n",
11664                (unsigned long long)total_extent_tree_bytes);
11665         printf("btree space waste bytes: %llu\n",
11666                (unsigned long long)btree_space_waste);
11667         printf("file data blocks allocated: %llu\n referenced %llu\n",
11668                 (unsigned long long)data_bytes_allocated,
11669                 (unsigned long long)data_bytes_referenced);
11670
11671         free_qgroup_counts();
11672         free_root_recs_tree(&root_cache);
11673 close_out:
11674         close_ctree(root);
11675 err_out:
11676         if (ctx.progress_enabled)
11677                 task_deinit(ctx.info);
11678
11679         return ret;
11680 }