btrfs-progs: dump-tree: fix crash on unrecognized tree id
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase reported by the progress task.  The first three values are used as
 * indices into task_position_string[] in print_status_check(); TASK_NOTHING
 * makes that function return without printing anything.
 */
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
/*
 * Argument block for print_status_check(), which receives it through a
 * void pointer: tp selects the status string that is printed and info is
 * handed to task_period_start()/task_period_wait().
 * NOTE(review): no reader of progress_enabled is visible in this part of
 * the file.
 */
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
/*
 * File-scope state of the check code: byte counters, two list heads,
 * option-like flags and a few global pointers.  The explicit "= 0" and
 * "= NULL" initializers match what C gives static objects by default.
 * NOTE(review): the code that reads and updates these variables lies
 * outside the part of the file visible here.
 */
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementation to run.  parse_check_mode() maps the strings
 * "lowmem", "orig" and "original" onto these values and returns
 * CHECK_MODE_UNKNOWN for anything else.  CHECK_MODE_DEFAULT is an alias
 * for CHECK_MODE_ORIGINAL.
 */
77 enum btrfs_check_mode {
78         CHECK_MODE_ORIGINAL,
79         CHECK_MODE_LOWMEM,
80         CHECK_MODE_UNKNOWN,
81         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
82 };
83
/* Currently selected mode; starts out as CHECK_MODE_DEFAULT. */
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Header shared by the two backref flavours in this file: struct
 * data_backref and struct tree_backref both embed it as their first member
 * "node".  to_extent_backref() recovers it from its "list" link.  All state
 * is kept in one-bit flags.
 */
86 struct extent_backref {
87         struct list_head list;
88         unsigned int is_data:1;
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
96 {
97         return list_entry(entry, struct extent_backref, list);
98 }
99
/*
 * Backref record built around an embedded struct extent_backref ("node");
 * to_data_backref() maps the embedded node back to this structure.
 * "parent" and "root" overlay each other in an anonymous union.  Note that
 * this structure has its own 32-bit found_ref counter in addition to the
 * one-bit node.found_ref flag.
 * NOTE(review): which union member is meaningful is presumably decided by
 * node.full_backref -- confirm against the code that fills these in.
 */
100 struct data_backref {
101         struct extent_backref node;
102         union {
103                 u64 parent;
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;
109         u64 bytes;
110         u64 ram_bytes;
111         u32 num_refs;
112         u32 found_ref;
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 /*
121  * Much like data_backref, just removed the undetermined members
122  * and change it to use list_head.
123  * During extent scan, it is stored in root->orphan_data_extent.
124  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
125  */
/*
 * Entries are linked through "list"; print_orphan_data_extents() reports
 * objectid as the inode number together with offset, disk_bytenr and
 * disk_len.
 */
126 struct orphan_data_extent {
127         struct list_head list;
128         u64 root;
129         u64 objectid;
130         u64 offset;
131         u64 disk_bytenr;
132         u64 disk_len;
133 };
134
/*
 * Backref record consisting of the common struct extent_backref header
 * ("node") plus a single 64-bit value that is read either as "parent" or
 * as "root" through the anonymous union.  to_tree_backref() maps the
 * embedded node back to this structure.
 * NOTE(review): which union member applies is presumably selected by
 * node.full_backref -- confirm with the users of this type.
 */
135 struct tree_backref {
136         struct extent_backref node;
137         union {
138                 u64 parent;
139                 u64 root;
140         };
141 };
142
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
144 {
145         return container_of(back, struct tree_backref, node);
146 }
147
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
150
/*
 * Extent bookkeeping record.  flag_block_full_backref is two bits wide so
 * that it can hold FLAG_UNSET (2) in addition to 0 and 1; every other flag
 * is a single bit.  to_extent_record() recovers the record from its "list"
 * link.
 * NOTE(review): the code that fills in and checks these fields is outside
 * the visible part of the file.
 */
151 struct extent_record {
152         struct list_head backrefs;
153         struct list_head dups;
154         struct list_head list;
155         struct cache_extent cache;
156         struct btrfs_disk_key parent_key;
157         u64 start;
158         u64 max_size;
159         u64 nr;
160         u64 refs;
161         u64 extent_item_refs;
162         u64 generation;
163         u64 parent_generation;
164         u64 info_objectid;
165         u32 num_duplicates;
166         u8 info_level;
167         unsigned int flag_block_full_backref:2;
168         unsigned int found_rec:1;
169         unsigned int content_checked:1;
170         unsigned int owner_ref_checked:1;
171         unsigned int is_root:1;
172         unsigned int metadata:1;
173         unsigned int bad_full_backref:1;
174         unsigned int crossing_stripes:1;
175         unsigned int wrong_chunk_type:1;
176 };
177
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
179 {
180         return container_of(entry, struct extent_record, list);
181 }
182
/*
 * Named back reference kept on inode_record::backrefs.
 *
 * "errors" collects REF_ERR_* bits, and "filetype" is compared against
 * imode_to_type() of the owning inode in maybe_free_inode_rec().  The name
 * is stored in the trailing array: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
183 struct inode_backref {
184         struct list_head list;
185         unsigned int found_dir_item:1;
186         unsigned int found_dir_index:1;
187         unsigned int found_inode_ref:1;
188         unsigned int filetype:8;
189         int errors;
190         unsigned int ref_type;
191         u64 dir;
192         u64 index;
193         u16 namelen;
194         char name[0];
195 };
196
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
198 {
199         return list_entry(entry, struct inode_backref, list);
200 }
201
/*
 * List-linked record with the identity (objectid, bytenr), levels and drop
 * key of a root item.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the field meanings above are taken from their names only.
 */
202 struct root_item_record {
203         struct list_head list;
204         u64 objectid;
205         u64 bytenr;
206         u64 last_snapshot;
207         u8 level;
208         u8 drop_level;
209         int level_size;
210         struct btrfs_key drop_key;
211 };
212
/*
 * Error bits accumulated in the "errors" field of a backref.
 * print_ref_error() prints one message per set bit.
 */
213 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
214 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
215 #define REF_ERR_NO_INODE_REF            (1 << 2)
216 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
217 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
218 #define REF_ERR_DUP_INODE_REF           (1 << 5)
219 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
220 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
221 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
222 #define REF_ERR_NO_ROOT_REF             (1 << 9)
223 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
224 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
225 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
226
/*
 * One hole covering [start, start + len), stored in an rbtree through
 * "node".  Trees of holes are ordered by compare_hole() and searched with
 * compare_hole_range().
 */
227 struct file_extent_hole {
228         struct rb_node node;
229         u64 start;
230         u64 len;
231 };
232
/*
 * State collected for one inode.
 *
 * - backrefs: list of struct inode_backref.
 * - errors: I_ERR_* bits, decoded by print_inode_error().
 * - holes: rbtree of struct file_extent_hole.
 * - orphan_extents: list of struct orphan_data_extent.
 * - refs: share count; get_inode_rec() clones the record before handing it
 *   out for modification when refs > 1, and free_inode_rec() only releases
 *   the memory once the count drops to zero.
 */
233 struct inode_record {
234         struct list_head backrefs;
235         unsigned int checked:1;
236         unsigned int merging:1;
237         unsigned int found_inode_item:1;
238         unsigned int found_dir_item:1;
239         unsigned int found_file_extent:1;
240         unsigned int found_csum_item:1;
241         unsigned int some_csum_missing:1;
242         unsigned int nodatasum:1;
243         int errors;
244
245         u64 ino;
246         u32 nlink;
247         u32 imode;
248         u64 isize;
249         u64 nbytes;
250
251         u32 found_link;
252         u64 found_size;
253         u64 extent_start;
254         u64 extent_end;
255         struct rb_root holes;
256         struct list_head orphan_extents;
257
258         u32 refs;
259 };
260
/*
 * Error bits stored in inode_record::errors.  print_inode_error() prints
 * one message per set bit.
 */
261 #define I_ERR_NO_INODE_ITEM             (1 << 0)
262 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
263 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
264 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
265 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
266 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
267 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
268 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
269 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
270 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
271 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
272 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
273 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
274 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
275 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
276
/*
 * Named back reference for a root, linked through "list" and recovered
 * with to_root_backref().  The name is stored in the trailing array with
 * its length in namelen.
 * NOTE(review): "errors" presumably holds REF_ERR_* bits like
 * inode_backref::errors -- the users are not visible here, confirm.
 */
277 struct root_backref {
278         struct list_head list;
279         unsigned int found_dir_item:1;
280         unsigned int found_dir_index:1;
281         unsigned int found_back_ref:1;
282         unsigned int found_forward_ref:1;
283         unsigned int reachable:1;
284         int errors;
285         u64 ref_root;
286         u64 dir;
287         u64 index;
288         u16 namelen;
289         char name[0];
290 };
291
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
293 {
294         return list_entry(entry, struct root_backref, list);
295 }
296
/*
 * Record for one root: a list head for backrefs, an embedded cache_extent
 * so the record can live in a cache tree, and reference bookkeeping.
 * NOTE(review): the users of this structure are outside the visible part
 * of the file; "backrefs" presumably links struct root_backref entries.
 */
297 struct root_record {
298         struct list_head backrefs;
299         struct cache_extent cache;
300         unsigned int found_root_item:1;
301         u64 objectid;
302         u32 found_ref;
303 };
304
/*
 * Cache-tree node that carries an opaque pointer.  get_inode_rec() uses it
 * to store a struct inode_record in "data", keyed by inode number
 * (cache.start = ino, cache.size = 1).
 */
305 struct ptr_node {
306         struct cache_extent cache;
307         void *data;
308 };
309
/*
 * Cache-tree node holding two nested cache trees (roots and inodes), a
 * current inode record and a reference count.
 * NOTE(review): the users of this structure are outside the visible part
 * of the file.
 */
310 struct shared_node {
311         struct cache_extent cache;
312         struct cache_tree root_cache;
313         struct cache_tree inode_cache;
314         struct inode_record *current;
315         u32 refs;
316 };
317
/*
 * Start/size pair describing one block.
 * NOTE(review): no user is visible in this part of the file.
 */
318 struct block_info {
319         u64 start;
320         u32 size;
321 };
322
/*
 * Tree-walk state: a cache tree of shared nodes plus one struct shared_node
 * slot per tree level (BTRFS_MAX_LEVEL entries).
 * NOTE(review): active_node and root_level are presumably indices into
 * nodes[] -- the walking code is not visible here, confirm.
 */
323 struct walk_control {
324         struct cache_tree shared;
325         struct shared_node *nodes[BTRFS_MAX_LEVEL];
326         int active_node;
327         int root_level;
328 };
329
/*
 * List entry that identifies an item by its key and the id of the root it
 * belongs to.
 * NOTE(review): no user is visible in this part of the file.
 */
330 struct bad_item {
331         struct btrfs_key key;
332         u64 root_id;
333         struct list_head list;
334 };
335
/*
 * List entry pairing an extent (bytenr, bytes) with two counters.
 * NOTE(review): no user is visible in this part of the file, so the exact
 * meaning of "count" and "broken" cannot be confirmed here.
 */
336 struct extent_entry {
337         u64 bytenr;
338         u64 bytes;
339         int count;
340         int broken;
341         struct list_head list;
342 };
343
/*
 * Per-root summary kept in a cache tree through the embedded cache_extent.
 * NOTE(review): presumably these are the entries of roots_info_cache --
 * the code that populates it is not visible here, confirm.
 */
344 struct root_item_info {
345         /* level of the root */
346         u8 level;
347         /* number of nodes at this level, must be 1 for a root */
348         int node_count;
349         u64 bytenr;
350         u64 gen;
351         struct cache_extent cache_extent;
352 };
353
354 /*
355  * Error bit for low memory mode check.
356  *
357  * Currently no caller cares about it yet.  Just internal use for error
358  * classification.
359  */
/*
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * defined as (1 << 4), so the two conditions cannot be told apart in an
 * error mask -- confirm whether sharing the bit is intended.
 */
360 #define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
361 #define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
362 #define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
363 #define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
364 #define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
365 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
366 #define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
367 #define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
368 #define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
369 #define CHUNK_TYPE_MISMATCH     (1 << 8)
370
371 static void *print_status_check(void *p)
372 {
373         struct task_ctx *priv = p;
374         const char work_indicator[] = { '.', 'o', 'O', 'o' };
375         uint32_t count = 0;
376         static char *task_position_string[] = {
377                 "checking extents",
378                 "checking free space cache",
379                 "checking fs roots",
380         };
381
382         task_period_start(priv->info, 1000 /* 1s */);
383
384         if (priv->tp == TASK_NOTHING)
385                 return NULL;
386
387         while (1) {
388                 printf("%s [%c]\r", task_position_string[priv->tp],
389                                 work_indicator[count % 4]);
390                 count++;
391                 fflush(stdout);
392                 task_period_wait(priv->info);
393         }
394         return NULL;
395 }
396
/*
 * Terminate the progress line: write a newline to stdout and flush it.
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
404
405 static enum btrfs_check_mode parse_check_mode(const char *str)
406 {
407         if (strcmp(str, "lowmem") == 0)
408                 return CHECK_MODE_LOWMEM;
409         if (strcmp(str, "orig") == 0)
410                 return CHECK_MODE_ORIGINAL;
411         if (strcmp(str, "original") == 0)
412                 return CHECK_MODE_ORIGINAL;
413
414         return CHECK_MODE_UNKNOWN;
415 }
416
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
419 {
420         struct file_extent_hole *hole;
421
422         if (RB_EMPTY_ROOT(holes))
423                 return (u64)-1;
424
425         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
426         return hole->start;
427 }
428
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
430 {
431         struct file_extent_hole *hole1;
432         struct file_extent_hole *hole2;
433
434         hole1 = rb_entry(node1, struct file_extent_hole, node);
435         hole2 = rb_entry(node2, struct file_extent_hole, node);
436
437         if (hole1->start > hole2->start)
438                 return -1;
439         if (hole1->start < hole2->start)
440                 return 1;
441         /* Now hole1->start == hole2->start */
442         if (hole1->len >= hole2->len)
443                 /*
444                  * Hole 1 will be merge center
445                  * Same hole will be merged later
446                  */
447                 return -1;
448         /* Hole 2 will be merge center */
449         return 1;
450 }
451
452 /*
453  * Add a hole to the record
454  *
455  * This will do hole merge for copy_file_extent_holes(),
456  * which will ensure there won't be continuous holes.
457  */
458 static int add_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole *prev = NULL;
463         struct file_extent_hole *next = NULL;
464
465         hole = malloc(sizeof(*hole));
466         if (!hole)
467                 return -ENOMEM;
468         hole->start = start;
469         hole->len = len;
470         /* Since compare will not return 0, no -EEXIST will happen */
471         rb_insert(holes, &hole->node, compare_hole);
472
473         /* simple merge with previous hole */
474         if (rb_prev(&hole->node))
475                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
476                                 node);
477         if (prev && prev->start + prev->len >= hole->start) {
478                 hole->len = hole->start + hole->len - prev->start;
479                 hole->start = prev->start;
480                 rb_erase(&prev->node, holes);
481                 free(prev);
482                 prev = NULL;
483         }
484
485         /* iterate merge with next holes */
486         while (1) {
487                 if (!rb_next(&hole->node))
488                         break;
489                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
490                                         node);
491                 if (hole->start + hole->len >= next->start) {
492                         if (hole->start + hole->len <= next->start + next->len)
493                                 hole->len = next->start + next->len -
494                                             hole->start;
495                         rb_erase(&next->node, holes);
496                         free(next);
497                         next = NULL;
498                 } else
499                         break;
500         }
501         return 0;
502 }
503
504 static int compare_hole_range(struct rb_node *node, void *data)
505 {
506         struct file_extent_hole *hole;
507         u64 start;
508
509         hole = (struct file_extent_hole *)data;
510         start = hole->start;
511
512         hole = rb_entry(node, struct file_extent_hole, node);
513         if (start < hole->start)
514                 return -1;
515         if (start >= hole->start && start < hole->start + hole->len)
516                 return 0;
517         return 1;
518 }
519
520 /*
521  * Delete a hole in the record
522  *
523  * This will do the hole split and is much restrict than add.
524  */
525 static int del_file_extent_hole(struct rb_root *holes,
526                                 u64 start, u64 len)
527 {
528         struct file_extent_hole *hole;
529         struct file_extent_hole tmp;
530         u64 prev_start = 0;
531         u64 prev_len = 0;
532         u64 next_start = 0;
533         u64 next_len = 0;
534         struct rb_node *node;
535         int have_prev = 0;
536         int have_next = 0;
537         int ret = 0;
538
539         tmp.start = start;
540         tmp.len = len;
541         node = rb_search(holes, &tmp, compare_hole_range, NULL);
542         if (!node)
543                 return -EEXIST;
544         hole = rb_entry(node, struct file_extent_hole, node);
545         if (start + len > hole->start + hole->len)
546                 return -EEXIST;
547
548         /*
549          * Now there will be no overlap, delete the hole and re-add the
550          * split(s) if they exists.
551          */
552         if (start > hole->start) {
553                 prev_start = hole->start;
554                 prev_len = start - hole->start;
555                 have_prev = 1;
556         }
557         if (hole->start + hole->len > start + len) {
558                 next_start = start + len;
559                 next_len = hole->start + hole->len - start - len;
560                 have_next = 1;
561         }
562         rb_erase(node, holes);
563         free(hole);
564         if (have_prev) {
565                 ret = add_file_extent_hole(holes, prev_start, prev_len);
566                 if (ret < 0)
567                         return ret;
568         }
569         if (have_next) {
570                 ret = add_file_extent_hole(holes, next_start, next_len);
571                 if (ret < 0)
572                         return ret;
573         }
574         return 0;
575 }
576
577 static int copy_file_extent_holes(struct rb_root *dst,
578                                   struct rb_root *src)
579 {
580         struct file_extent_hole *hole;
581         struct rb_node *node;
582         int ret = 0;
583
584         node = rb_first(src);
585         while (node) {
586                 hole = rb_entry(node, struct file_extent_hole, node);
587                 ret = add_file_extent_hole(dst, hole->start, hole->len);
588                 if (ret)
589                         break;
590                 node = rb_next(node);
591         }
592         return ret;
593 }
594
595 static void free_file_extent_holes(struct rb_root *holes)
596 {
597         struct rb_node *node;
598         struct file_extent_hole *hole;
599
600         node = rb_first(holes);
601         while (node) {
602                 hole = rb_entry(node, struct file_extent_hole, node);
603                 rb_erase(node, holes);
604                 free(hole);
605                 node = rb_first(holes);
606         }
607 }
608
/*
 * Forward declaration only.
 * NOTE(review): the definition is not part of the visible chunk; presumably
 * it appears further down in the file.
 */
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
610
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612                                  struct btrfs_root *root)
613 {
614         if (root->last_trans != trans->transid) {
615                 root->track_dirty = 1;
616                 root->last_trans = trans->transid;
617                 root->commit_root = root->node;
618                 extent_buffer_get(root->node);
619         }
620 }
621
622 static u8 imode_to_type(u32 imode)
623 {
624 #define S_SHIFT 12
625         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
627                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
628                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
629                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
630                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
631                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
632                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
633         };
634
635         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
636 #undef S_SHIFT
637 }
638
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
640 {
641         struct device_record *rec1;
642         struct device_record *rec2;
643
644         rec1 = rb_entry(node1, struct device_record, node);
645         rec2 = rb_entry(node2, struct device_record, node);
646         if (rec1->devid > rec2->devid)
647                 return -1;
648         else if (rec1->devid < rec2->devid)
649                 return 1;
650         else
651                 return 0;
652 }
653
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
655 {
656         struct inode_record *rec;
657         struct inode_backref *backref;
658         struct inode_backref *orig;
659         struct inode_backref *tmp;
660         struct orphan_data_extent *src_orphan;
661         struct orphan_data_extent *dst_orphan;
662         struct rb_node *rb;
663         size_t size;
664         int ret;
665
666         rec = malloc(sizeof(*rec));
667         if (!rec)
668                 return ERR_PTR(-ENOMEM);
669         memcpy(rec, orig_rec, sizeof(*rec));
670         rec->refs = 1;
671         INIT_LIST_HEAD(&rec->backrefs);
672         INIT_LIST_HEAD(&rec->orphan_extents);
673         rec->holes = RB_ROOT;
674
675         list_for_each_entry(orig, &orig_rec->backrefs, list) {
676                 size = sizeof(*orig) + orig->namelen + 1;
677                 backref = malloc(size);
678                 if (!backref) {
679                         ret = -ENOMEM;
680                         goto cleanup;
681                 }
682                 memcpy(backref, orig, size);
683                 list_add_tail(&backref->list, &rec->backrefs);
684         }
685         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686                 dst_orphan = malloc(sizeof(*dst_orphan));
687                 if (!dst_orphan) {
688                         ret = -ENOMEM;
689                         goto cleanup;
690                 }
691                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693         }
694         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
695         if (ret < 0)
696                 goto cleanup_rb;
697
698         return rec;
699
700 cleanup_rb:
701         rb = rb_first(&rec->holes);
702         while (rb) {
703                 struct file_extent_hole *hole;
704
705                 hole = rb_entry(rb, struct file_extent_hole, node);
706                 rb = rb_next(rb);
707                 free(hole);
708         }
709
710 cleanup:
711         if (!list_empty(&rec->backrefs))
712                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713                         list_del(&orig->list);
714                         free(orig);
715                 }
716
717         if (!list_empty(&rec->orphan_extents))
718                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719                         list_del(&orig->list);
720                         free(orig);
721                 }
722
723         free(rec);
724
725         return ERR_PTR(ret);
726 }
727
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
729                                       u64 objectid)
730 {
731         struct orphan_data_extent *orphan;
732
733         if (list_empty(orphan_extents))
734                 return;
735         printf("The following data extent is lost in tree %llu:\n",
736                objectid);
737         list_for_each_entry(orphan, orphan_extents, list) {
738                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
740                        orphan->disk_len);
741         }
742 }
743
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
745 {
746         u64 root_objectid = root->root_key.objectid;
747         int errors = rec->errors;
748
749         if (!errors)
750                 return;
751         /* reloc root errors, we print its corresponding fs root objectid*/
752         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753                 root_objectid = root->root_key.offset;
754                 fprintf(stderr, "reloc");
755         }
756         fprintf(stderr, "root %llu inode %llu errors %x",
757                 (unsigned long long) root_objectid,
758                 (unsigned long long) rec->ino, rec->errors);
759
760         if (errors & I_ERR_NO_INODE_ITEM)
761                 fprintf(stderr, ", no inode item");
762         if (errors & I_ERR_NO_ORPHAN_ITEM)
763                 fprintf(stderr, ", no orphan item");
764         if (errors & I_ERR_DUP_INODE_ITEM)
765                 fprintf(stderr, ", dup inode item");
766         if (errors & I_ERR_DUP_DIR_INDEX)
767                 fprintf(stderr, ", dup dir index");
768         if (errors & I_ERR_ODD_DIR_ITEM)
769                 fprintf(stderr, ", odd dir item");
770         if (errors & I_ERR_ODD_FILE_EXTENT)
771                 fprintf(stderr, ", odd file extent");
772         if (errors & I_ERR_BAD_FILE_EXTENT)
773                 fprintf(stderr, ", bad file extent");
774         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775                 fprintf(stderr, ", file extent overlap");
776         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777                 fprintf(stderr, ", file extent discount");
778         if (errors & I_ERR_DIR_ISIZE_WRONG)
779                 fprintf(stderr, ", dir isize wrong");
780         if (errors & I_ERR_FILE_NBYTES_WRONG)
781                 fprintf(stderr, ", nbytes wrong");
782         if (errors & I_ERR_ODD_CSUM_ITEM)
783                 fprintf(stderr, ", odd csum item");
784         if (errors & I_ERR_SOME_CSUM_MISSING)
785                 fprintf(stderr, ", some csum missing");
786         if (errors & I_ERR_LINK_COUNT_WRONG)
787                 fprintf(stderr, ", link count wrong");
788         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789                 fprintf(stderr, ", orphan file extent");
790         fprintf(stderr, "\n");
791         /* Print the orphan extents if needed */
792         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
794
795         /* Print the holes if needed */
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797                 struct file_extent_hole *hole;
798                 struct rb_node *node;
799                 int found = 0;
800
801                 node = rb_first(&rec->holes);
802                 fprintf(stderr, "Found file extent holes:\n");
803                 while (node) {
804                         found = 1;
805                         hole = rb_entry(node, struct file_extent_hole, node);
806                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
807                                 hole->start, hole->len);
808                         node = rb_next(node);
809                 }
810                 if (!found)
811                         fprintf(stderr, "\tstart: 0, len: %llu\n",
812                                 round_up(rec->isize, root->sectorsize));
813         }
814 }
815
816 static void print_ref_error(int errors)
817 {
818         if (errors & REF_ERR_NO_DIR_ITEM)
819                 fprintf(stderr, ", no dir item");
820         if (errors & REF_ERR_NO_DIR_INDEX)
821                 fprintf(stderr, ", no dir index");
822         if (errors & REF_ERR_NO_INODE_REF)
823                 fprintf(stderr, ", no inode ref");
824         if (errors & REF_ERR_DUP_DIR_ITEM)
825                 fprintf(stderr, ", dup dir item");
826         if (errors & REF_ERR_DUP_DIR_INDEX)
827                 fprintf(stderr, ", dup dir index");
828         if (errors & REF_ERR_DUP_INODE_REF)
829                 fprintf(stderr, ", dup inode ref");
830         if (errors & REF_ERR_INDEX_UNMATCH)
831                 fprintf(stderr, ", index mismatch");
832         if (errors & REF_ERR_FILETYPE_UNMATCH)
833                 fprintf(stderr, ", filetype mismatch");
834         if (errors & REF_ERR_NAME_TOO_LONG)
835                 fprintf(stderr, ", name too long");
836         if (errors & REF_ERR_NO_ROOT_REF)
837                 fprintf(stderr, ", no root ref");
838         if (errors & REF_ERR_NO_ROOT_BACKREF)
839                 fprintf(stderr, ", no root backref");
840         if (errors & REF_ERR_DUP_ROOT_REF)
841                 fprintf(stderr, ", dup root ref");
842         if (errors & REF_ERR_DUP_ROOT_BACKREF)
843                 fprintf(stderr, ", dup root backref");
844         fprintf(stderr, "\n");
845 }
846
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
848                                           u64 ino, int mod)
849 {
850         struct ptr_node *node;
851         struct cache_extent *cache;
852         struct inode_record *rec = NULL;
853         int ret;
854
855         cache = lookup_cache_extent(inode_cache, ino, 1);
856         if (cache) {
857                 node = container_of(cache, struct ptr_node, cache);
858                 rec = node->data;
859                 if (mod && rec->refs > 1) {
860                         node->data = clone_inode_rec(rec);
861                         if (IS_ERR(node->data))
862                                 return node->data;
863                         rec->refs--;
864                         rec = node->data;
865                 }
866         } else if (mod) {
867                 rec = calloc(1, sizeof(*rec));
868                 if (!rec)
869                         return ERR_PTR(-ENOMEM);
870                 rec->ino = ino;
871                 rec->extent_start = (u64)-1;
872                 rec->refs = 1;
873                 INIT_LIST_HEAD(&rec->backrefs);
874                 INIT_LIST_HEAD(&rec->orphan_extents);
875                 rec->holes = RB_ROOT;
876
877                 node = malloc(sizeof(*node));
878                 if (!node) {
879                         free(rec);
880                         return ERR_PTR(-ENOMEM);
881                 }
882                 node->cache.start = ino;
883                 node->cache.size = 1;
884                 node->data = rec;
885
886                 if (ino == BTRFS_FREE_INO_OBJECTID)
887                         rec->found_link = 1;
888
889                 ret = insert_cache_extent(inode_cache, &node->cache);
890                 if (ret)
891                         return ERR_PTR(-EEXIST);
892         }
893         return rec;
894 }
895
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
897 {
898         struct orphan_data_extent *orphan;
899
900         while (!list_empty(orphan_extents)) {
901                 orphan = list_entry(orphan_extents->next,
902                                     struct orphan_data_extent, list);
903                 list_del(&orphan->list);
904                 free(orphan);
905         }
906 }
907
908 static void free_inode_rec(struct inode_record *rec)
909 {
910         struct inode_backref *backref;
911
912         if (--rec->refs > 0)
913                 return;
914
915         while (!list_empty(&rec->backrefs)) {
916                 backref = to_inode_backref(rec->backrefs.next);
917                 list_del(&backref->list);
918                 free(backref);
919         }
920         free_orphan_data_extents(&rec->orphan_extents);
921         free_file_extent_holes(&rec->holes);
922         free(rec);
923 }
924
925 static int can_free_inode_rec(struct inode_record *rec)
926 {
927         if (!rec->errors && rec->checked && rec->found_inode_item &&
928             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
929                 return 1;
930         return 0;
931 }
932
/*
 * Finalize the consistency checks on @rec and drop it from @inode_cache when
 * nothing is left to report.
 *
 * Nothing happens until the inode item has been seen.  After that, every
 * backref that has both a dir item and a dir index gets its file type
 * compared with the inode mode; such a backref is freed immediately when it
 * also has an inode ref, carries no errors, and the record's nlink equals
 * found_link.
 *
 * Once the record is marked checked (and is not being merged), the
 * size/extent/csum error bits are accumulated in rec->errors and the record
 * is removed from @inode_cache and released if can_free_inode_rec() agrees.
 */
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934                                  struct inode_record *rec)
935 {
936         struct cache_extent *cache;
937         struct inode_backref *tmp, *backref;
938         struct ptr_node *node;
939         unsigned char filetype;
940
941         if (!rec->found_inode_item)
942                 return;
943
944         filetype = imode_to_type(rec->imode);
945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946                 if (backref->found_dir_item && backref->found_dir_index) {
947                         if (backref->filetype != filetype)
948                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949                         if (!backref->errors && backref->found_inode_ref &&
950                             rec->nlink == rec->found_link) {
951                                 list_del(&backref->list);
952                                 free(backref);
953                         }
954                 }
955         }
956
            /* the remaining checks only run on a fully scanned, non-merging record */
957         if (!rec->checked || rec->merging)
958                 return;
959
960         if (S_ISDIR(rec->imode)) {
961                 if (rec->found_size != rec->isize)
962                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963                 if (rec->found_file_extent)
964                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
965         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966                 if (rec->found_dir_item)
967                         rec->errors |= I_ERR_ODD_DIR_ITEM;
968                 if (rec->found_size != rec->nbytes)
969                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
                    /*
                     * Extent coverage is only checked for linked inodes and
                     * when the no_holes flag is clear.
                     */
970                 if (rec->nlink > 0 && !no_holes &&
971                     (rec->extent_end < rec->isize ||
972                      first_extent_gap(&rec->holes) < rec->isize))
973                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
974         }
975
976         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977                 if (rec->found_csum_item && rec->nodatasum)
978                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
979                 if (rec->some_csum_missing && !rec->nodatasum)
980                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
981         }
982
            /* the record must hold exactly one reference at this point */
983         BUG_ON(rec->refs != 1);
984         if (can_free_inode_rec(rec)) {
                    /*
                     * NOTE(review): the lookup result is not NULL-checked;
                     * presumably @rec is always present in @inode_cache here
                     * -- confirm against the callers.
                     */
985                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986                 node = container_of(cache, struct ptr_node, cache);
987                 BUG_ON(node->data != rec);
988                 remove_cache_extent(inode_cache, &node->cache);
989                 free(node);
990                 free_inode_rec(rec);
991         }
992 }
993
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
995 {
996         struct btrfs_path path;
997         struct btrfs_key key;
998         int ret;
999
1000         key.objectid = BTRFS_ORPHAN_OBJECTID;
1001         key.type = BTRFS_ORPHAN_ITEM_KEY;
1002         key.offset = ino;
1003
1004         btrfs_init_path(&path);
1005         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006         btrfs_release_path(&path);
1007         if (ret > 0)
1008                 ret = -ENOENT;
1009         return ret;
1010 }
1011
1012 static int process_inode_item(struct extent_buffer *eb,
1013                               int slot, struct btrfs_key *key,
1014                               struct shared_node *active_node)
1015 {
1016         struct inode_record *rec;
1017         struct btrfs_inode_item *item;
1018
1019         rec = active_node->current;
1020         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021         if (rec->found_inode_item) {
1022                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1023                 return 1;
1024         }
1025         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026         rec->nlink = btrfs_inode_nlink(eb, item);
1027         rec->isize = btrfs_inode_size(eb, item);
1028         rec->nbytes = btrfs_inode_nbytes(eb, item);
1029         rec->imode = btrfs_inode_mode(eb, item);
1030         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1031                 rec->nodatasum = 1;
1032         rec->found_inode_item = 1;
1033         if (rec->nlink == 0)
1034                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035         maybe_free_inode_rec(&active_node->inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1040                                                 const char *name,
1041                                                 int namelen, u64 dir)
1042 {
1043         struct inode_backref *backref;
1044
1045         list_for_each_entry(backref, &rec->backrefs, list) {
1046                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1047                         break;
1048                 if (backref->dir != dir || backref->namelen != namelen)
1049                         continue;
1050                 if (memcmp(name, backref->name, namelen))
1051                         continue;
1052                 return backref;
1053         }
1054
1055         backref = malloc(sizeof(*backref) + namelen + 1);
1056         if (!backref)
1057                 return NULL;
1058         memset(backref, 0, sizeof(*backref));
1059         backref->dir = dir;
1060         backref->namelen = namelen;
1061         memcpy(backref->name, name, namelen);
1062         backref->name[namelen] = '\0';
1063         list_add_tail(&backref->list, &rec->backrefs);
1064         return backref;
1065 }
1066
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068                              u64 ino, u64 dir, u64 index,
1069                              const char *name, int namelen,
1070                              int filetype, int itemtype, int errors)
1071 {
1072         struct inode_record *rec;
1073         struct inode_backref *backref;
1074
1075         rec = get_inode_rec(inode_cache, ino, 1);
1076         BUG_ON(IS_ERR(rec));
1077         backref = get_inode_backref(rec, name, namelen, dir);
1078         BUG_ON(!backref);
1079         if (errors)
1080                 backref->errors |= errors;
1081         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082                 if (backref->found_dir_index)
1083                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084                 if (backref->found_inode_ref && backref->index != index)
1085                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1086                 if (backref->found_dir_item && backref->filetype != filetype)
1087                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1088
1089                 backref->index = index;
1090                 backref->filetype = filetype;
1091                 backref->found_dir_index = 1;
1092         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1093                 rec->found_link++;
1094                 if (backref->found_dir_item)
1095                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096                 if (backref->found_dir_index && backref->filetype != filetype)
1097                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1098
1099                 backref->filetype = filetype;
1100                 backref->found_dir_item = 1;
1101         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103                 if (backref->found_inode_ref)
1104                         backref->errors |= REF_ERR_DUP_INODE_REF;
1105                 if (backref->found_dir_index && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 else
1108                         backref->index = index;
1109
1110                 backref->ref_type = itemtype;
1111                 backref->found_inode_ref = 1;
1112         } else {
1113                 BUG_ON(1);
1114         }
1115
1116         maybe_free_inode_rec(inode_cache, rec);
1117         return 0;
1118 }
1119
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121                             struct cache_tree *dst_cache)
1122 {
1123         struct inode_backref *backref;
1124         u32 dir_count = 0;
1125         int ret = 0;
1126
1127         dst->merging = 1;
1128         list_for_each_entry(backref, &src->backrefs, list) {
1129                 if (backref->found_dir_index) {
1130                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1131                                         backref->index, backref->name,
1132                                         backref->namelen, backref->filetype,
1133                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1134                 }
1135                 if (backref->found_dir_item) {
1136                         dir_count++;
1137                         add_inode_backref(dst_cache, dst->ino,
1138                                         backref->dir, 0, backref->name,
1139                                         backref->namelen, backref->filetype,
1140                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1141                 }
1142                 if (backref->found_inode_ref) {
1143                         add_inode_backref(dst_cache, dst->ino,
1144                                         backref->dir, backref->index,
1145                                         backref->name, backref->namelen, 0,
1146                                         backref->ref_type, backref->errors);
1147                 }
1148         }
1149
1150         if (src->found_dir_item)
1151                 dst->found_dir_item = 1;
1152         if (src->found_file_extent)
1153                 dst->found_file_extent = 1;
1154         if (src->found_csum_item)
1155                 dst->found_csum_item = 1;
1156         if (src->some_csum_missing)
1157                 dst->some_csum_missing = 1;
1158         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1160                 if (ret < 0)
1161                         return ret;
1162         }
1163
1164         BUG_ON(src->found_link < dir_count);
1165         dst->found_link += src->found_link - dir_count;
1166         dst->found_size += src->found_size;
1167         if (src->extent_start != (u64)-1) {
1168                 if (dst->extent_start == (u64)-1) {
1169                         dst->extent_start = src->extent_start;
1170                         dst->extent_end = src->extent_end;
1171                 } else {
1172                         if (dst->extent_end > src->extent_start)
1173                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174                         else if (dst->extent_end < src->extent_start) {
1175                                 ret = add_file_extent_hole(&dst->holes,
1176                                         dst->extent_end,
1177                                         src->extent_start - dst->extent_end);
1178                         }
1179                         if (dst->extent_end < src->extent_end)
1180                                 dst->extent_end = src->extent_end;
1181                 }
1182         }
1183
1184         dst->errors |= src->errors;
1185         if (src->found_inode_item) {
1186                 if (!dst->found_inode_item) {
1187                         dst->nlink = src->nlink;
1188                         dst->isize = src->isize;
1189                         dst->nbytes = src->nbytes;
1190                         dst->imode = src->imode;
1191                         dst->nodatasum = src->nodatasum;
1192                         dst->found_inode_item = 1;
1193                 } else {
1194                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1195                 }
1196         }
1197         dst->merging = 0;
1198
1199         return 0;
1200 }
1201
/*
 * Transfer every inode record cached in @src_node to @dst_node.
 *
 * One reference on @src_node is dropped first.  If that was the last one the
 * ptr_nodes themselves are unlinked from the source caches and re-inserted
 * into the destination; otherwise a new ptr_node pointing at the same record
 * is allocated and the record's reference count is incremented.  When the
 * destination already has an entry for the same inode, the source record is
 * merged into it with merge_inode_recs() and the reference held by the
 * would-be entry is dropped again.
 *
 * root_cache is processed first, then inode_cache (second pass through the
 * "again" label).  Finally dst_node->current is switched to the inode that
 * was current in @src_node when the destination has no current record or one
 * with a smaller inode number.
 *
 * Always returns 0; allocation and lookup failures hit BUG_ON().
 */
1202 static int splice_shared_node(struct shared_node *src_node,
1203                               struct shared_node *dst_node)
1204 {
1205         struct cache_extent *cache;
1206         struct ptr_node *node, *ins;
1207         struct cache_tree *src, *dst;
1208         struct inode_record *rec, *conflict;
1209         u64 current_ino = 0;
1210         int splice = 0;
1211         int ret;
1212
             /* last reference gone: entries can be moved instead of shared */
1213         if (--src_node->refs == 0)
1214                 splice = 1;
1215         if (src_node->current)
1216                 current_ino = src_node->current->ino;
1217
1218         src = &src_node->root_cache;
1219         dst = &dst_node->root_cache;
1220 again:
1221         cache = search_cache_extent(src, 0);
1222         while (cache) {
1223                 node = container_of(cache, struct ptr_node, cache);
1224                 rec = node->data;
                     /* advance before the current entry is possibly unlinked */
1225                 cache = next_cache_extent(cache);
1226
1227                 if (splice) {
1228                         remove_cache_extent(src, &node->cache);
1229                         ins = node;
1230                 } else {
1231                         ins = malloc(sizeof(*ins));
1232                         BUG_ON(!ins);
1233                         ins->cache.start = node->cache.start;
1234                         ins->cache.size = node->cache.size;
1235                         ins->data = rec;
1236                         rec->refs++;
1237                 }
1238                 ret = insert_cache_extent(dst, &ins->cache);
1239                 if (ret == -EEXIST) {
                             /*
                              * The destination already tracks this inode:
                              * merge into its record and give up the entry
                              * that could not be inserted.
                              * NOTE(review): the return value of
                              * merge_inode_recs() is ignored here.
                              */
1240                         conflict = get_inode_rec(dst, rec->ino, 1);
1241                         BUG_ON(IS_ERR(conflict));
1242                         merge_inode_recs(rec, conflict, dst);
1243                         if (rec->checked) {
1244                                 conflict->checked = 1;
1245                                 if (dst_node->current == conflict)
1246                                         dst_node->current = NULL;
1247                         }
1248                         maybe_free_inode_rec(dst, conflict);
1249                         free_inode_rec(rec);
1250                         free(ins);
1251                 } else {
1252                         BUG_ON(ret);
1253                 }
1254         }
1255
             /* second pass: repeat the whole loop for the inode caches */
1256         if (src == &src_node->root_cache) {
1257                 src = &src_node->inode_cache;
1258                 dst = &dst_node->inode_cache;
1259                 goto again;
1260         }
1261
             /* here dst refers to dst_node->inode_cache */
1262         if (current_ino > 0 && (!dst_node->current ||
1263             current_ino > dst_node->current->ino)) {
1264                 if (dst_node->current) {
1265                         dst_node->current->checked = 1;
1266                         maybe_free_inode_rec(dst, dst_node->current);
1267                 }
1268                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269                 BUG_ON(IS_ERR(dst_node->current));
1270         }
1271         return 0;
1272 }
1273
1274 static void free_inode_ptr(struct cache_extent *cache)
1275 {
1276         struct ptr_node *node;
1277         struct inode_record *rec;
1278
1279         node = container_of(cache, struct ptr_node, cache);
1280         rec = node->data;
1281         free_inode_rec(rec);
1282         free(node);
1283 }
1284
/*
 * NOTE(review): presumably this expands to the definition of
 * free_inode_recs_tree(), which enter_shared_node() calls and which has no
 * other definition in view, with free_inode_ptr() releasing each entry --
 * confirm against the macro definition.
 */
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1286
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1288                                             u64 bytenr)
1289 {
1290         struct cache_extent *cache;
1291         struct shared_node *node;
1292
1293         cache = lookup_cache_extent(shared, bytenr, 1);
1294         if (cache) {
1295                 node = container_of(cache, struct shared_node, cache);
1296                 return node;
1297         }
1298         return NULL;
1299 }
1300
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1302 {
1303         int ret;
1304         struct shared_node *node;
1305
1306         node = calloc(1, sizeof(*node));
1307         if (!node)
1308                 return -ENOMEM;
1309         node->cache.start = bytenr;
1310         node->cache.size = 1;
1311         cache_tree_init(&node->root_cache);
1312         cache_tree_init(&node->inode_cache);
1313         node->refs = refs;
1314
1315         ret = insert_cache_extent(shared, &node->cache);
1316
1317         return ret;
1318 }
1319
/*
 * Account for reaching the shared block at @bytenr on @level during the walk
 * described by @wc.
 *
 * Returns 0 when @level is already the active level, or when no shared_node
 * existed for @bytenr: one is then created with @refs references and made the
 * active node for @level.
 *
 * Returns 1 when a shared_node already existed.  If the active node is at the
 * root level and the root has zero refs, one reference on the existing node
 * is dropped (freeing it together with its caches when it was the last one);
 * otherwise its records are spliced into the currently active node.
 * NOTE(review): presumably 1 tells the caller not to walk the block again --
 * confirm against the caller.
 */
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321                              struct walk_control *wc, int level)
1322 {
1323         struct shared_node *node;
1324         struct shared_node *dest;
1325         int ret;
1326
1327         if (level == wc->active_node)
1328                 return 0;
1329
1330         BUG_ON(wc->active_node <= level);
1331         node = find_shared_node(&wc->shared, bytenr);
1332         if (!node) {
1333                 ret = add_shared_node(&wc->shared, bytenr, refs);
1334                 BUG_ON(ret);
                     /* add_shared_node() does not hand back the node: look it up */
1335                 node = find_shared_node(&wc->shared, bytenr);
1336                 wc->nodes[level] = node;
1337                 wc->active_node = level;
1338                 return 0;
1339         }
1340
1341         if (wc->root_level == wc->active_node &&
1342             btrfs_root_refs(&root->root_item) == 0) {
1343                 if (--node->refs == 0) {
1344                         free_inode_recs_tree(&node->root_cache);
1345                         free_inode_recs_tree(&node->inode_cache);
1346                         remove_cache_extent(&wc->shared, &node->cache);
1347                         free(node);
1348                 }
1349                 return 1;
1350         }
1351
1352         dest = wc->nodes[wc->active_node];
             /* splice_shared_node() drops one reference on node */
1353         splice_shared_node(node, dest);
1354         if (node->refs == 0) {
1355                 remove_cache_extent(&wc->shared, &node->cache);
1356                 free(node);
1357         }
1358         return 1;
1359 }
1360
/*
 * Deactivate the currently active shared node when the walk leaves @level.
 *
 * Nothing is done at the root level.  Otherwise the nearest populated
 * wc->nodes[] slot above @level becomes the active node, and the records of
 * the node that was active are spliced into it -- unless the new active level
 * is the root level and the root has zero refs, in which case only one
 * reference on the old node is dropped.
 *
 * Always returns 0; inconsistent state hits BUG_ON().
 */
1361 static int leave_shared_node(struct btrfs_root *root,
1362                              struct walk_control *wc, int level)
1363 {
1364         struct shared_node *node;
1365         struct shared_node *dest;
1366         int i;
1367
1368         if (level == wc->root_level)
1369                 return 0;
1370
             /* find the closest level above that has a shared node recorded */
1371         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1372                 if (wc->nodes[i])
1373                         break;
1374         }
1375         BUG_ON(i >= BTRFS_MAX_LEVEL);
1376
1377         node = wc->nodes[wc->active_node];
1378         wc->nodes[wc->active_node] = NULL;
1379         wc->active_node = i;
1380
1381         dest = wc->nodes[wc->active_node];
1382         if (wc->active_node < wc->root_level ||
1383             btrfs_root_refs(&root->root_item) > 0) {
1384                 BUG_ON(node->refs <= 1);
1385                 splice_shared_node(node, dest);
1386         } else {
1387                 BUG_ON(node->refs < 2);
1388                 node->refs--;
1389         }
1390         return 0;
1391 }
1392
1393 /*
1394  * Returns:
1395  * < 0 - on error
1396  * 1   - if the root with id child_root_id is a child of root parent_root_id
1397  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1398  *       has other root(s) as parent(s)
1399  * 2   - if the root child_root_id doesn't have any parent roots
1400  */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1402                          u64 child_root_id)
1403 {
1404         struct btrfs_path path;
1405         struct btrfs_key key;
1406         struct extent_buffer *leaf;
1407         int has_parent = 0;
1408         int ret;
1409
1410         btrfs_init_path(&path);
1411
1412         key.objectid = parent_root_id;
1413         key.type = BTRFS_ROOT_REF_KEY;
1414         key.offset = child_root_id;
1415         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1416                                 0, 0);
1417         if (ret < 0)
1418                 return ret;
1419         btrfs_release_path(&path);
1420         if (!ret)
1421                 return 1;
1422
1423         key.objectid = child_root_id;
1424         key.type = BTRFS_ROOT_BACKREF_KEY;
1425         key.offset = 0;
1426         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1427                                 0, 0);
1428         if (ret < 0)
1429                 goto out;
1430
1431         while (1) {
1432                 leaf = path.nodes[0];
1433                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1435                         if (ret)
1436                                 break;
1437                         leaf = path.nodes[0];
1438                 }
1439
1440                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441                 if (key.objectid != child_root_id ||
1442                     key.type != BTRFS_ROOT_BACKREF_KEY)
1443                         break;
1444
1445                 has_parent = 1;
1446
1447                 if (key.offset == parent_root_id) {
1448                         btrfs_release_path(&path);
1449                         return 1;
1450                 }
1451
1452                 path.slots[0]++;
1453         }
1454 out:
1455         btrfs_release_path(&path);
1456         if (ret < 0)
1457                 return ret;
1458         return has_parent ? 0 : 2;
1459 }
1460
/*
 * Walk every btrfs_dir_item packed into the item at @slot of @eb and record a
 * backref for each entry on behalf of the directory @key->objectid.
 *
 * The current inode record of @active_node is marked as having dir items and
 * its found_size grows by the on-disk name length of every entry.  The cache
 * that receives the backref depends on the type of the entry's location key:
 * inode items go to the inode cache, root items to the root cache, anything
 * else is reported on stderr and filed under BTRFS_MULTIPLE_OBJECTIDS in the
 * inode cache.  Names longer than BTRFS_NAME_LEN are truncated to that length
 * and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * A DIR_INDEX item holding more than one entry flags I_ERR_DUP_DIR_INDEX on
 * the record.  @root is not used.  Always returns 0.
 */
1461 static int process_dir_item(struct btrfs_root *root,
1462                             struct extent_buffer *eb,
1463                             int slot, struct btrfs_key *key,
1464                             struct shared_node *active_node)
1465 {
1466         u32 total;
1467         u32 cur = 0;
1468         u32 len;
1469         u32 name_len;
1470         u32 data_len;
1471         int error;
1472         int nritems = 0;
1473         int filetype;
1474         struct btrfs_dir_item *di;
1475         struct inode_record *rec;
1476         struct cache_tree *root_cache;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_key location;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         root_cache = &active_node->root_cache;
1482         inode_cache = &active_node->inode_cache;
1483         rec = active_node->current;
1484         rec->found_dir_item = 1;
1485
1486         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487         total = btrfs_item_size_nr(eb, slot);
1488         while (cur < total) {
1489                 nritems++;
1490                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491                 name_len = btrfs_dir_name_len(eb, di);
1492                 data_len = btrfs_dir_data_len(eb, di);
1493                 filetype = btrfs_dir_type(eb, di);
1494
1495                 rec->found_size += name_len;
                     /* never copy more than namebuf can hold */
1496                 if (name_len <= BTRFS_NAME_LEN) {
1497                         len = name_len;
1498                         error = 0;
1499                 } else {
1500                         len = BTRFS_NAME_LEN;
1501                         error = REF_ERR_NAME_TOO_LONG;
1502                 }
                     /* the name is read from right after the dir item header */
1503                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1504
1505                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506                         add_inode_backref(inode_cache, location.objectid,
1507                                           key->objectid, key->offset, namebuf,
1508                                           len, filetype, key->type, error);
1509                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510                         add_inode_backref(root_cache, location.objectid,
1511                                           key->objectid, key->offset,
1512                                           namebuf, len, filetype,
1513                                           key->type, error);
1514                 } else {
1515                         fprintf(stderr, "invalid location in dir item %u\n",
1516                                 location.type);
1517                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518                                           key->objectid, key->offset, namebuf,
1519                                           len, filetype, key->type, error);
1520                 }
1521
                     /* step over header, full on-disk name and data to the next entry */
1522                 len = sizeof(*di) + name_len + data_len;
1523                 di = (struct btrfs_dir_item *)((char *)di + len);
1524                 cur += len;
1525         }
1526         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1528
1529         return 0;
1530 }
1531
1532 static int process_inode_ref(struct extent_buffer *eb,
1533                              int slot, struct btrfs_key *key,
1534                              struct shared_node *active_node)
1535 {
1536         u32 total;
1537         u32 cur = 0;
1538         u32 len;
1539         u32 name_len;
1540         u64 index;
1541         int error;
1542         struct cache_tree *inode_cache;
1543         struct btrfs_inode_ref *ref;
1544         char namebuf[BTRFS_NAME_LEN];
1545
1546         inode_cache = &active_node->inode_cache;
1547
1548         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549         total = btrfs_item_size_nr(eb, slot);
1550         while (cur < total) {
1551                 name_len = btrfs_inode_ref_name_len(eb, ref);
1552                 index = btrfs_inode_ref_index(eb, ref);
1553                 if (name_len <= BTRFS_NAME_LEN) {
1554                         len = name_len;
1555                         error = 0;
1556                 } else {
1557                         len = BTRFS_NAME_LEN;
1558                         error = REF_ERR_NAME_TOO_LONG;
1559                 }
1560                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561                 add_inode_backref(inode_cache, key->objectid, key->offset,
1562                                   index, namebuf, len, 0, key->type, error);
1563
1564                 len = sizeof(*ref) + name_len;
1565                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1566                 cur += len;
1567         }
1568         return 0;
1569 }
1570
1571 static int process_inode_extref(struct extent_buffer *eb,
1572                                 int slot, struct btrfs_key *key,
1573                                 struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u64 index;
1580         u64 parent;
1581         int error;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_inode_extref *extref;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         inode_cache = &active_node->inode_cache;
1587
1588         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589         total = btrfs_item_size_nr(eb, slot);
1590         while (cur < total) {
1591                 name_len = btrfs_inode_extref_name_len(eb, extref);
1592                 index = btrfs_inode_extref_index(eb, extref);
1593                 parent = btrfs_inode_extref_parent(eb, extref);
1594                 if (name_len <= BTRFS_NAME_LEN) {
1595                         len = name_len;
1596                         error = 0;
1597                 } else {
1598                         len = BTRFS_NAME_LEN;
1599                         error = REF_ERR_NAME_TOO_LONG;
1600                 }
1601                 read_extent_buffer(eb, namebuf,
1602                                    (unsigned long)(extref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, parent,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*extref) + name_len;
1607                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611
1612 }
1613
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615                             u64 len, u64 *found)
1616 {
1617         struct btrfs_key key;
1618         struct btrfs_path path;
1619         struct extent_buffer *leaf;
1620         int ret;
1621         size_t size;
1622         *found = 0;
1623         u64 csum_end;
1624         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1625
1626         btrfs_init_path(&path);
1627
1628         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1629         key.offset = start;
1630         key.type = BTRFS_EXTENT_CSUM_KEY;
1631
1632         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1633                                 &key, &path, 0, 0);
1634         if (ret < 0)
1635                 goto out;
1636         if (ret > 0 && path.slots[0] > 0) {
1637                 leaf = path.nodes[0];
1638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640                     key.type == BTRFS_EXTENT_CSUM_KEY)
1641                         path.slots[0]--;
1642         }
1643
1644         while (len > 0) {
1645                 leaf = path.nodes[0];
1646                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1648                         if (ret > 0)
1649                                 break;
1650                         else if (ret < 0)
1651                                 goto out;
1652                         leaf = path.nodes[0];
1653                 }
1654
1655                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657                     key.type != BTRFS_EXTENT_CSUM_KEY)
1658                         break;
1659
1660                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661                 if (key.offset >= start + len)
1662                         break;
1663
1664                 if (key.offset > start)
1665                         start = key.offset;
1666
1667                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669                 if (csum_end > start) {
1670                         size = min(csum_end - start, len);
1671                         len -= size;
1672                         start += size;
1673                         *found += size;
1674                 }
1675
1676                 path.slots[0]++;
1677         }
1678 out:
1679         btrfs_release_path(&path);
1680         if (ret < 0)
1681                 return ret;
1682         return 0;
1683 }
1684
1685 static int process_file_extent(struct btrfs_root *root,
1686                                 struct extent_buffer *eb,
1687                                 int slot, struct btrfs_key *key,
1688                                 struct shared_node *active_node)
1689 {
1690         struct inode_record *rec;
1691         struct btrfs_file_extent_item *fi;
1692         u64 num_bytes = 0;
1693         u64 disk_bytenr = 0;
1694         u64 extent_offset = 0;
1695         u64 mask = root->sectorsize - 1;
1696         int extent_type;
1697         int ret;
1698
1699         rec = active_node->current;
1700         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701         rec->found_file_extent = 1;
1702
1703         if (rec->extent_start == (u64)-1) {
1704                 rec->extent_start = key->offset;
1705                 rec->extent_end = key->offset;
1706         }
1707
1708         if (rec->extent_end > key->offset)
1709                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710         else if (rec->extent_end < key->offset) {
1711                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712                                            key->offset - rec->extent_end);
1713                 if (ret < 0)
1714                         return ret;
1715         }
1716
1717         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718         extent_type = btrfs_file_extent_type(eb, fi);
1719
1720         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1722                 if (num_bytes == 0)
1723                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724                 rec->found_size += num_bytes;
1725                 num_bytes = (num_bytes + mask) & ~mask;
1726         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730                 extent_offset = btrfs_file_extent_offset(eb, fi);
1731                 if (num_bytes == 0 || (num_bytes & mask))
1732                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733                 if (num_bytes + extent_offset >
1734                     btrfs_file_extent_ram_bytes(eb, fi))
1735                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1736                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737                     (btrfs_file_extent_compression(eb, fi) ||
1738                      btrfs_file_extent_encryption(eb, fi) ||
1739                      btrfs_file_extent_other_encoding(eb, fi)))
1740                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1741                 if (disk_bytenr > 0)
1742                         rec->found_size += num_bytes;
1743         } else {
1744                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1745         }
1746         rec->extent_end = key->offset + num_bytes;
1747
1748         /*
1749          * The data reloc tree will copy full extents into its inode and then
1750          * copy the corresponding csums.  Because the extent it copied could be
1751          * a preallocated extent that hasn't been written to yet there may be no
1752          * csums to copy, ergo we won't have csums for our file extent.  This is
1753          * ok so just don't bother checking csums if the inode belongs to the
1754          * data reloc tree.
1755          */
1756         if (disk_bytenr > 0 &&
1757             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1758                 u64 found;
1759                 if (btrfs_file_extent_compression(eb, fi))
1760                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1761                 else
1762                         disk_bytenr += extent_offset;
1763
1764                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1765                 if (ret < 0)
1766                         return ret;
1767                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1768                         if (found > 0)
1769                                 rec->found_csum_item = 1;
1770                         if (found < num_bytes)
1771                                 rec->some_csum_missing = 1;
1772                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1773                         if (found > 0)
1774                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1775                 }
1776         }
1777         return 0;
1778 }
1779
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781                             struct walk_control *wc)
1782 {
1783         struct btrfs_key key;
1784         u32 nritems;
1785         int i;
1786         int ret = 0;
1787         struct cache_tree *inode_cache;
1788         struct shared_node *active_node;
1789
1790         if (wc->root_level == wc->active_node &&
1791             btrfs_root_refs(&root->root_item) == 0)
1792                 return 0;
1793
1794         active_node = wc->nodes[wc->active_node];
1795         inode_cache = &active_node->inode_cache;
1796         nritems = btrfs_header_nritems(eb);
1797         for (i = 0; i < nritems; i++) {
1798                 btrfs_item_key_to_cpu(eb, &key, i);
1799
1800                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1801                         continue;
1802                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1803                         continue;
1804
1805                 if (active_node->current == NULL ||
1806                     active_node->current->ino < key.objectid) {
1807                         if (active_node->current) {
1808                                 active_node->current->checked = 1;
1809                                 maybe_free_inode_rec(inode_cache,
1810                                                      active_node->current);
1811                         }
1812                         active_node->current = get_inode_rec(inode_cache,
1813                                                              key.objectid, 1);
1814                         BUG_ON(IS_ERR(active_node->current));
1815                 }
1816                 switch (key.type) {
1817                 case BTRFS_DIR_ITEM_KEY:
1818                 case BTRFS_DIR_INDEX_KEY:
1819                         ret = process_dir_item(root, eb, i, &key, active_node);
1820                         break;
1821                 case BTRFS_INODE_REF_KEY:
1822                         ret = process_inode_ref(eb, i, &key, active_node);
1823                         break;
1824                 case BTRFS_INODE_EXTREF_KEY:
1825                         ret = process_inode_extref(eb, i, &key, active_node);
1826                         break;
1827                 case BTRFS_INODE_ITEM_KEY:
1828                         ret = process_inode_item(eb, i, &key, active_node);
1829                         break;
1830                 case BTRFS_EXTENT_DATA_KEY:
1831                         ret = process_file_extent(root, eb, i, &key,
1832                                                   active_node);
1833                         break;
1834                 default:
1835                         break;
1836                 };
1837         }
1838         return ret;
1839 }
1840
1841 static void reada_walk_down(struct btrfs_root *root,
1842                             struct extent_buffer *node, int slot)
1843 {
1844         u64 bytenr;
1845         u64 ptr_gen;
1846         u32 nritems;
1847         u32 blocksize;
1848         int i;
1849         int level;
1850
1851         level = btrfs_header_level(node);
1852         if (level != 1)
1853                 return;
1854
1855         nritems = btrfs_header_nritems(node);
1856         blocksize = root->nodesize;
1857         for (i = slot; i < nritems; i++) {
1858                 bytenr = btrfs_node_blockptr(node, i);
1859                 ptr_gen = btrfs_node_ptr_generation(node, i);
1860                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1861         }
1862 }
1863
1864 /*
1865  * Check the child node/leaf by the following condition:
1866  * 1. the first item key of the node/leaf should be the same with the one
1867  *    in parent.
1868  * 2. block in parent node should match the child node/leaf.
1869  * 3. generation of parent node and child's header should be consistent.
1870  *
1871  * Or the child node/leaf pointed by the key in parent is not valid.
1872  *
1873  * We hope to check leaf owner too, but since subvol may share leaves,
1874  * which makes leaf owner check not so strong, key check should be
1875  * sufficient enough for that case.
1876  */
1877 static int check_child_node(struct btrfs_root *root,
1878                             struct extent_buffer *parent, int slot,
1879                             struct extent_buffer *child)
1880 {
1881         struct btrfs_key parent_key;
1882         struct btrfs_key child_key;
1883         int ret = 0;
1884
1885         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886         if (btrfs_header_level(child) == 0)
1887                 btrfs_item_key_to_cpu(child, &child_key, 0);
1888         else
1889                 btrfs_node_key_to_cpu(child, &child_key, 0);
1890
1891         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1892                 ret = -EINVAL;
1893                 fprintf(stderr,
1894                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895                         parent_key.objectid, parent_key.type, parent_key.offset,
1896                         child_key.objectid, child_key.type, child_key.offset);
1897         }
1898         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1899                 ret = -EINVAL;
1900                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901                         btrfs_node_blockptr(parent, slot),
1902                         btrfs_header_bytenr(child));
1903         }
1904         if (btrfs_node_ptr_generation(parent, slot) !=
1905             btrfs_header_generation(child)) {
1906                 ret = -EINVAL;
1907                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908                         btrfs_header_generation(child),
1909                         btrfs_node_ptr_generation(parent, slot));
1910         }
1911         return ret;
1912 }
1913
/*
 * Per-level cache of extent reference counts used by walk_down_tree(), so
 * that a tree block whose ref count was just looked up is not looked up
 * again.  Both arrays are indexed by tree level.
 */
struct node_refs {
        /* start bytenr of the tree block last looked up at each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count recorded for that block */
        u64 refs[BTRFS_MAX_LEVEL];
};
1918
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920                           struct walk_control *wc, int *level,
1921                           struct node_refs *nrefs)
1922 {
1923         enum btrfs_tree_block_status status;
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         struct extent_buffer *next;
1927         struct extent_buffer *cur;
1928         u32 blocksize;
1929         int ret, err = 0;
1930         u64 refs;
1931
1932         WARN_ON(*level < 0);
1933         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1934
1935         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936                 refs = nrefs->refs[*level];
1937                 ret = 0;
1938         } else {
1939                 ret = btrfs_lookup_extent_info(NULL, root,
1940                                        path->nodes[*level]->start,
1941                                        *level, 1, &refs, NULL);
1942                 if (ret < 0) {
1943                         err = ret;
1944                         goto out;
1945                 }
1946                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947                 nrefs->refs[*level] = refs;
1948         }
1949
1950         if (refs > 1) {
1951                 ret = enter_shared_node(root, path->nodes[*level]->start,
1952                                         refs, wc, *level);
1953                 if (ret > 0) {
1954                         err = ret;
1955                         goto out;
1956                 }
1957         }
1958
1959         while (*level >= 0) {
1960                 WARN_ON(*level < 0);
1961                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962                 cur = path->nodes[*level];
1963
1964                 if (btrfs_header_level(cur) != *level)
1965                         WARN_ON(1);
1966
1967                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1968                         break;
1969                 if (*level == 0) {
1970                         ret = process_one_leaf(root, cur, wc);
1971                         if (ret < 0)
1972                                 err = ret;
1973                         break;
1974                 }
1975                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977                 blocksize = root->nodesize;
1978
1979                 if (bytenr == nrefs->bytenr[*level - 1]) {
1980                         refs = nrefs->refs[*level - 1];
1981                 } else {
1982                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983                                         *level - 1, 1, &refs, NULL);
1984                         if (ret < 0) {
1985                                 refs = 0;
1986                         } else {
1987                                 nrefs->bytenr[*level - 1] = bytenr;
1988                                 nrefs->refs[*level - 1] = refs;
1989                         }
1990                 }
1991
1992                 if (refs > 1) {
1993                         ret = enter_shared_node(root, bytenr, refs,
1994                                                 wc, *level - 1);
1995                         if (ret > 0) {
1996                                 path->slots[*level]++;
1997                                 continue;
1998                         }
1999                 }
2000
2001                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003                         free_extent_buffer(next);
2004                         reada_walk_down(root, cur, path->slots[*level]);
2005                         next = read_tree_block(root, bytenr, blocksize,
2006                                                ptr_gen);
2007                         if (!extent_buffer_uptodate(next)) {
2008                                 struct btrfs_key node_key;
2009
2010                                 btrfs_node_key_to_cpu(path->nodes[*level],
2011                                                       &node_key,
2012                                                       path->slots[*level]);
2013                                 btrfs_add_corrupt_extent_record(root->fs_info,
2014                                                 &node_key,
2015                                                 path->nodes[*level]->start,
2016                                                 root->nodesize, *level);
2017                                 err = -EIO;
2018                                 goto out;
2019                         }
2020                 }
2021
2022                 ret = check_child_node(root, cur, path->slots[*level], next);
2023                 if (ret) {
2024                         err = ret;
2025                         goto out;
2026                 }
2027
2028                 if (btrfs_is_leaf(next))
2029                         status = btrfs_check_leaf(root, NULL, next);
2030                 else
2031                         status = btrfs_check_node(root, NULL, next);
2032                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033                         free_extent_buffer(next);
2034                         err = -EIO;
2035                         goto out;
2036                 }
2037
2038                 *level = *level - 1;
2039                 free_extent_buffer(path->nodes[*level]);
2040                 path->nodes[*level] = next;
2041                 path->slots[*level] = 0;
2042         }
2043 out:
2044         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2045         return err;
2046 }
2047
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049                         struct walk_control *wc, int *level)
2050 {
2051         int i;
2052         struct extent_buffer *leaf;
2053
2054         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055                 leaf = path->nodes[i];
2056                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2057                         path->slots[i]++;
2058                         *level = i;
2059                         return 0;
2060                 } else {
2061                         free_extent_buffer(path->nodes[*level]);
2062                         path->nodes[*level] = NULL;
2063                         BUG_ON(*level > wc->active_node);
2064                         if (*level == wc->active_node)
2065                                 leave_shared_node(root, wc, *level);
2066                         *level = i + 1;
2067                 }
2068         }
2069         return 1;
2070 }
2071
2072 static int check_root_dir(struct inode_record *rec)
2073 {
2074         struct inode_backref *backref;
2075         int ret = -1;
2076
2077         if (!rec->found_inode_item || rec->errors)
2078                 goto out;
2079         if (rec->nlink != 1 || rec->found_link != 0)
2080                 goto out;
2081         if (list_empty(&rec->backrefs))
2082                 goto out;
2083         backref = to_inode_backref(rec->backrefs.next);
2084         if (!backref->found_inode_ref)
2085                 goto out;
2086         if (backref->index != 0 || backref->namelen != 2 ||
2087             memcmp(backref->name, "..", 2))
2088                 goto out;
2089         if (backref->found_dir_index || backref->found_dir_item)
2090                 goto out;
2091         ret = 0;
2092 out:
2093         return ret;
2094 }
2095
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097                               struct btrfs_root *root, struct btrfs_path *path,
2098                               struct inode_record *rec)
2099 {
2100         struct btrfs_inode_item *ei;
2101         struct btrfs_key key;
2102         int ret;
2103
2104         key.objectid = rec->ino;
2105         key.type = BTRFS_INODE_ITEM_KEY;
2106         key.offset = (u64)-1;
2107
2108         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2109         if (ret < 0)
2110                 goto out;
2111         if (ret) {
2112                 if (!path->slots[0]) {
2113                         ret = -ENOENT;
2114                         goto out;
2115                 }
2116                 path->slots[0]--;
2117                 ret = 0;
2118         }
2119         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120         if (key.objectid != rec->ino) {
2121                 ret = -ENOENT;
2122                 goto out;
2123         }
2124
2125         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126                             struct btrfs_inode_item);
2127         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128         btrfs_mark_buffer_dirty(path->nodes[0]);
2129         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131                root->root_key.objectid);
2132 out:
2133         btrfs_release_path(path);
2134         return ret;
2135 }
2136
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138                                     struct btrfs_root *root,
2139                                     struct btrfs_path *path,
2140                                     struct inode_record *rec)
2141 {
2142         int ret;
2143
2144         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145         btrfs_release_path(path);
2146         if (!ret)
2147                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2148         return ret;
2149 }
2150
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152                                struct btrfs_root *root,
2153                                struct btrfs_path *path,
2154                                struct inode_record *rec)
2155 {
2156         struct btrfs_inode_item *ei;
2157         struct btrfs_key key;
2158         int ret = 0;
2159
2160         key.objectid = rec->ino;
2161         key.type = BTRFS_INODE_ITEM_KEY;
2162         key.offset = 0;
2163
2164         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2165         if (ret) {
2166                 if (ret > 0)
2167                         ret = -ENOENT;
2168                 goto out;
2169         }
2170
2171         /* Since ret == 0, no need to check anything */
2172         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173                             struct btrfs_inode_item);
2174         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175         btrfs_mark_buffer_dirty(path->nodes[0]);
2176         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177         printf("reset nbytes for ino %llu root %llu\n",
2178                rec->ino, root->root_key.objectid);
2179 out:
2180         btrfs_release_path(path);
2181         return ret;
2182 }
2183
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185                                  struct cache_tree *inode_cache,
2186                                  struct inode_record *rec,
2187                                  struct inode_backref *backref)
2188 {
2189         struct btrfs_path *path;
2190         struct btrfs_trans_handle *trans;
2191         struct btrfs_dir_item *dir_item;
2192         struct extent_buffer *leaf;
2193         struct btrfs_key key;
2194         struct btrfs_disk_key disk_key;
2195         struct inode_record *dir_rec;
2196         unsigned long name_ptr;
2197         u32 data_size = sizeof(*dir_item) + backref->namelen;
2198         int ret;
2199
2200         path = btrfs_alloc_path();
2201         if (!path)
2202                 return -ENOMEM;
2203
2204         trans = btrfs_start_transaction(root, 1);
2205         if (IS_ERR(trans)) {
2206                 btrfs_free_path(path);
2207                 return PTR_ERR(trans);
2208         }
2209
2210         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2211                 (unsigned long long)rec->ino);
2212         key.objectid = backref->dir;
2213         key.type = BTRFS_DIR_INDEX_KEY;
2214         key.offset = backref->index;
2215
2216         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2217         BUG_ON(ret);
2218
2219         leaf = path->nodes[0];
2220         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2221
2222         disk_key.objectid = cpu_to_le64(rec->ino);
2223         disk_key.type = BTRFS_INODE_ITEM_KEY;
2224         disk_key.offset = 0;
2225
2226         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2227         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2228         btrfs_set_dir_data_len(leaf, dir_item, 0);
2229         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2230         name_ptr = (unsigned long)(dir_item + 1);
2231         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2232         btrfs_mark_buffer_dirty(leaf);
2233         btrfs_free_path(path);
2234         btrfs_commit_transaction(trans, root);
2235
2236         backref->found_dir_index = 1;
2237         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2238         BUG_ON(IS_ERR(dir_rec));
2239         if (!dir_rec)
2240                 return 0;
2241         dir_rec->found_size += backref->namelen;
2242         if (dir_rec->found_size == dir_rec->isize &&
2243             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2244                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2245         if (dir_rec->found_size != dir_rec->isize)
2246                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2247
2248         return 0;
2249 }
2250
2251 static int delete_dir_index(struct btrfs_root *root,
2252                             struct cache_tree *inode_cache,
2253                             struct inode_record *rec,
2254                             struct inode_backref *backref)
2255 {
2256         struct btrfs_trans_handle *trans;
2257         struct btrfs_dir_item *di;
2258         struct btrfs_path *path;
2259         int ret = 0;
2260
2261         path = btrfs_alloc_path();
2262         if (!path)
2263                 return -ENOMEM;
2264
2265         trans = btrfs_start_transaction(root, 1);
2266         if (IS_ERR(trans)) {
2267                 btrfs_free_path(path);
2268                 return PTR_ERR(trans);
2269         }
2270
2271
2272         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2273                 (unsigned long long)backref->dir,
2274                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2275                 (unsigned long long)root->objectid);
2276
2277         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2278                                     backref->name, backref->namelen,
2279                                     backref->index, -1);
2280         if (IS_ERR(di)) {
2281                 ret = PTR_ERR(di);
2282                 btrfs_free_path(path);
2283                 btrfs_commit_transaction(trans, root);
2284                 if (ret == -ENOENT)
2285                         return 0;
2286                 return ret;
2287         }
2288
2289         if (!di)
2290                 ret = btrfs_del_item(trans, root, path);
2291         else
2292                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2293         BUG_ON(ret);
2294         btrfs_free_path(path);
2295         btrfs_commit_transaction(trans, root);
2296         return ret;
2297 }
2298
2299 static int create_inode_item(struct btrfs_root *root,
2300                              struct inode_record *rec,
2301                              struct inode_backref *backref, int root_dir)
2302 {
2303         struct btrfs_trans_handle *trans;
2304         struct btrfs_inode_item inode_item;
2305         time_t now = time(NULL);
2306         int ret;
2307
2308         trans = btrfs_start_transaction(root, 1);
2309         if (IS_ERR(trans)) {
2310                 ret = PTR_ERR(trans);
2311                 return ret;
2312         }
2313
2314         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2315                 "be incomplete, please check permissions and content after "
2316                 "the fsck completes.\n", (unsigned long long)root->objectid,
2317                 (unsigned long long)rec->ino);
2318
2319         memset(&inode_item, 0, sizeof(inode_item));
2320         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2321         if (root_dir)
2322                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2323         else
2324                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2325         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2326         if (rec->found_dir_item) {
2327                 if (rec->found_file_extent)
2328                         fprintf(stderr, "root %llu inode %llu has both a dir "
2329                                 "item and extents, unsure if it is a dir or a "
2330                                 "regular file so setting it as a directory\n",
2331                                 (unsigned long long)root->objectid,
2332                                 (unsigned long long)rec->ino);
2333                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2334                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2335         } else if (!rec->found_dir_item) {
2336                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2337                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2338         }
2339         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2340         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2341         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2342         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2343         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2344         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2345         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2346         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2347
2348         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2349         BUG_ON(ret);
2350         btrfs_commit_transaction(trans, root);
2351         return 0;
2352 }
2353
2354 static int repair_inode_backrefs(struct btrfs_root *root,
2355                                  struct inode_record *rec,
2356                                  struct cache_tree *inode_cache,
2357                                  int delete)
2358 {
2359         struct inode_backref *tmp, *backref;
2360         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2361         int ret = 0;
2362         int repaired = 0;
2363
2364         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2365                 if (!delete && rec->ino == root_dirid) {
2366                         if (!rec->found_inode_item) {
2367                                 ret = create_inode_item(root, rec, backref, 1);
2368                                 if (ret)
2369                                         break;
2370                                 repaired++;
2371                         }
2372                 }
2373
2374                 /* Index 0 for root dir's are special, don't mess with it */
2375                 if (rec->ino == root_dirid && backref->index == 0)
2376                         continue;
2377
2378                 if (delete &&
2379                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2380                      (backref->found_dir_index && backref->found_inode_ref &&
2381                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2382                         ret = delete_dir_index(root, inode_cache, rec, backref);
2383                         if (ret)
2384                                 break;
2385                         repaired++;
2386                         list_del(&backref->list);
2387                         free(backref);
2388                 }
2389
2390                 if (!delete && !backref->found_dir_index &&
2391                     backref->found_dir_item && backref->found_inode_ref) {
2392                         ret = add_missing_dir_index(root, inode_cache, rec,
2393                                                     backref);
2394                         if (ret)
2395                                 break;
2396                         repaired++;
2397                         if (backref->found_dir_item &&
2398                             backref->found_dir_index &&
2399                             backref->found_dir_index) {
2400                                 if (!backref->errors &&
2401                                     backref->found_inode_ref) {
2402                                         list_del(&backref->list);
2403                                         free(backref);
2404                                 }
2405                         }
2406                 }
2407
2408                 if (!delete && (!backref->found_dir_index &&
2409                                 !backref->found_dir_item &&
2410                                 backref->found_inode_ref)) {
2411                         struct btrfs_trans_handle *trans;
2412                         struct btrfs_key location;
2413
2414                         ret = check_dir_conflict(root, backref->name,
2415                                                  backref->namelen,
2416                                                  backref->dir,
2417                                                  backref->index);
2418                         if (ret) {
2419                                 /*
2420                                  * let nlink fixing routine to handle it,
2421                                  * which can do it better.
2422                                  */
2423                                 ret = 0;
2424                                 break;
2425                         }
2426                         location.objectid = rec->ino;
2427                         location.type = BTRFS_INODE_ITEM_KEY;
2428                         location.offset = 0;
2429
2430                         trans = btrfs_start_transaction(root, 1);
2431                         if (IS_ERR(trans)) {
2432                                 ret = PTR_ERR(trans);
2433                                 break;
2434                         }
2435                         fprintf(stderr, "adding missing dir index/item pair "
2436                                 "for inode %llu\n",
2437                                 (unsigned long long)rec->ino);
2438                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2439                                                     backref->namelen,
2440                                                     backref->dir, &location,
2441                                                     imode_to_type(rec->imode),
2442                                                     backref->index);
2443                         BUG_ON(ret);
2444                         btrfs_commit_transaction(trans, root);
2445                         repaired++;
2446                 }
2447
2448                 if (!delete && (backref->found_inode_ref &&
2449                                 backref->found_dir_index &&
2450                                 backref->found_dir_item &&
2451                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2452                                 !rec->found_inode_item)) {
2453                         ret = create_inode_item(root, rec, backref, 0);
2454                         if (ret)
2455                                 break;
2456                         repaired++;
2457                 }
2458
2459         }
2460         return ret ? ret : repaired;
2461 }
2462
2463 /*
2464  * To determine the file type for nlink/inode_item repair
2465  *
2466  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2467  * Return -ENOENT if file type is not found.
2468  */
2469 static int find_file_type(struct inode_record *rec, u8 *type)
2470 {
2471         struct inode_backref *backref;
2472
2473         /* For inode item recovered case */
2474         if (rec->found_inode_item) {
2475                 *type = imode_to_type(rec->imode);
2476                 return 0;
2477         }
2478
2479         list_for_each_entry(backref, &rec->backrefs, list) {
2480                 if (backref->found_dir_index || backref->found_dir_item) {
2481                         *type = backref->filetype;
2482                         return 0;
2483                 }
2484         }
2485         return -ENOENT;
2486 }
2487
2488 /*
2489  * To determine the file name for nlink repair
2490  *
2491  * Return 0 if file name is found, set name and namelen.
2492  * Return -ENOENT if file name is not found.
2493  */
2494 static int find_file_name(struct inode_record *rec,
2495                           char *name, int *namelen)
2496 {
2497         struct inode_backref *backref;
2498
2499         list_for_each_entry(backref, &rec->backrefs, list) {
2500                 if (backref->found_dir_index || backref->found_dir_item ||
2501                     backref->found_inode_ref) {
2502                         memcpy(name, backref->name, backref->namelen);
2503                         *namelen = backref->namelen;
2504                         return 0;
2505                 }
2506         }
2507         return -ENOENT;
2508 }
2509
2510 /* Reset the nlink of the inode to the correct one */
2511 static int reset_nlink(struct btrfs_trans_handle *trans,
2512                        struct btrfs_root *root,
2513                        struct btrfs_path *path,
2514                        struct inode_record *rec)
2515 {
2516         struct inode_backref *backref;
2517         struct inode_backref *tmp;
2518         struct btrfs_key key;
2519         struct btrfs_inode_item *inode_item;
2520         int ret = 0;
2521
2522         /* We don't believe this either, reset it and iterate backref */
2523         rec->found_link = 0;
2524
2525         /* Remove all backref including the valid ones */
2526         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2527                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2528                                    backref->index, backref->name,
2529                                    backref->namelen, 0);
2530                 if (ret < 0)
2531                         goto out;
2532
2533                 /* remove invalid backref, so it won't be added back */
2534                 if (!(backref->found_dir_index &&
2535                       backref->found_dir_item &&
2536                       backref->found_inode_ref)) {
2537                         list_del(&backref->list);
2538                         free(backref);
2539                 } else {
2540                         rec->found_link++;
2541                 }
2542         }
2543
2544         /* Set nlink to 0 */
2545         key.objectid = rec->ino;
2546         key.type = BTRFS_INODE_ITEM_KEY;
2547         key.offset = 0;
2548         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2549         if (ret < 0)
2550                 goto out;
2551         if (ret > 0) {
2552                 ret = -ENOENT;
2553                 goto out;
2554         }
2555         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2556                                     struct btrfs_inode_item);
2557         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2558         btrfs_mark_buffer_dirty(path->nodes[0]);
2559         btrfs_release_path(path);
2560
2561         /*
2562          * Add back valid inode_ref/dir_item/dir_index,
2563          * add_link() will handle the nlink inc, so new nlink must be correct
2564          */
2565         list_for_each_entry(backref, &rec->backrefs, list) {
2566                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2567                                      backref->name, backref->namelen,
2568                                      backref->filetype, &backref->index, 1);
2569                 if (ret < 0)
2570                         goto out;
2571         }
2572 out:
2573         btrfs_release_path(path);
2574         return ret;
2575 }
2576
2577 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2578                                struct btrfs_root *root,
2579                                struct btrfs_path *path,
2580                                struct inode_record *rec)
2581 {
2582         char *dir_name = "lost+found";
2583         char namebuf[BTRFS_NAME_LEN] = {0};
2584         u64 lost_found_ino;
2585         u32 mode = 0700;
2586         u8 type = 0;
2587         int namelen = 0;
2588         int name_recovered = 0;
2589         int type_recovered = 0;
2590         int ret = 0;
2591
2592         /*
2593          * Get file name and type first before these invalid inode ref
2594          * are deleted by remove_all_invalid_backref()
2595          */
2596         name_recovered = !find_file_name(rec, namebuf, &namelen);
2597         type_recovered = !find_file_type(rec, &type);
2598
2599         if (!name_recovered) {
2600                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2601                        rec->ino, rec->ino);
2602                 namelen = count_digits(rec->ino);
2603                 sprintf(namebuf, "%llu", rec->ino);
2604                 name_recovered = 1;
2605         }
2606         if (!type_recovered) {
2607                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2608                        rec->ino);
2609                 type = BTRFS_FT_REG_FILE;
2610                 type_recovered = 1;
2611         }
2612
2613         ret = reset_nlink(trans, root, path, rec);
2614         if (ret < 0) {
2615                 fprintf(stderr,
2616                         "Failed to reset nlink for inode %llu: %s\n",
2617                         rec->ino, strerror(-ret));
2618                 goto out;
2619         }
2620
2621         if (rec->found_link == 0) {
2622                 lost_found_ino = root->highest_inode;
2623                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2624                         ret = -EOVERFLOW;
2625                         goto out;
2626                 }
2627                 lost_found_ino++;
2628                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2629                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2630                                   mode);
2631                 if (ret < 0) {
2632                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2633                                 dir_name, strerror(-ret));
2634                         goto out;
2635                 }
2636                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2637                                      namebuf, namelen, type, NULL, 1);
2638                 /*
2639                  * Add ".INO" suffix several times to handle case where
2640                  * "FILENAME.INO" is already taken by another file.
2641                  */
2642                 while (ret == -EEXIST) {
2643                         /*
2644                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2645                          */
2646                         if (namelen + count_digits(rec->ino) + 1 >
2647                             BTRFS_NAME_LEN) {
2648                                 ret = -EFBIG;
2649                                 goto out;
2650                         }
2651                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2652                                  ".%llu", rec->ino);
2653                         namelen += count_digits(rec->ino) + 1;
2654                         ret = btrfs_add_link(trans, root, rec->ino,
2655                                              lost_found_ino, namebuf,
2656                                              namelen, type, NULL, 1);
2657                 }
2658                 if (ret < 0) {
2659                         fprintf(stderr,
2660                                 "Failed to link the inode %llu to %s dir: %s\n",
2661                                 rec->ino, dir_name, strerror(-ret));
2662                         goto out;
2663                 }
2664                 /*
2665                  * Just increase the found_link, don't actually add the
2666                  * backref. This will make things easier and this inode
2667                  * record will be freed after the repair is done.
2668                  * So fsck will not report problem about this inode.
2669                  */
2670                 rec->found_link++;
2671                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2672                        namelen, namebuf, dir_name);
2673         }
2674         printf("Fixed the nlink of inode %llu\n", rec->ino);
2675 out:
2676         /*
2677          * Clear the flag anyway, or we will loop forever for the same inode
2678          * as it will not be removed from the bad inode list and the dead loop
2679          * happens.
2680          */
2681         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2682         btrfs_release_path(path);
2683         return ret;
2684 }
2685
2686 /*
2687  * Check if there is any normal(reg or prealloc) file extent for given
2688  * ino.
2689  * This is used to determine the file type when neither its dir_index/item or
2690  * inode_item exists.
2691  *
2692  * This will *NOT* report error, if any error happens, just consider it does
2693  * not have any normal file extent.
2694  */
2695 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2696 {
2697         struct btrfs_path *path;
2698         struct btrfs_key key;
2699         struct btrfs_key found_key;
2700         struct btrfs_file_extent_item *fi;
2701         u8 type;
2702         int ret = 0;
2703
2704         path = btrfs_alloc_path();
2705         if (!path)
2706                 goto out;
2707         key.objectid = ino;
2708         key.type = BTRFS_EXTENT_DATA_KEY;
2709         key.offset = 0;
2710
2711         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2712         if (ret < 0) {
2713                 ret = 0;
2714                 goto out;
2715         }
2716         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2717                 ret = btrfs_next_leaf(root, path);
2718                 if (ret) {
2719                         ret = 0;
2720                         goto out;
2721                 }
2722         }
2723         while (1) {
2724                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2725                                       path->slots[0]);
2726                 if (found_key.objectid != ino ||
2727                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2728                         break;
2729                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2730                                     struct btrfs_file_extent_item);
2731                 type = btrfs_file_extent_type(path->nodes[0], fi);
2732                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2733                         ret = 1;
2734                         goto out;
2735                 }
2736         }
2737 out:
2738         btrfs_free_path(path);
2739         return ret;
2740 }
2741
2742 static u32 btrfs_type_to_imode(u8 type)
2743 {
2744         static u32 imode_by_btrfs_type[] = {
2745                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2746                 [BTRFS_FT_DIR]          = S_IFDIR,
2747                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2748                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2749                 [BTRFS_FT_FIFO]         = S_IFIFO,
2750                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2751                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2752         };
2753
2754         return imode_by_btrfs_type[(type)];
2755 }
2756
2757 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2758                                 struct btrfs_root *root,
2759                                 struct btrfs_path *path,
2760                                 struct inode_record *rec)
2761 {
2762         u8 filetype;
2763         u32 mode = 0700;
2764         int type_recovered = 0;
2765         int ret = 0;
2766
2767         printf("Trying to rebuild inode:%llu\n", rec->ino);
2768
2769         type_recovered = !find_file_type(rec, &filetype);
2770
2771         /*
2772          * Try to determine inode type if type not found.
2773          *
2774          * For found regular file extent, it must be FILE.
2775          * For found dir_item/index, it must be DIR.
2776          *
2777          * For undetermined one, use FILE as fallback.
2778          *
2779          * TODO:
2780          * 1. If found backref(inode_index/item is already handled) to it,
2781          *    it must be DIR.
2782          *    Need new inode-inode ref structure to allow search for that.
2783          */
2784         if (!type_recovered) {
2785                 if (rec->found_file_extent &&
2786                     find_normal_file_extent(root, rec->ino)) {
2787                         type_recovered = 1;
2788                         filetype = BTRFS_FT_REG_FILE;
2789                 } else if (rec->found_dir_item) {
2790                         type_recovered = 1;
2791                         filetype = BTRFS_FT_DIR;
2792                 } else if (!list_empty(&rec->orphan_extents)) {
2793                         type_recovered = 1;
2794                         filetype = BTRFS_FT_REG_FILE;
2795                 } else{
2796                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2797                                rec->ino);
2798                         type_recovered = 1;
2799                         filetype = BTRFS_FT_REG_FILE;
2800                 }
2801         }
2802
2803         ret = btrfs_new_inode(trans, root, rec->ino,
2804                               mode | btrfs_type_to_imode(filetype));
2805         if (ret < 0)
2806                 goto out;
2807
2808         /*
2809          * Here inode rebuild is done, we only rebuild the inode item,
2810          * don't repair the nlink(like move to lost+found).
2811          * That is the job of nlink repair.
2812          *
2813          * We just fill the record and return
2814          */
2815         rec->found_dir_item = 1;
2816         rec->imode = mode | btrfs_type_to_imode(filetype);
2817         rec->nlink = 0;
2818         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2819         /* Ensure the inode_nlinks repair function will be called */
2820         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2821 out:
2822         return ret;
2823 }
2824
2825 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2826                                       struct btrfs_root *root,
2827                                       struct btrfs_path *path,
2828                                       struct inode_record *rec)
2829 {
2830         struct orphan_data_extent *orphan;
2831         struct orphan_data_extent *tmp;
2832         int ret = 0;
2833
2834         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2835                 /*
2836                  * Check for conflicting file extents
2837                  *
2838                  * Here we don't know whether the extents is compressed or not,
2839                  * so we can only assume it not compressed nor data offset,
2840                  * and use its disk_len as extent length.
2841                  */
2842                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2843                                        orphan->offset, orphan->disk_len, 0);
2844                 btrfs_release_path(path);
2845                 if (ret < 0)
2846                         goto out;
2847                 if (!ret) {
2848                         fprintf(stderr,
2849                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2850                                 orphan->disk_bytenr, orphan->disk_len);
2851                         ret = btrfs_free_extent(trans,
2852                                         root->fs_info->extent_root,
2853                                         orphan->disk_bytenr, orphan->disk_len,
2854                                         0, root->objectid, orphan->objectid,
2855                                         orphan->offset);
2856                         if (ret < 0)
2857                                 goto out;
2858                 }
2859                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2860                                 orphan->offset, orphan->disk_bytenr,
2861                                 orphan->disk_len, orphan->disk_len);
2862                 if (ret < 0)
2863                         goto out;
2864
2865                 /* Update file size info */
2866                 rec->found_size += orphan->disk_len;
2867                 if (rec->found_size == rec->nbytes)
2868                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2869
2870                 /* Update the file extent hole info too */
2871                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2872                                            orphan->disk_len);
2873                 if (ret < 0)
2874                         goto out;
2875                 if (RB_EMPTY_ROOT(&rec->holes))
2876                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2877
2878                 list_del(&orphan->list);
2879                 free(orphan);
2880         }
2881         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2882 out:
2883         return ret;
2884 }
2885
2886 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2887                                         struct btrfs_root *root,
2888                                         struct btrfs_path *path,
2889                                         struct inode_record *rec)
2890 {
2891         struct rb_node *node;
2892         struct file_extent_hole *hole;
2893         int found = 0;
2894         int ret = 0;
2895
2896         node = rb_first(&rec->holes);
2897
2898         while (node) {
2899                 found = 1;
2900                 hole = rb_entry(node, struct file_extent_hole, node);
2901                 ret = btrfs_punch_hole(trans, root, rec->ino,
2902                                        hole->start, hole->len);
2903                 if (ret < 0)
2904                         goto out;
2905                 ret = del_file_extent_hole(&rec->holes, hole->start,
2906                                            hole->len);
2907                 if (ret < 0)
2908                         goto out;
2909                 if (RB_EMPTY_ROOT(&rec->holes))
2910                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2911                 node = rb_first(&rec->holes);
2912         }
2913         /* special case for a file losing all its file extent */
2914         if (!found) {
2915                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2916                                        round_up(rec->isize, root->sectorsize));
2917                 if (ret < 0)
2918                         goto out;
2919         }
2920         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2921                rec->ino, root->objectid);
2922 out:
2923         return ret;
2924 }
2925
2926 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2927 {
2928         struct btrfs_trans_handle *trans;
2929         struct btrfs_path *path;
2930         int ret = 0;
2931
2932         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2933                              I_ERR_NO_ORPHAN_ITEM |
2934                              I_ERR_LINK_COUNT_WRONG |
2935                              I_ERR_NO_INODE_ITEM |
2936                              I_ERR_FILE_EXTENT_ORPHAN |
2937                              I_ERR_FILE_EXTENT_DISCOUNT|
2938                              I_ERR_FILE_NBYTES_WRONG)))
2939                 return rec->errors;
2940
2941         path = btrfs_alloc_path();
2942         if (!path)
2943                 return -ENOMEM;
2944
2945         /*
2946          * For nlink repair, it may create a dir and add link, so
2947          * 2 for parent(256)'s dir_index and dir_item
2948          * 2 for lost+found dir's inode_item and inode_ref
2949          * 1 for the new inode_ref of the file
2950          * 2 for lost+found dir's dir_index and dir_item for the file
2951          */
2952         trans = btrfs_start_transaction(root, 7);
2953         if (IS_ERR(trans)) {
2954                 btrfs_free_path(path);
2955                 return PTR_ERR(trans);
2956         }
2957
2958         if (rec->errors & I_ERR_NO_INODE_ITEM)
2959                 ret = repair_inode_no_item(trans, root, path, rec);
2960         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2961                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2962         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2963                 ret = repair_inode_discount_extent(trans, root, path, rec);
2964         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2965                 ret = repair_inode_isize(trans, root, path, rec);
2966         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2967                 ret = repair_inode_orphan_item(trans, root, path, rec);
2968         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2969                 ret = repair_inode_nlinks(trans, root, path, rec);
2970         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2971                 ret = repair_inode_nbytes(trans, root, path, rec);
2972         btrfs_commit_transaction(trans, root);
2973         btrfs_free_path(path);
2974         return ret;
2975 }
2976
2977 static int check_inode_recs(struct btrfs_root *root,
2978                             struct cache_tree *inode_cache)
2979 {
2980         struct cache_extent *cache;
2981         struct ptr_node *node;
2982         struct inode_record *rec;
2983         struct inode_backref *backref;
2984         int stage = 0;
2985         int ret = 0;
2986         int err = 0;
2987         u64 error = 0;
2988         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2989
2990         if (btrfs_root_refs(&root->root_item) == 0) {
2991                 if (!cache_tree_empty(inode_cache))
2992                         fprintf(stderr, "warning line %d\n", __LINE__);
2993                 return 0;
2994         }
2995
2996         /*
2997          * We need to record the highest inode number for later 'lost+found'
2998          * dir creation.
2999          * We must select an ino not used/referred by any existing inode, or
3000          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3001          * this may cause 'lost+found' dir has wrong nlinks.
3002          */
3003         cache = last_cache_extent(inode_cache);
3004         if (cache) {
3005                 node = container_of(cache, struct ptr_node, cache);
3006                 rec = node->data;
3007                 if (rec->ino > root->highest_inode)
3008                         root->highest_inode = rec->ino;
3009         }
3010
3011         /*
3012          * We need to repair backrefs first because we could change some of the
3013          * errors in the inode recs.
3014          *
3015          * We also need to go through and delete invalid backrefs first and then
3016          * add the correct ones second.  We do this because we may get EEXIST
3017          * when adding back the correct index because we hadn't yet deleted the
3018          * invalid index.
3019          *
3020          * For example, if we were missing a dir index then the directories
3021          * isize would be wrong, so if we fixed the isize to what we thought it
3022          * would be and then fixed the backref we'd still have a invalid fs, so
3023          * we need to add back the dir index and then check to see if the isize
3024          * is still wrong.
3025          */
3026         while (stage < 3) {
3027                 stage++;
3028                 if (stage == 3 && !err)
3029                         break;
3030
3031                 cache = search_cache_extent(inode_cache, 0);
3032                 while (repair && cache) {
3033                         node = container_of(cache, struct ptr_node, cache);
3034                         rec = node->data;
3035                         cache = next_cache_extent(cache);
3036
3037                         /* Need to free everything up and rescan */
3038                         if (stage == 3) {
3039                                 remove_cache_extent(inode_cache, &node->cache);
3040                                 free(node);
3041                                 free_inode_rec(rec);
3042                                 continue;
3043                         }
3044
3045                         if (list_empty(&rec->backrefs))
3046                                 continue;
3047
3048                         ret = repair_inode_backrefs(root, rec, inode_cache,
3049                                                     stage == 1);
3050                         if (ret < 0) {
3051                                 err = ret;
3052                                 stage = 2;
3053                                 break;
3054                         } if (ret > 0) {
3055                                 err = -EAGAIN;
3056                         }
3057                 }
3058         }
3059         if (err)
3060                 return err;
3061
3062         rec = get_inode_rec(inode_cache, root_dirid, 0);
3063         BUG_ON(IS_ERR(rec));
3064         if (rec) {
3065                 ret = check_root_dir(rec);
3066                 if (ret) {
3067                         fprintf(stderr, "root %llu root dir %llu error\n",
3068                                 (unsigned long long)root->root_key.objectid,
3069                                 (unsigned long long)root_dirid);
3070                         print_inode_error(root, rec);
3071                         error++;
3072                 }
3073         } else {
3074                 if (repair) {
3075                         struct btrfs_trans_handle *trans;
3076
3077                         trans = btrfs_start_transaction(root, 1);
3078                         if (IS_ERR(trans)) {
3079                                 err = PTR_ERR(trans);
3080                                 return err;
3081                         }
3082
3083                         fprintf(stderr,
3084                                 "root %llu missing its root dir, recreating\n",
3085                                 (unsigned long long)root->objectid);
3086
3087                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3088                         BUG_ON(ret);
3089
3090                         btrfs_commit_transaction(trans, root);
3091                         return -EAGAIN;
3092                 }
3093
3094                 fprintf(stderr, "root %llu root dir %llu not found\n",
3095                         (unsigned long long)root->root_key.objectid,
3096                         (unsigned long long)root_dirid);
3097         }
3098
3099         while (1) {
3100                 cache = search_cache_extent(inode_cache, 0);
3101                 if (!cache)
3102                         break;
3103                 node = container_of(cache, struct ptr_node, cache);
3104                 rec = node->data;
3105                 remove_cache_extent(inode_cache, &node->cache);
3106                 free(node);
3107                 if (rec->ino == root_dirid ||
3108                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3109                         free_inode_rec(rec);
3110                         continue;
3111                 }
3112
3113                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3114                         ret = check_orphan_item(root, rec->ino);
3115                         if (ret == 0)
3116                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3117                         if (can_free_inode_rec(rec)) {
3118                                 free_inode_rec(rec);
3119                                 continue;
3120                         }
3121                 }
3122
3123                 if (!rec->found_inode_item)
3124                         rec->errors |= I_ERR_NO_INODE_ITEM;
3125                 if (rec->found_link != rec->nlink)
3126                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3127                 if (repair) {
3128                         ret = try_repair_inode(root, rec);
3129                         if (ret == 0 && can_free_inode_rec(rec)) {
3130                                 free_inode_rec(rec);
3131                                 continue;
3132                         }
3133                         ret = 0;
3134                 }
3135
3136                 if (!(repair && ret == 0))
3137                         error++;
3138                 print_inode_error(root, rec);
3139                 list_for_each_entry(backref, &rec->backrefs, list) {
3140                         if (!backref->found_dir_item)
3141                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3142                         if (!backref->found_dir_index)
3143                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3144                         if (!backref->found_inode_ref)
3145                                 backref->errors |= REF_ERR_NO_INODE_REF;
3146                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3147                                 " namelen %u name %s filetype %d errors %x",
3148                                 (unsigned long long)backref->dir,
3149                                 (unsigned long long)backref->index,
3150                                 backref->namelen, backref->name,
3151                                 backref->filetype, backref->errors);
3152                         print_ref_error(backref->errors);
3153                 }
3154                 free_inode_rec(rec);
3155         }
3156         return (error > 0) ? -1 : 0;
3157 }
3158
3159 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3160                                         u64 objectid)
3161 {
3162         struct cache_extent *cache;
3163         struct root_record *rec = NULL;
3164         int ret;
3165
3166         cache = lookup_cache_extent(root_cache, objectid, 1);
3167         if (cache) {
3168                 rec = container_of(cache, struct root_record, cache);
3169         } else {
3170                 rec = calloc(1, sizeof(*rec));
3171                 if (!rec)
3172                         return ERR_PTR(-ENOMEM);
3173                 rec->objectid = objectid;
3174                 INIT_LIST_HEAD(&rec->backrefs);
3175                 rec->cache.start = objectid;
3176                 rec->cache.size = 1;
3177
3178                 ret = insert_cache_extent(root_cache, &rec->cache);
3179                 if (ret)
3180                         return ERR_PTR(-EEXIST);
3181         }
3182         return rec;
3183 }
3184
3185 static struct root_backref *get_root_backref(struct root_record *rec,
3186                                              u64 ref_root, u64 dir, u64 index,
3187                                              const char *name, int namelen)
3188 {
3189         struct root_backref *backref;
3190
3191         list_for_each_entry(backref, &rec->backrefs, list) {
3192                 if (backref->ref_root != ref_root || backref->dir != dir ||
3193                     backref->namelen != namelen)
3194                         continue;
3195                 if (memcmp(name, backref->name, namelen))
3196                         continue;
3197                 return backref;
3198         }
3199
3200         backref = calloc(1, sizeof(*backref) + namelen + 1);
3201         if (!backref)
3202                 return NULL;
3203         backref->ref_root = ref_root;
3204         backref->dir = dir;
3205         backref->index = index;
3206         backref->namelen = namelen;
3207         memcpy(backref->name, name, namelen);
3208         backref->name[namelen] = '\0';
3209         list_add_tail(&backref->list, &rec->backrefs);
3210         return backref;
3211 }
3212
3213 static void free_root_record(struct cache_extent *cache)
3214 {
3215         struct root_record *rec;
3216         struct root_backref *backref;
3217
3218         rec = container_of(cache, struct root_record, cache);
3219         while (!list_empty(&rec->backrefs)) {
3220                 backref = to_root_backref(rec->backrefs.next);
3221                 list_del(&backref->list);
3222                 free(backref);
3223         }
3224
3225         free(rec);
3226 }
3227
3228 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3229
3230 static int add_root_backref(struct cache_tree *root_cache,
3231                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3232                             const char *name, int namelen,
3233                             int item_type, int errors)
3234 {
3235         struct root_record *rec;
3236         struct root_backref *backref;
3237
3238         rec = get_root_rec(root_cache, root_id);
3239         BUG_ON(IS_ERR(rec));
3240         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3241         BUG_ON(!backref);
3242
3243         backref->errors |= errors;
3244
3245         if (item_type != BTRFS_DIR_ITEM_KEY) {
3246                 if (backref->found_dir_index || backref->found_back_ref ||
3247                     backref->found_forward_ref) {
3248                         if (backref->index != index)
3249                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3250                 } else {
3251                         backref->index = index;
3252                 }
3253         }
3254
3255         if (item_type == BTRFS_DIR_ITEM_KEY) {
3256                 if (backref->found_forward_ref)
3257                         rec->found_ref++;
3258                 backref->found_dir_item = 1;
3259         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3260                 backref->found_dir_index = 1;
3261         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3262                 if (backref->found_forward_ref)
3263                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3264                 else if (backref->found_dir_item)
3265                         rec->found_ref++;
3266                 backref->found_forward_ref = 1;
3267         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3268                 if (backref->found_back_ref)
3269                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3270                 backref->found_back_ref = 1;
3271         } else {
3272                 BUG_ON(1);
3273         }
3274
3275         if (backref->found_forward_ref && backref->found_dir_item)
3276                 backref->reachable = 1;
3277         return 0;
3278 }
3279
3280 static int merge_root_recs(struct btrfs_root *root,
3281                            struct cache_tree *src_cache,
3282                            struct cache_tree *dst_cache)
3283 {
3284         struct cache_extent *cache;
3285         struct ptr_node *node;
3286         struct inode_record *rec;
3287         struct inode_backref *backref;
3288         int ret = 0;
3289
3290         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3291                 free_inode_recs_tree(src_cache);
3292                 return 0;
3293         }
3294
3295         while (1) {
3296                 cache = search_cache_extent(src_cache, 0);
3297                 if (!cache)
3298                         break;
3299                 node = container_of(cache, struct ptr_node, cache);
3300                 rec = node->data;
3301                 remove_cache_extent(src_cache, &node->cache);
3302                 free(node);
3303
3304                 ret = is_child_root(root, root->objectid, rec->ino);
3305                 if (ret < 0)
3306                         break;
3307                 else if (ret == 0)
3308                         goto skip;
3309
3310                 list_for_each_entry(backref, &rec->backrefs, list) {
3311                         BUG_ON(backref->found_inode_ref);
3312                         if (backref->found_dir_item)
3313                                 add_root_backref(dst_cache, rec->ino,
3314                                         root->root_key.objectid, backref->dir,
3315                                         backref->index, backref->name,
3316                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3317                                         backref->errors);
3318                         if (backref->found_dir_index)
3319                                 add_root_backref(dst_cache, rec->ino,
3320                                         root->root_key.objectid, backref->dir,
3321                                         backref->index, backref->name,
3322                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3323                                         backref->errors);
3324                 }
3325 skip:
3326                 free_inode_rec(rec);
3327         }
3328         if (ret < 0)
3329                 return ret;
3330         return 0;
3331 }
3332
3333 static int check_root_refs(struct btrfs_root *root,
3334                            struct cache_tree *root_cache)
3335 {
3336         struct root_record *rec;
3337         struct root_record *ref_root;
3338         struct root_backref *backref;
3339         struct cache_extent *cache;
3340         int loop = 1;
3341         int ret;
3342         int error;
3343         int errors = 0;
3344
3345         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3346         BUG_ON(IS_ERR(rec));
3347         rec->found_ref = 1;
3348
3349         /* fixme: this can not detect circular references */
3350         while (loop) {
3351                 loop = 0;
3352                 cache = search_cache_extent(root_cache, 0);
3353                 while (1) {
3354                         if (!cache)
3355                                 break;
3356                         rec = container_of(cache, struct root_record, cache);
3357                         cache = next_cache_extent(cache);
3358
3359                         if (rec->found_ref == 0)
3360                                 continue;
3361
3362                         list_for_each_entry(backref, &rec->backrefs, list) {
3363                                 if (!backref->reachable)
3364                                         continue;
3365
3366                                 ref_root = get_root_rec(root_cache,
3367                                                         backref->ref_root);
3368                                 BUG_ON(IS_ERR(ref_root));
3369                                 if (ref_root->found_ref > 0)
3370                                         continue;
3371
3372                                 backref->reachable = 0;
3373                                 rec->found_ref--;
3374                                 if (rec->found_ref == 0)
3375                                         loop = 1;
3376                         }
3377                 }
3378         }
3379
3380         cache = search_cache_extent(root_cache, 0);
3381         while (1) {
3382                 if (!cache)
3383                         break;
3384                 rec = container_of(cache, struct root_record, cache);
3385                 cache = next_cache_extent(cache);
3386
3387                 if (rec->found_ref == 0 &&
3388                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3389                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3390                         ret = check_orphan_item(root->fs_info->tree_root,
3391                                                 rec->objectid);
3392                         if (ret == 0)
3393                                 continue;
3394
3395                         /*
3396                          * If we don't have a root item then we likely just have
3397                          * a dir item in a snapshot for this root but no actual
3398                          * ref key or anything so it's meaningless.
3399                          */
3400                         if (!rec->found_root_item)
3401                                 continue;
3402                         errors++;
3403                         fprintf(stderr, "fs tree %llu not referenced\n",
3404                                 (unsigned long long)rec->objectid);
3405                 }
3406
3407                 error = 0;
3408                 if (rec->found_ref > 0 && !rec->found_root_item)
3409                         error = 1;
3410                 list_for_each_entry(backref, &rec->backrefs, list) {
3411                         if (!backref->found_dir_item)
3412                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3413                         if (!backref->found_dir_index)
3414                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3415                         if (!backref->found_back_ref)
3416                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3417                         if (!backref->found_forward_ref)
3418                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3419                         if (backref->reachable && backref->errors)
3420                                 error = 1;
3421                 }
3422                 if (!error)
3423                         continue;
3424
3425                 errors++;
3426                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3427                         (unsigned long long)rec->objectid, rec->found_ref,
3428                          rec->found_root_item ? "" : "not found");
3429
3430                 list_for_each_entry(backref, &rec->backrefs, list) {
3431                         if (!backref->reachable)
3432                                 continue;
3433                         if (!backref->errors && rec->found_root_item)
3434                                 continue;
3435                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3436                                 " index %llu namelen %u name %s errors %x\n",
3437                                 (unsigned long long)backref->ref_root,
3438                                 (unsigned long long)backref->dir,
3439                                 (unsigned long long)backref->index,
3440                                 backref->namelen, backref->name,
3441                                 backref->errors);
3442                         print_ref_error(backref->errors);
3443                 }
3444         }
3445         return errors > 0 ? 1 : 0;
3446 }
3447
3448 static int process_root_ref(struct extent_buffer *eb, int slot,
3449                             struct btrfs_key *key,
3450                             struct cache_tree *root_cache)
3451 {
3452         u64 dirid;
3453         u64 index;
3454         u32 len;
3455         u32 name_len;
3456         struct btrfs_root_ref *ref;
3457         char namebuf[BTRFS_NAME_LEN];
3458         int error;
3459
3460         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3461
3462         dirid = btrfs_root_ref_dirid(eb, ref);
3463         index = btrfs_root_ref_sequence(eb, ref);
3464         name_len = btrfs_root_ref_name_len(eb, ref);
3465
3466         if (name_len <= BTRFS_NAME_LEN) {
3467                 len = name_len;
3468                 error = 0;
3469         } else {
3470                 len = BTRFS_NAME_LEN;
3471                 error = REF_ERR_NAME_TOO_LONG;
3472         }
3473         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3474
3475         if (key->type == BTRFS_ROOT_REF_KEY) {
3476                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3477                                  index, namebuf, len, key->type, error);
3478         } else {
3479                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3480                                  index, namebuf, len, key->type, error);
3481         }
3482         return 0;
3483 }
3484
3485 static void free_corrupt_block(struct cache_extent *cache)
3486 {
3487         struct btrfs_corrupt_block *corrupt;
3488
3489         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3490         free(corrupt);
3491 }
3492
3493 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3494
3495 /*
3496  * Repair the btree of the given root.
3497  *
3498  * The fix is to remove the node key in corrupt_blocks cache_tree.
3499  * and rebalance the tree.
3500  * After the fix, the btree should be writeable.
3501  */
3502 static int repair_btree(struct btrfs_root *root,
3503                         struct cache_tree *corrupt_blocks)
3504 {
3505         struct btrfs_trans_handle *trans;
3506         struct btrfs_path *path;
3507         struct btrfs_corrupt_block *corrupt;
3508         struct cache_extent *cache;
3509         struct btrfs_key key;
3510         u64 offset;
3511         int level;
3512         int ret = 0;
3513
3514         if (cache_tree_empty(corrupt_blocks))
3515                 return 0;
3516
3517         path = btrfs_alloc_path();
3518         if (!path)
3519                 return -ENOMEM;
3520
3521         trans = btrfs_start_transaction(root, 1);
3522         if (IS_ERR(trans)) {
3523                 ret = PTR_ERR(trans);
3524                 fprintf(stderr, "Error starting transaction: %s\n",
3525                         strerror(-ret));
3526                 goto out_free_path;
3527         }
3528         cache = first_cache_extent(corrupt_blocks);
3529         while (cache) {
3530                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3531                                        cache);
3532                 level = corrupt->level;
3533                 path->lowest_level = level;
3534                 key.objectid = corrupt->key.objectid;
3535                 key.type = corrupt->key.type;
3536                 key.offset = corrupt->key.offset;
3537
3538                 /*
3539                  * Here we don't want to do any tree balance, since it may
3540                  * cause a balance with corrupted brother leaf/node,
3541                  * so ins_len set to 0 here.
3542                  * Balance will be done after all corrupt node/leaf is deleted.
3543                  */
3544                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3545                 if (ret < 0)
3546                         goto out;
3547                 offset = btrfs_node_blockptr(path->nodes[level],
3548                                              path->slots[level]);
3549
3550                 /* Remove the ptr */
3551                 ret = btrfs_del_ptr(trans, root, path, level,
3552                                     path->slots[level]);
3553                 if (ret < 0)
3554                         goto out;
3555                 /*
3556                  * Remove the corresponding extent
3557                  * return value is not concerned.
3558                  */
3559                 btrfs_release_path(path);
3560                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3561                                         0, root->root_key.objectid,
3562                                         level - 1, 0);
3563                 cache = next_cache_extent(cache);
3564         }
3565
3566         /* Balance the btree using btrfs_search_slot() */
3567         cache = first_cache_extent(corrupt_blocks);
3568         while (cache) {
3569                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3570                                        cache);
3571                 memcpy(&key, &corrupt->key, sizeof(key));
3572                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3573                 if (ret < 0)
3574                         goto out;
3575                 /* return will always >0 since it won't find the item */
3576                 ret = 0;
3577                 btrfs_release_path(path);
3578                 cache = next_cache_extent(cache);
3579         }
3580 out:
3581         btrfs_commit_transaction(trans, root);
3582 out_free_path:
3583         btrfs_free_path(path);
3584         return ret;
3585 }
3586
3587 static int check_fs_root(struct btrfs_root *root,
3588                          struct cache_tree *root_cache,
3589                          struct walk_control *wc)
3590 {
3591         int ret = 0;
3592         int err = 0;
3593         int wret;
3594         int level;
3595         struct btrfs_path path;
3596         struct shared_node root_node;
3597         struct root_record *rec;
3598         struct btrfs_root_item *root_item = &root->root_item;
3599         struct cache_tree corrupt_blocks;
3600         struct orphan_data_extent *orphan;
3601         struct orphan_data_extent *tmp;
3602         enum btrfs_tree_block_status status;
3603         struct node_refs nrefs;
3604
3605         /*
3606          * Reuse the corrupt_block cache tree to record corrupted tree block
3607          *
3608          * Unlike the usage in extent tree check, here we do it in a per
3609          * fs/subvol tree base.
3610          */
3611         cache_tree_init(&corrupt_blocks);
3612         root->fs_info->corrupt_blocks = &corrupt_blocks;
3613
3614         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3615                 rec = get_root_rec(root_cache, root->root_key.objectid);
3616                 BUG_ON(IS_ERR(rec));
3617                 if (btrfs_root_refs(root_item) > 0)
3618                         rec->found_root_item = 1;
3619         }
3620
3621         btrfs_init_path(&path);
3622         memset(&root_node, 0, sizeof(root_node));
3623         cache_tree_init(&root_node.root_cache);
3624         cache_tree_init(&root_node.inode_cache);
3625         memset(&nrefs, 0, sizeof(nrefs));
3626
3627         /* Move the orphan extent record to corresponding inode_record */
3628         list_for_each_entry_safe(orphan, tmp,
3629                                  &root->orphan_data_extents, list) {
3630                 struct inode_record *inode;
3631
3632                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3633                                       1);
3634                 BUG_ON(IS_ERR(inode));
3635                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3636                 list_move(&orphan->list, &inode->orphan_extents);
3637         }
3638
3639         level = btrfs_header_level(root->node);
3640         memset(wc->nodes, 0, sizeof(wc->nodes));
3641         wc->nodes[level] = &root_node;
3642         wc->active_node = level;
3643         wc->root_level = level;
3644
3645         /* We may not have checked the root block, lets do that now */
3646         if (btrfs_is_leaf(root->node))
3647                 status = btrfs_check_leaf(root, NULL, root->node);
3648         else
3649                 status = btrfs_check_node(root, NULL, root->node);
3650         if (status != BTRFS_TREE_BLOCK_CLEAN)
3651                 return -EIO;
3652
3653         if (btrfs_root_refs(root_item) > 0 ||
3654             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3655                 path.nodes[level] = root->node;
3656                 extent_buffer_get(root->node);
3657                 path.slots[level] = 0;
3658         } else {
3659                 struct btrfs_key key;
3660                 struct btrfs_disk_key found_key;
3661
3662                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3663                 level = root_item->drop_level;
3664                 path.lowest_level = level;
3665                 if (level > btrfs_header_level(root->node) ||
3666                     level >= BTRFS_MAX_LEVEL) {
3667                         error("ignoring invalid drop level: %u", level);
3668                         goto skip_walking;
3669                 }
3670                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3671                 if (wret < 0)
3672                         goto skip_walking;
3673                 btrfs_node_key(path.nodes[level], &found_key,
3674                                 path.slots[level]);
3675                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3676                                         sizeof(found_key)));
3677         }
3678
3679         while (1) {
3680                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3681                 if (wret < 0)
3682                         ret = wret;
3683                 if (wret != 0)
3684                         break;
3685
3686                 wret = walk_up_tree(root, &path, wc, &level);
3687                 if (wret < 0)
3688                         ret = wret;
3689                 if (wret != 0)
3690                         break;
3691         }
3692 skip_walking:
3693         btrfs_release_path(&path);
3694
3695         if (!cache_tree_empty(&corrupt_blocks)) {
3696                 struct cache_extent *cache;
3697                 struct btrfs_corrupt_block *corrupt;
3698
3699                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3700                        root->root_key.objectid);
3701                 cache = first_cache_extent(&corrupt_blocks);
3702                 while (cache) {
3703                         corrupt = container_of(cache,
3704                                                struct btrfs_corrupt_block,
3705                                                cache);
3706                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3707                                cache->start, corrupt->level,
3708                                corrupt->key.objectid, corrupt->key.type,
3709                                corrupt->key.offset);
3710                         cache = next_cache_extent(cache);
3711                 }
3712                 if (repair) {
3713                         printf("Try to repair the btree for root %llu\n",
3714                                root->root_key.objectid);
3715                         ret = repair_btree(root, &corrupt_blocks);
3716                         if (ret < 0)
3717                                 fprintf(stderr, "Failed to repair btree: %s\n",
3718                                         strerror(-ret));
3719                         if (!ret)
3720                                 printf("Btree for root %llu is fixed\n",
3721                                        root->root_key.objectid);
3722                 }
3723         }
3724
3725         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3726         if (err < 0)
3727                 ret = err;
3728
3729         if (root_node.current) {
3730                 root_node.current->checked = 1;
3731                 maybe_free_inode_rec(&root_node.inode_cache,
3732                                 root_node.current);
3733         }
3734
3735         err = check_inode_recs(root, &root_node.inode_cache);
3736         if (!ret)
3737                 ret = err;
3738
3739         free_corrupt_blocks_tree(&corrupt_blocks);
3740         root->fs_info->corrupt_blocks = NULL;
3741         free_orphan_data_extents(&root->orphan_data_extents);
3742         return ret;
3743 }
3744
3745 static int fs_root_objectid(u64 objectid)
3746 {
3747         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3748             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3749                 return 1;
3750         return is_fstree(objectid);
3751 }
3752
3753 static int check_fs_roots(struct btrfs_root *root,
3754                           struct cache_tree *root_cache)
3755 {
3756         struct btrfs_path path;
3757         struct btrfs_key key;
3758         struct walk_control wc;
3759         struct extent_buffer *leaf, *tree_node;
3760         struct btrfs_root *tmp_root;
3761         struct btrfs_root *tree_root = root->fs_info->tree_root;
3762         int ret;
3763         int err = 0;
3764
3765         if (ctx.progress_enabled) {
3766                 ctx.tp = TASK_FS_ROOTS;
3767                 task_start(ctx.info);
3768         }
3769
3770         /*
3771          * Just in case we made any changes to the extent tree that weren't
3772          * reflected into the free space cache yet.
3773          */
3774         if (repair)
3775                 reset_cached_block_groups(root->fs_info);
3776         memset(&wc, 0, sizeof(wc));
3777         cache_tree_init(&wc.shared);
3778         btrfs_init_path(&path);
3779
3780 again:
3781         key.offset = 0;
3782         key.objectid = 0;
3783         key.type = BTRFS_ROOT_ITEM_KEY;
3784         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3785         if (ret < 0) {
3786                 err = 1;
3787                 goto out;
3788         }
3789         tree_node = tree_root->node;
3790         while (1) {
3791                 if (tree_node != tree_root->node) {
3792                         free_root_recs_tree(root_cache);
3793                         btrfs_release_path(&path);
3794                         goto again;
3795                 }
3796                 leaf = path.nodes[0];
3797                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3798                         ret = btrfs_next_leaf(tree_root, &path);
3799                         if (ret) {
3800                                 if (ret < 0)
3801                                         err = 1;
3802                                 break;
3803                         }
3804                         leaf = path.nodes[0];
3805                 }
3806                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3807                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3808                     fs_root_objectid(key.objectid)) {
3809                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3810                                 tmp_root = btrfs_read_fs_root_no_cache(
3811                                                 root->fs_info, &key);
3812                         } else {
3813                                 key.offset = (u64)-1;
3814                                 tmp_root = btrfs_read_fs_root(
3815                                                 root->fs_info, &key);
3816                         }
3817                         if (IS_ERR(tmp_root)) {
3818                                 err = 1;
3819                                 goto next;
3820                         }
3821                         ret = check_fs_root(tmp_root, root_cache, &wc);
3822                         if (ret == -EAGAIN) {
3823                                 free_root_recs_tree(root_cache);
3824                                 btrfs_release_path(&path);
3825                                 goto again;
3826                         }
3827                         if (ret)
3828                                 err = 1;
3829                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3830                                 btrfs_free_fs_root(tmp_root);
3831                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3832                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3833                         process_root_ref(leaf, path.slots[0], &key,
3834                                          root_cache);
3835                 }
3836 next:
3837                 path.slots[0]++;
3838         }
3839 out:
3840         btrfs_release_path(&path);
3841         if (err)
3842                 free_extent_cache_tree(&wc.shared);
3843         if (!cache_tree_empty(&wc.shared))
3844                 fprintf(stderr, "warning line %d\n", __LINE__);
3845
3846         task_stop(ctx.info);
3847
3848         return err;
3849 }
3850
3851 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3852 {
3853         struct list_head *cur = rec->backrefs.next;
3854         struct extent_backref *back;
3855         struct tree_backref *tback;
3856         struct data_backref *dback;
3857         u64 found = 0;
3858         int err = 0;
3859
3860         while(cur != &rec->backrefs) {
3861                 back = to_extent_backref(cur);
3862                 cur = cur->next;
3863                 if (!back->found_extent_tree) {
3864                         err = 1;
3865                         if (!print_errs)
3866                                 goto out;
3867                         if (back->is_data) {
3868                                 dback = to_data_backref(back);
3869                                 fprintf(stderr, "Backref %llu %s %llu"
3870                                         " owner %llu offset %llu num_refs %lu"
3871                                         " not found in extent tree\n",
3872                                         (unsigned long long)rec->start,
3873                                         back->full_backref ?
3874                                         "parent" : "root",
3875                                         back->full_backref ?
3876                                         (unsigned long long)dback->parent:
3877                                         (unsigned long long)dback->root,
3878                                         (unsigned long long)dback->owner,
3879                                         (unsigned long long)dback->offset,
3880                                         (unsigned long)dback->num_refs);
3881                         } else {
3882                                 tback = to_tree_backref(back);
3883                                 fprintf(stderr, "Backref %llu parent %llu"
3884                                         " root %llu not found in extent tree\n",
3885                                         (unsigned long long)rec->start,
3886                                         (unsigned long long)tback->parent,
3887                                         (unsigned long long)tback->root);
3888                         }
3889                 }
3890                 if (!back->is_data && !back->found_ref) {
3891                         err = 1;
3892                         if (!print_errs)
3893                                 goto out;
3894                         tback = to_tree_backref(back);
3895                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3896                                 (unsigned long long)rec->start,
3897                                 back->full_backref ? "parent" : "root",
3898                                 back->full_backref ?
3899                                 (unsigned long long)tback->parent :
3900                                 (unsigned long long)tback->root, back);
3901                 }
3902                 if (back->is_data) {
3903                         dback = to_data_backref(back);
3904                         if (dback->found_ref != dback->num_refs) {
3905                                 err = 1;
3906                                 if (!print_errs)
3907                                         goto out;
3908                                 fprintf(stderr, "Incorrect local backref count"
3909                                         " on %llu %s %llu owner %llu"
3910                                         " offset %llu found %u wanted %u back %p\n",
3911                                         (unsigned long long)rec->start,
3912                                         back->full_backref ?
3913                                         "parent" : "root",
3914                                         back->full_backref ?
3915                                         (unsigned long long)dback->parent:
3916                                         (unsigned long long)dback->root,
3917                                         (unsigned long long)dback->owner,
3918                                         (unsigned long long)dback->offset,
3919                                         dback->found_ref, dback->num_refs, back);
3920                         }
3921                         if (dback->disk_bytenr != rec->start) {
3922                                 err = 1;
3923                                 if (!print_errs)
3924                                         goto out;
3925                                 fprintf(stderr, "Backref disk bytenr does not"
3926                                         " match extent record, bytenr=%llu, "
3927                                         "ref bytenr=%llu\n",
3928                                         (unsigned long long)rec->start,
3929                                         (unsigned long long)dback->disk_bytenr);
3930                         }
3931
3932                         if (dback->bytes != rec->nr) {
3933                                 err = 1;
3934                                 if (!print_errs)
3935                                         goto out;
3936                                 fprintf(stderr, "Backref bytes do not match "
3937                                         "extent backref, bytenr=%llu, ref "
3938                                         "bytes=%llu, backref bytes=%llu\n",
3939                                         (unsigned long long)rec->start,
3940                                         (unsigned long long)rec->nr,
3941                                         (unsigned long long)dback->bytes);
3942                         }
3943                 }
3944                 if (!back->is_data) {
3945                         found += 1;
3946                 } else {
3947                         dback = to_data_backref(back);
3948                         found += dback->found_ref;
3949                 }
3950         }
3951         if (found != rec->refs) {
3952                 err = 1;
3953                 if (!print_errs)
3954                         goto out;
3955                 fprintf(stderr, "Incorrect global backref count "
3956                         "on %llu found %llu wanted %llu\n",
3957                         (unsigned long long)rec->start,
3958                         (unsigned long long)found,
3959                         (unsigned long long)rec->refs);
3960         }
3961 out:
3962         return err;
3963 }
3964
3965 static int free_all_extent_backrefs(struct extent_record *rec)
3966 {
3967         struct extent_backref *back;
3968         struct list_head *cur;
3969         while (!list_empty(&rec->backrefs)) {
3970                 cur = rec->backrefs.next;
3971                 back = to_extent_backref(cur);
3972                 list_del(cur);
3973                 free(back);
3974         }
3975         return 0;
3976 }
3977
3978 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3979                                      struct cache_tree *extent_cache)
3980 {
3981         struct cache_extent *cache;
3982         struct extent_record *rec;
3983
3984         while (1) {
3985                 cache = first_cache_extent(extent_cache);
3986                 if (!cache)
3987                         break;
3988                 rec = container_of(cache, struct extent_record, cache);
3989                 remove_cache_extent(extent_cache, cache);
3990                 free_all_extent_backrefs(rec);
3991                 free(rec);
3992         }
3993 }
3994
3995 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3996                                  struct extent_record *rec)
3997 {
3998         if (rec->content_checked && rec->owner_ref_checked &&
3999             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4000             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4001             !rec->bad_full_backref && !rec->crossing_stripes &&
4002             !rec->wrong_chunk_type) {
4003                 remove_cache_extent(extent_cache, &rec->cache);
4004                 free_all_extent_backrefs(rec);
4005                 list_del_init(&rec->list);
4006                 free(rec);
4007         }
4008         return 0;
4009 }
4010
4011 static int check_owner_ref(struct btrfs_root *root,
4012                             struct extent_record *rec,
4013                             struct extent_buffer *buf)
4014 {
4015         struct extent_backref *node;
4016         struct tree_backref *back;
4017         struct btrfs_root *ref_root;
4018         struct btrfs_key key;
4019         struct btrfs_path path;
4020         struct extent_buffer *parent;
4021         int level;
4022         int found = 0;
4023         int ret;
4024
4025         list_for_each_entry(node, &rec->backrefs, list) {
4026                 if (node->is_data)
4027                         continue;
4028                 if (!node->found_ref)
4029                         continue;
4030                 if (node->full_backref)
4031                         continue;
4032                 back = to_tree_backref(node);
4033                 if (btrfs_header_owner(buf) == back->root)
4034                         return 0;
4035         }
4036         BUG_ON(rec->is_root);
4037
4038         /* try to find the block by search corresponding fs tree */
4039         key.objectid = btrfs_header_owner(buf);
4040         key.type = BTRFS_ROOT_ITEM_KEY;
4041         key.offset = (u64)-1;
4042
4043         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4044         if (IS_ERR(ref_root))
4045                 return 1;
4046
4047         level = btrfs_header_level(buf);
4048         if (level == 0)
4049                 btrfs_item_key_to_cpu(buf, &key, 0);
4050         else
4051                 btrfs_node_key_to_cpu(buf, &key, 0);
4052
4053         btrfs_init_path(&path);
4054         path.lowest_level = level + 1;
4055         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4056         if (ret < 0)
4057                 return 0;
4058
4059         parent = path.nodes[level + 1];
4060         if (parent && buf->start == btrfs_node_blockptr(parent,
4061                                                         path.slots[level + 1]))
4062                 found = 1;
4063
4064         btrfs_release_path(&path);
4065         return found ? 0 : 1;
4066 }
4067
4068 static int is_extent_tree_record(struct extent_record *rec)
4069 {
4070         struct list_head *cur = rec->backrefs.next;
4071         struct extent_backref *node;
4072         struct tree_backref *back;
4073         int is_extent = 0;
4074
4075         while(cur != &rec->backrefs) {
4076                 node = to_extent_backref(cur);
4077                 cur = cur->next;
4078                 if (node->is_data)
4079                         return 0;
4080                 back = to_tree_backref(node);
4081                 if (node->full_backref)
4082                         return 0;
4083                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4084                         is_extent = 1;
4085         }
4086         return is_extent;
4087 }
4088
4089
4090 static int record_bad_block_io(struct btrfs_fs_info *info,
4091                                struct cache_tree *extent_cache,
4092                                u64 start, u64 len)
4093 {
4094         struct extent_record *rec;
4095         struct cache_extent *cache;
4096         struct btrfs_key key;
4097
4098         cache = lookup_cache_extent(extent_cache, start, len);
4099         if (!cache)
4100                 return 0;
4101
4102         rec = container_of(cache, struct extent_record, cache);
4103         if (!is_extent_tree_record(rec))
4104                 return 0;
4105
4106         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4107         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4108 }
4109
4110 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4111                        struct extent_buffer *buf, int slot)
4112 {
4113         if (btrfs_header_level(buf)) {
4114                 struct btrfs_key_ptr ptr1, ptr2;
4115
4116                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4117                                    sizeof(struct btrfs_key_ptr));
4118                 read_extent_buffer(buf, &ptr2,
4119                                    btrfs_node_key_ptr_offset(slot + 1),
4120                                    sizeof(struct btrfs_key_ptr));
4121                 write_extent_buffer(buf, &ptr1,
4122                                     btrfs_node_key_ptr_offset(slot + 1),
4123                                     sizeof(struct btrfs_key_ptr));
4124                 write_extent_buffer(buf, &ptr2,
4125                                     btrfs_node_key_ptr_offset(slot),
4126                                     sizeof(struct btrfs_key_ptr));
4127                 if (slot == 0) {
4128                         struct btrfs_disk_key key;
4129                         btrfs_node_key(buf, &key, 0);
4130                         btrfs_fixup_low_keys(root, path, &key,
4131                                              btrfs_header_level(buf) + 1);
4132                 }
4133         } else {
4134                 struct btrfs_item *item1, *item2;
4135                 struct btrfs_key k1, k2;
4136                 char *item1_data, *item2_data;
4137                 u32 item1_offset, item2_offset, item1_size, item2_size;
4138
4139                 item1 = btrfs_item_nr(slot);
4140                 item2 = btrfs_item_nr(slot + 1);
4141                 btrfs_item_key_to_cpu(buf, &k1, slot);
4142                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4143                 item1_offset = btrfs_item_offset(buf, item1);
4144                 item2_offset = btrfs_item_offset(buf, item2);
4145                 item1_size = btrfs_item_size(buf, item1);
4146                 item2_size = btrfs_item_size(buf, item2);
4147
4148                 item1_data = malloc(item1_size);
4149                 if (!item1_data)
4150                         return -ENOMEM;
4151                 item2_data = malloc(item2_size);
4152                 if (!item2_data) {
4153                         free(item1_data);
4154                         return -ENOMEM;
4155                 }
4156
4157                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4158                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4159
4160                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4161                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4162                 free(item1_data);
4163                 free(item2_data);
4164
4165                 btrfs_set_item_offset(buf, item1, item2_offset);
4166                 btrfs_set_item_offset(buf, item2, item1_offset);
4167                 btrfs_set_item_size(buf, item1, item2_size);
4168                 btrfs_set_item_size(buf, item2, item1_size);
4169
4170                 path->slots[0] = slot;
4171                 btrfs_set_item_key_unsafe(root, path, &k2);
4172                 path->slots[0] = slot + 1;
4173                 btrfs_set_item_key_unsafe(root, path, &k1);
4174         }
4175         return 0;
4176 }
4177
4178 static int fix_key_order(struct btrfs_trans_handle *trans,
4179                          struct btrfs_root *root,
4180                          struct btrfs_path *path)
4181 {
4182         struct extent_buffer *buf;
4183         struct btrfs_key k1, k2;
4184         int i;
4185         int level = path->lowest_level;
4186         int ret = -EIO;
4187
4188         buf = path->nodes[level];
4189         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4190                 if (level) {
4191                         btrfs_node_key_to_cpu(buf, &k1, i);
4192                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4193                 } else {
4194                         btrfs_item_key_to_cpu(buf, &k1, i);
4195                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4196                 }
4197                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4198                         continue;
4199                 ret = swap_values(root, path, buf, i);
4200                 if (ret)
4201                         break;
4202                 btrfs_mark_buffer_dirty(buf);
4203                 i = 0;
4204         }
4205         return ret;
4206 }
4207
4208 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4209                              struct btrfs_root *root,
4210                              struct btrfs_path *path,
4211                              struct extent_buffer *buf, int slot)
4212 {
4213         struct btrfs_key key;
4214         int nritems = btrfs_header_nritems(buf);
4215
4216         btrfs_item_key_to_cpu(buf, &key, slot);
4217
4218         /* These are all the keys we can deal with missing. */
4219         if (key.type != BTRFS_DIR_INDEX_KEY &&
4220             key.type != BTRFS_EXTENT_ITEM_KEY &&
4221             key.type != BTRFS_METADATA_ITEM_KEY &&
4222             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4223             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4224                 return -1;
4225
4226         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4227                (unsigned long long)key.objectid, key.type,
4228                (unsigned long long)key.offset, slot, buf->start);
4229         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4230                               btrfs_item_nr_offset(slot + 1),
4231                               sizeof(struct btrfs_item) *
4232                               (nritems - slot - 1));
4233         btrfs_set_header_nritems(buf, nritems - 1);
4234         if (slot == 0) {
4235                 struct btrfs_disk_key disk_key;
4236
4237                 btrfs_item_key(buf, &disk_key, 0);
4238                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4239         }
4240         btrfs_mark_buffer_dirty(buf);
4241         return 0;
4242 }
4243
4244 static int fix_item_offset(struct btrfs_trans_handle *trans,
4245                            struct btrfs_root *root,
4246                            struct btrfs_path *path)
4247 {
4248         struct extent_buffer *buf;
4249         int i;
4250         int ret = 0;
4251
4252         /* We should only get this for leaves */
4253         BUG_ON(path->lowest_level);
4254         buf = path->nodes[0];
4255 again:
4256         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4257                 unsigned int shift = 0, offset;
4258
4259                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4260                     BTRFS_LEAF_DATA_SIZE(root)) {
4261                         if (btrfs_item_end_nr(buf, i) >
4262                             BTRFS_LEAF_DATA_SIZE(root)) {
4263                                 ret = delete_bogus_item(trans, root, path,
4264                                                         buf, i);
4265                                 if (!ret)
4266                                         goto again;
4267                                 fprintf(stderr, "item is off the end of the "
4268                                         "leaf, can't fix\n");
4269                                 ret = -EIO;
4270                                 break;
4271                         }
4272                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4273                                 btrfs_item_end_nr(buf, i);
4274                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4275                            btrfs_item_offset_nr(buf, i - 1)) {
4276                         if (btrfs_item_end_nr(buf, i) >
4277                             btrfs_item_offset_nr(buf, i - 1)) {
4278                                 ret = delete_bogus_item(trans, root, path,
4279                                                         buf, i);
4280                                 if (!ret)
4281                                         goto again;
4282                                 fprintf(stderr, "items overlap, can't fix\n");
4283                                 ret = -EIO;
4284                                 break;
4285                         }
4286                         shift = btrfs_item_offset_nr(buf, i - 1) -
4287                                 btrfs_item_end_nr(buf, i);
4288                 }
4289                 if (!shift)
4290                         continue;
4291
4292                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4293                        i, shift, (unsigned long long)buf->start);
4294                 offset = btrfs_item_offset_nr(buf, i);
4295                 memmove_extent_buffer(buf,
4296                                       btrfs_leaf_data(buf) + offset + shift,
4297                                       btrfs_leaf_data(buf) + offset,
4298                                       btrfs_item_size_nr(buf, i));
4299                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4300                                       offset + shift);
4301                 btrfs_mark_buffer_dirty(buf);
4302         }
4303
4304         /*
4305          * We may have moved things, in which case we want to exit so we don't
4306          * write those changes out.  Once we have proper abort functionality in
4307          * progs this can be changed to something nicer.
4308          */
4309         BUG_ON(ret);
4310         return ret;
4311 }
4312
4313 /*
4314  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4315  * then just return -EIO.
4316  */
4317 static int try_to_fix_bad_block(struct btrfs_root *root,
4318                                 struct extent_buffer *buf,
4319                                 enum btrfs_tree_block_status status)
4320 {
4321         struct btrfs_trans_handle *trans;
4322         struct ulist *roots;
4323         struct ulist_node *node;
4324         struct btrfs_root *search_root;
4325         struct btrfs_path *path;
4326         struct ulist_iterator iter;
4327         struct btrfs_key root_key, key;
4328         int ret;
4329
4330         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4331             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4332                 return -EIO;
4333
4334         path = btrfs_alloc_path();
4335         if (!path)
4336                 return -EIO;
4337
4338         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4339                                    0, &roots);
4340         if (ret) {
4341                 btrfs_free_path(path);
4342                 return -EIO;
4343         }
4344
4345         ULIST_ITER_INIT(&iter);
4346         while ((node = ulist_next(roots, &iter))) {
4347                 root_key.objectid = node->val;
4348                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4349                 root_key.offset = (u64)-1;
4350
4351                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4352                 if (IS_ERR(root)) {
4353                         ret = -EIO;
4354                         break;
4355                 }
4356
4357
4358                 trans = btrfs_start_transaction(search_root, 0);
4359                 if (IS_ERR(trans)) {
4360                         ret = PTR_ERR(trans);
4361                         break;
4362                 }
4363
4364                 path->lowest_level = btrfs_header_level(buf);
4365                 path->skip_check_block = 1;
4366                 if (path->lowest_level)
4367                         btrfs_node_key_to_cpu(buf, &key, 0);
4368                 else
4369                         btrfs_item_key_to_cpu(buf, &key, 0);
4370                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4371                 if (ret) {
4372                         ret = -EIO;
4373                         btrfs_commit_transaction(trans, search_root);
4374                         break;
4375                 }
4376                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4377                         ret = fix_key_order(trans, search_root, path);
4378                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4379                         ret = fix_item_offset(trans, search_root, path);
4380                 if (ret) {
4381                         btrfs_commit_transaction(trans, search_root);
4382                         break;
4383                 }
4384                 btrfs_release_path(path);
4385                 btrfs_commit_transaction(trans, search_root);
4386         }
4387         ulist_free(roots);
4388         btrfs_free_path(path);
4389         return ret;
4390 }
4391
4392 static int check_block(struct btrfs_root *root,
4393                        struct cache_tree *extent_cache,
4394                        struct extent_buffer *buf, u64 flags)
4395 {
4396         struct extent_record *rec;
4397         struct cache_extent *cache;
4398         struct btrfs_key key;
4399         enum btrfs_tree_block_status status;
4400         int ret = 0;
4401         int level;
4402
4403         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4404         if (!cache)
4405                 return 1;
4406         rec = container_of(cache, struct extent_record, cache);
4407         rec->generation = btrfs_header_generation(buf);
4408
4409         level = btrfs_header_level(buf);
4410         if (btrfs_header_nritems(buf) > 0) {
4411
4412                 if (level == 0)
4413                         btrfs_item_key_to_cpu(buf, &key, 0);
4414                 else
4415                         btrfs_node_key_to_cpu(buf, &key, 0);
4416
4417                 rec->info_objectid = key.objectid;
4418         }
4419         rec->info_level = level;
4420
4421         if (btrfs_is_leaf(buf))
4422                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4423         else
4424                 status = btrfs_check_node(root, &rec->parent_key, buf);
4425
4426         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4427                 if (repair)
4428                         status = try_to_fix_bad_block(root, buf, status);
4429                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4430                         ret = -EIO;
4431                         fprintf(stderr, "bad block %llu\n",
4432                                 (unsigned long long)buf->start);
4433                 } else {
4434                         /*
4435                          * Signal to callers we need to start the scan over
4436                          * again since we'll have cowed blocks.
4437                          */
4438                         ret = -EAGAIN;
4439                 }
4440         } else {
4441                 rec->content_checked = 1;
4442                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4443                         rec->owner_ref_checked = 1;
4444                 else {
4445                         ret = check_owner_ref(root, rec, buf);
4446                         if (!ret)
4447                                 rec->owner_ref_checked = 1;
4448                 }
4449         }
4450         if (!ret)
4451                 maybe_free_extent_rec(extent_cache, rec);
4452         return ret;
4453 }
4454
4455 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4456                                                 u64 parent, u64 root)
4457 {
4458         struct list_head *cur = rec->backrefs.next;
4459         struct extent_backref *node;
4460         struct tree_backref *back;
4461
4462         while(cur != &rec->backrefs) {
4463                 node = to_extent_backref(cur);
4464                 cur = cur->next;
4465                 if (node->is_data)
4466                         continue;
4467                 back = to_tree_backref(node);
4468                 if (parent > 0) {
4469                         if (!node->full_backref)
4470                                 continue;
4471                         if (parent == back->parent)
4472                                 return back;
4473                 } else {
4474                         if (node->full_backref)
4475                                 continue;
4476                         if (back->root == root)
4477                                 return back;
4478                 }
4479         }
4480         return NULL;
4481 }
4482
4483 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4484                                                 u64 parent, u64 root)
4485 {
4486         struct tree_backref *ref = malloc(sizeof(*ref));
4487
4488         if (!ref)
4489                 return NULL;
4490         memset(&ref->node, 0, sizeof(ref->node));
4491         if (parent > 0) {
4492                 ref->parent = parent;
4493                 ref->node.full_backref = 1;
4494         } else {
4495                 ref->root = root;
4496                 ref->node.full_backref = 0;
4497         }
4498         list_add_tail(&ref->node.list, &rec->backrefs);
4499
4500         return ref;
4501 }
4502
4503 static struct data_backref *find_data_backref(struct extent_record *rec,
4504                                                 u64 parent, u64 root,
4505                                                 u64 owner, u64 offset,
4506                                                 int found_ref,
4507                                                 u64 disk_bytenr, u64 bytes)
4508 {
4509         struct list_head *cur = rec->backrefs.next;
4510         struct extent_backref *node;
4511         struct data_backref *back;
4512
4513         while(cur != &rec->backrefs) {
4514                 node = to_extent_backref(cur);
4515                 cur = cur->next;
4516                 if (!node->is_data)
4517                         continue;
4518                 back = to_data_backref(node);
4519                 if (parent > 0) {
4520                         if (!node->full_backref)
4521                                 continue;
4522                         if (parent == back->parent)
4523                                 return back;
4524                 } else {
4525                         if (node->full_backref)
4526                                 continue;
4527                         if (back->root == root && back->owner == owner &&
4528                             back->offset == offset) {
4529                                 if (found_ref && node->found_ref &&
4530                                     (back->bytes != bytes ||
4531                                     back->disk_bytenr != disk_bytenr))
4532                                         continue;
4533                                 return back;
4534                         }
4535                 }
4536         }
4537         return NULL;
4538 }
4539
4540 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4541                                                 u64 parent, u64 root,
4542                                                 u64 owner, u64 offset,
4543                                                 u64 max_size)
4544 {
4545         struct data_backref *ref = malloc(sizeof(*ref));
4546
4547         if (!ref)
4548                 return NULL;
4549         memset(&ref->node, 0, sizeof(ref->node));
4550         ref->node.is_data = 1;
4551
4552         if (parent > 0) {
4553                 ref->parent = parent;
4554                 ref->owner = 0;
4555                 ref->offset = 0;
4556                 ref->node.full_backref = 1;
4557         } else {
4558                 ref->root = root;
4559                 ref->owner = owner;
4560                 ref->offset = offset;
4561                 ref->node.full_backref = 0;
4562         }
4563         ref->bytes = max_size;
4564         ref->found_ref = 0;
4565         ref->num_refs = 0;
4566         list_add_tail(&ref->node.list, &rec->backrefs);
4567         if (max_size > rec->max_size)
4568                 rec->max_size = max_size;
4569         return ref;
4570 }
4571
4572 /* Check if the type of extent matches with its chunk */
4573 static void check_extent_type(struct extent_record *rec)
4574 {
4575         struct btrfs_block_group_cache *bg_cache;
4576
4577         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4578         if (!bg_cache)
4579                 return;
4580
4581         /* data extent, check chunk directly*/
4582         if (!rec->metadata) {
4583                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4584                         rec->wrong_chunk_type = 1;
4585                 return;
4586         }
4587
4588         /* metadata extent, check the obvious case first */
4589         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4590                                  BTRFS_BLOCK_GROUP_METADATA))) {
4591                 rec->wrong_chunk_type = 1;
4592                 return;
4593         }
4594
4595         /*
4596          * Check SYSTEM extent, as it's also marked as metadata, we can only
4597          * make sure it's a SYSTEM extent by its backref
4598          */
4599         if (!list_empty(&rec->backrefs)) {
4600                 struct extent_backref *node;
4601                 struct tree_backref *tback;
4602                 u64 bg_type;
4603
4604                 node = to_extent_backref(rec->backrefs.next);
4605                 if (node->is_data) {
4606                         /* tree block shouldn't have data backref */
4607                         rec->wrong_chunk_type = 1;
4608                         return;
4609                 }
4610                 tback = container_of(node, struct tree_backref, node);
4611
4612                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4613                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4614                 else
4615                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4616                 if (!(bg_cache->flags & bg_type))
4617                         rec->wrong_chunk_type = 1;
4618         }
4619 }
4620
4621 /*
4622  * Allocate a new extent record, fill default values from @tmpl and insert int
4623  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4624  * the cache, otherwise it fails.
4625  */
4626 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4627                 struct extent_record *tmpl)
4628 {
4629         struct extent_record *rec;
4630         int ret = 0;
4631
4632         rec = malloc(sizeof(*rec));
4633         if (!rec)
4634                 return -ENOMEM;
4635         rec->start = tmpl->start;
4636         rec->max_size = tmpl->max_size;
4637         rec->nr = max(tmpl->nr, tmpl->max_size);
4638         rec->found_rec = tmpl->found_rec;
4639         rec->content_checked = tmpl->content_checked;
4640         rec->owner_ref_checked = tmpl->owner_ref_checked;
4641         rec->num_duplicates = 0;
4642         rec->metadata = tmpl->metadata;
4643         rec->flag_block_full_backref = FLAG_UNSET;
4644         rec->bad_full_backref = 0;
4645         rec->crossing_stripes = 0;
4646         rec->wrong_chunk_type = 0;
4647         rec->is_root = tmpl->is_root;
4648         rec->refs = tmpl->refs;
4649         rec->extent_item_refs = tmpl->extent_item_refs;
4650         rec->parent_generation = tmpl->parent_generation;
4651         INIT_LIST_HEAD(&rec->backrefs);
4652         INIT_LIST_HEAD(&rec->dups);
4653         INIT_LIST_HEAD(&rec->list);
4654         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4655         rec->cache.start = tmpl->start;
4656         rec->cache.size = tmpl->nr;
4657         ret = insert_cache_extent(extent_cache, &rec->cache);
4658         if (ret) {
4659                 free(rec);
4660                 return ret;
4661         }
4662         bytes_used += rec->nr;
4663
4664         if (tmpl->metadata)
4665                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4666                                 global_info->tree_root->nodesize);
4667         check_extent_type(rec);
4668         return ret;
4669 }
4670
4671 /*
4672  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4673  * some are hints:
4674  * - refs              - if found, increase refs
4675  * - is_root           - if found, set
4676  * - content_checked   - if found, set
4677  * - owner_ref_checked - if found, set
4678  *
4679  * If not found, create a new one, initialize and insert.
4680  */
4681 static int add_extent_rec(struct cache_tree *extent_cache,
4682                 struct extent_record *tmpl)
4683 {
4684         struct extent_record *rec;
4685         struct cache_extent *cache;
4686         int ret = 0;
4687         int dup = 0;
4688
4689         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4690         if (cache) {
4691                 rec = container_of(cache, struct extent_record, cache);
4692                 if (tmpl->refs)
4693                         rec->refs++;
4694                 if (rec->nr == 1)
4695                         rec->nr = max(tmpl->nr, tmpl->max_size);
4696
4697                 /*
4698                  * We need to make sure to reset nr to whatever the extent
4699                  * record says was the real size, this way we can compare it to
4700                  * the backrefs.
4701                  */
4702                 if (tmpl->found_rec) {
4703                         if (tmpl->start != rec->start || rec->found_rec) {
4704                                 struct extent_record *tmp;
4705
4706                                 dup = 1;
4707                                 if (list_empty(&rec->list))
4708                                         list_add_tail(&rec->list,
4709                                                       &duplicate_extents);
4710
4711                                 /*
4712                                  * We have to do this song and dance in case we
4713                                  * find an extent record that falls inside of
4714                                  * our current extent record but does not have
4715                                  * the same objectid.
4716                                  */
4717                                 tmp = malloc(sizeof(*tmp));
4718                                 if (!tmp)
4719                                         return -ENOMEM;
4720                                 tmp->start = tmpl->start;
4721                                 tmp->max_size = tmpl->max_size;
4722                                 tmp->nr = tmpl->nr;
4723                                 tmp->found_rec = 1;
4724                                 tmp->metadata = tmpl->metadata;
4725                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4726                                 INIT_LIST_HEAD(&tmp->list);
4727                                 list_add_tail(&tmp->list, &rec->dups);
4728                                 rec->num_duplicates++;
4729                         } else {
4730                                 rec->nr = tmpl->nr;
4731                                 rec->found_rec = 1;
4732                         }
4733                 }
4734
4735                 if (tmpl->extent_item_refs && !dup) {
4736                         if (rec->extent_item_refs) {
4737                                 fprintf(stderr, "block %llu rec "
4738                                         "extent_item_refs %llu, passed %llu\n",
4739                                         (unsigned long long)tmpl->start,
4740                                         (unsigned long long)
4741                                                         rec->extent_item_refs,
4742                                         (unsigned long long)tmpl->extent_item_refs);
4743                         }
4744                         rec->extent_item_refs = tmpl->extent_item_refs;
4745                 }
4746                 if (tmpl->is_root)
4747                         rec->is_root = 1;
4748                 if (tmpl->content_checked)
4749                         rec->content_checked = 1;
4750                 if (tmpl->owner_ref_checked)
4751                         rec->owner_ref_checked = 1;
4752                 memcpy(&rec->parent_key, &tmpl->parent_key,
4753                                 sizeof(tmpl->parent_key));
4754                 if (tmpl->parent_generation)
4755                         rec->parent_generation = tmpl->parent_generation;
4756                 if (rec->max_size < tmpl->max_size)
4757                         rec->max_size = tmpl->max_size;
4758
4759                 /*
4760                  * A metadata extent can't cross stripe_len boundary, otherwise
4761                  * kernel scrub won't be able to handle it.
4762                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4763                  * it.
4764                  */
4765                 if (tmpl->metadata)
4766                         rec->crossing_stripes = check_crossing_stripes(
4767                                 rec->start, global_info->tree_root->nodesize);
4768                 check_extent_type(rec);
4769                 maybe_free_extent_rec(extent_cache, rec);
4770                 return ret;
4771         }
4772
4773         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4774
4775         return ret;
4776 }
4777
4778 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4779                             u64 parent, u64 root, int found_ref)
4780 {
4781         struct extent_record *rec;
4782         struct tree_backref *back;
4783         struct cache_extent *cache;
4784         int ret;
4785
4786         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4787         if (!cache) {
4788                 struct extent_record tmpl;
4789
4790                 memset(&tmpl, 0, sizeof(tmpl));
4791                 tmpl.start = bytenr;
4792                 tmpl.nr = 1;
4793                 tmpl.metadata = 1;
4794
4795                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4796                 if (ret)
4797                         return ret;
4798
4799                 /* really a bug in cache_extent implement now */
4800                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4801                 if (!cache)
4802                         return -ENOENT;
4803         }
4804
4805         rec = container_of(cache, struct extent_record, cache);
4806         if (rec->start != bytenr) {
4807                 /*
4808                  * Several cause, from unaligned bytenr to over lapping extents
4809                  */
4810                 return -EEXIST;
4811         }
4812
4813         back = find_tree_backref(rec, parent, root);
4814         if (!back) {
4815                 back = alloc_tree_backref(rec, parent, root);
4816                 if (!back)
4817                         return -ENOMEM;
4818         }
4819
4820         if (found_ref) {
4821                 if (back->node.found_ref) {
4822                         fprintf(stderr, "Extent back ref already exists "
4823                                 "for %llu parent %llu root %llu \n",
4824                                 (unsigned long long)bytenr,
4825                                 (unsigned long long)parent,
4826                                 (unsigned long long)root);
4827                 }
4828                 back->node.found_ref = 1;
4829         } else {
4830                 if (back->node.found_extent_tree) {
4831                         fprintf(stderr, "Extent back ref already exists "
4832                                 "for %llu parent %llu root %llu \n",
4833                                 (unsigned long long)bytenr,
4834                                 (unsigned long long)parent,
4835                                 (unsigned long long)root);
4836                 }
4837                 back->node.found_extent_tree = 1;
4838         }
4839         check_extent_type(rec);
4840         maybe_free_extent_rec(extent_cache, rec);
4841         return 0;
4842 }
4843
4844 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4845                             u64 parent, u64 root, u64 owner, u64 offset,
4846                             u32 num_refs, int found_ref, u64 max_size)
4847 {
4848         struct extent_record *rec;
4849         struct data_backref *back;
4850         struct cache_extent *cache;
4851         int ret;
4852
4853         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4854         if (!cache) {
4855                 struct extent_record tmpl;
4856
4857                 memset(&tmpl, 0, sizeof(tmpl));
4858                 tmpl.start = bytenr;
4859                 tmpl.nr = 1;
4860                 tmpl.max_size = max_size;
4861
4862                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4863                 if (ret)
4864                         return ret;
4865
4866                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4867                 if (!cache)
4868                         abort();
4869         }
4870
4871         rec = container_of(cache, struct extent_record, cache);
4872         if (rec->max_size < max_size)
4873                 rec->max_size = max_size;
4874
4875         /*
4876          * If found_ref is set then max_size is the real size and must match the
4877          * existing refs.  So if we have already found a ref then we need to
4878          * make sure that this ref matches the existing one, otherwise we need
4879          * to add a new backref so we can notice that the backrefs don't match
4880          * and we need to figure out who is telling the truth.  This is to
4881          * account for that awful fsync bug I introduced where we'd end up with
4882          * a btrfs_file_extent_item that would have its length include multiple
4883          * prealloc extents or point inside of a prealloc extent.
4884          */
4885         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4886                                  bytenr, max_size);
4887         if (!back) {
4888                 back = alloc_data_backref(rec, parent, root, owner, offset,
4889                                           max_size);
4890                 BUG_ON(!back);
4891         }
4892
4893         if (found_ref) {
4894                 BUG_ON(num_refs != 1);
4895                 if (back->node.found_ref)
4896                         BUG_ON(back->bytes != max_size);
4897                 back->node.found_ref = 1;
4898                 back->found_ref += 1;
4899                 back->bytes = max_size;
4900                 back->disk_bytenr = bytenr;
4901                 rec->refs += 1;
4902                 rec->content_checked = 1;
4903                 rec->owner_ref_checked = 1;
4904         } else {
4905                 if (back->node.found_extent_tree) {
4906                         fprintf(stderr, "Extent back ref already exists "
4907                                 "for %llu parent %llu root %llu "
4908                                 "owner %llu offset %llu num_refs %lu\n",
4909                                 (unsigned long long)bytenr,
4910                                 (unsigned long long)parent,
4911                                 (unsigned long long)root,
4912                                 (unsigned long long)owner,
4913                                 (unsigned long long)offset,
4914                                 (unsigned long)num_refs);
4915                 }
4916                 back->num_refs = num_refs;
4917                 back->node.found_extent_tree = 1;
4918         }
4919         maybe_free_extent_rec(extent_cache, rec);
4920         return 0;
4921 }
4922
4923 static int add_pending(struct cache_tree *pending,
4924                        struct cache_tree *seen, u64 bytenr, u32 size)
4925 {
4926         int ret;
4927         ret = add_cache_extent(seen, bytenr, size);
4928         if (ret)
4929                 return ret;
4930         add_cache_extent(pending, bytenr, size);
4931         return 0;
4932 }
4933
4934 static int pick_next_pending(struct cache_tree *pending,
4935                         struct cache_tree *reada,
4936                         struct cache_tree *nodes,
4937                         u64 last, struct block_info *bits, int bits_nr,
4938                         int *reada_bits)
4939 {
4940         unsigned long node_start = last;
4941         struct cache_extent *cache;
4942         int ret;
4943
4944         cache = search_cache_extent(reada, 0);
4945         if (cache) {
4946                 bits[0].start = cache->start;
4947                 bits[0].size = cache->size;
4948                 *reada_bits = 1;
4949                 return 1;
4950         }
4951         *reada_bits = 0;
4952         if (node_start > 32768)
4953                 node_start -= 32768;
4954
4955         cache = search_cache_extent(nodes, node_start);
4956         if (!cache)
4957                 cache = search_cache_extent(nodes, 0);
4958
4959         if (!cache) {
4960                  cache = search_cache_extent(pending, 0);
4961                  if (!cache)
4962                          return 0;
4963                  ret = 0;
4964                  do {
4965                          bits[ret].start = cache->start;
4966                          bits[ret].size = cache->size;
4967                          cache = next_cache_extent(cache);
4968                          ret++;
4969                  } while (cache && ret < bits_nr);
4970                  return ret;
4971         }
4972
4973         ret = 0;
4974         do {
4975                 bits[ret].start = cache->start;
4976                 bits[ret].size = cache->size;
4977                 cache = next_cache_extent(cache);
4978                 ret++;
4979         } while (cache && ret < bits_nr);
4980
4981         if (bits_nr - ret > 8) {
4982                 u64 lookup = bits[0].start + bits[0].size;
4983                 struct cache_extent *next;
4984                 next = search_cache_extent(pending, lookup);
4985                 while(next) {
4986                         if (next->start - lookup > 32768)
4987                                 break;
4988                         bits[ret].start = next->start;
4989                         bits[ret].size = next->size;
4990                         lookup = next->start + next->size;
4991                         ret++;
4992                         if (ret == bits_nr)
4993                                 break;
4994                         next = next_cache_extent(next);
4995                         if (!next)
4996                                 break;
4997                 }
4998         }
4999         return ret;
5000 }
5001
5002 static void free_chunk_record(struct cache_extent *cache)
5003 {
5004         struct chunk_record *rec;
5005
5006         rec = container_of(cache, struct chunk_record, cache);
5007         list_del_init(&rec->list);
5008         list_del_init(&rec->dextents);
5009         free(rec);
5010 }
5011
/*
 * Release the entries of @chunk_cache, passing free_chunk_record() as the
 * per-entry callback.
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
5016
5017 static void free_device_record(struct rb_node *node)
5018 {
5019         struct device_record *rec;
5020
5021         rec = container_of(node, struct device_record, node);
5022         free(rec);
5023 }
5024
5025 FREE_RB_BASED_TREE(device_cache, free_device_record);
5026
5027 int insert_block_group_record(struct block_group_tree *tree,
5028                               struct block_group_record *bg_rec)
5029 {
5030         int ret;
5031
5032         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5033         if (ret)
5034                 return ret;
5035
5036         list_add_tail(&bg_rec->list, &tree->block_groups);
5037         return 0;
5038 }
5039
5040 static void free_block_group_record(struct cache_extent *cache)
5041 {
5042         struct block_group_record *rec;
5043
5044         rec = container_of(cache, struct block_group_record, cache);
5045         list_del_init(&rec->list);
5046         free(rec);
5047 }
5048
/*
 * Release the entries of @tree's extent cache, passing
 * free_block_group_record() as the per-entry callback.
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
5053
5054 int insert_device_extent_record(struct device_extent_tree *tree,
5055                                 struct device_extent_record *de_rec)
5056 {
5057         int ret;
5058
5059         /*
5060          * Device extent is a bit different from the other extents, because
5061          * the extents which belong to the different devices may have the
5062          * same start and size, so we need use the special extent cache
5063          * search/insert functions.
5064          */
5065         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5066         if (ret)
5067                 return ret;
5068
5069         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5070         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5071         return 0;
5072 }
5073
5074 static void free_device_extent_record(struct cache_extent *cache)
5075 {
5076         struct device_extent_record *rec;
5077
5078         rec = container_of(cache, struct device_extent_record, cache);
5079         if (!list_empty(&rec->chunk_list))
5080                 list_del_init(&rec->chunk_list);
5081         if (!list_empty(&rec->device_list))
5082                 list_del_init(&rec->device_list);
5083         free(rec);
5084 }
5085
/*
 * Release the entries of @tree's extent cache, passing
 * free_device_extent_record() as the per-entry callback.
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
5090
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.  The item
 * key's objectid is passed on as bytenr and its offset as parent.  A ref
 * objectid below BTRFS_FIRST_FREE_OBJECTID yields a tree backref, anything
 * else a data backref carrying the item's ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
                return add_tree_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0);

        return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
5111
5112 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5113                                             struct btrfs_key *key,
5114                                             int slot)
5115 {
5116         struct btrfs_chunk *ptr;
5117         struct chunk_record *rec;
5118         int num_stripes, i;
5119
5120         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5121         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5122
5123         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5124         if (!rec) {
5125                 fprintf(stderr, "memory allocation failed\n");
5126                 exit(-1);
5127         }
5128
5129         INIT_LIST_HEAD(&rec->list);
5130         INIT_LIST_HEAD(&rec->dextents);
5131         rec->bg_rec = NULL;
5132
5133         rec->cache.start = key->offset;
5134         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5135
5136         rec->generation = btrfs_header_generation(leaf);
5137
5138         rec->objectid = key->objectid;
5139         rec->type = key->type;
5140         rec->offset = key->offset;
5141
5142         rec->length = rec->cache.size;
5143         rec->owner = btrfs_chunk_owner(leaf, ptr);
5144         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5145         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5146         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5147         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5148         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5149         rec->num_stripes = num_stripes;
5150         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5151
5152         for (i = 0; i < rec->num_stripes; ++i) {
5153                 rec->stripes[i].devid =
5154                         btrfs_stripe_devid_nr(leaf, ptr, i);
5155                 rec->stripes[i].offset =
5156                         btrfs_stripe_offset_nr(leaf, ptr, i);
5157                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5158                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5159                                 BTRFS_UUID_SIZE);
5160         }
5161
5162         return rec;
5163 }
5164
5165 static int process_chunk_item(struct cache_tree *chunk_cache,
5166                               struct btrfs_key *key, struct extent_buffer *eb,
5167                               int slot)
5168 {
5169         struct chunk_record *rec;
5170         struct btrfs_chunk *chunk;
5171         int ret = 0;
5172
5173         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5174         /*
5175          * Do extra check for this chunk item,
5176          *
5177          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5178          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5179          * and owner<->key_type check.
5180          */
5181         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5182                                       key->offset);
5183         if (ret < 0) {
5184                 error("chunk(%llu, %llu) is not valid, ignore it",
5185                       key->offset, btrfs_chunk_length(eb, chunk));
5186                 return 0;
5187         }
5188         rec = btrfs_new_chunk_record(eb, key, slot);
5189         ret = insert_cache_extent(chunk_cache, &rec->cache);
5190         if (ret) {
5191                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5192                         rec->offset, rec->length);
5193                 free(rec);
5194         }
5195
5196         return ret;
5197 }
5198
5199 static int process_device_item(struct rb_root *dev_cache,
5200                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5201 {
5202         struct btrfs_dev_item *ptr;
5203         struct device_record *rec;
5204         int ret = 0;
5205
5206         ptr = btrfs_item_ptr(eb,
5207                 slot, struct btrfs_dev_item);
5208
5209         rec = malloc(sizeof(*rec));
5210         if (!rec) {
5211                 fprintf(stderr, "memory allocation failed\n");
5212                 return -ENOMEM;
5213         }
5214
5215         rec->devid = key->offset;
5216         rec->generation = btrfs_header_generation(eb);
5217
5218         rec->objectid = key->objectid;
5219         rec->type = key->type;
5220         rec->offset = key->offset;
5221
5222         rec->devid = btrfs_device_id(eb, ptr);
5223         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5224         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5225
5226         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5227         if (ret) {
5228                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5229                 free(rec);
5230         }
5231
5232         return ret;
5233 }
5234
5235 struct block_group_record *
5236 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5237                              int slot)
5238 {
5239         struct btrfs_block_group_item *ptr;
5240         struct block_group_record *rec;
5241
5242         rec = calloc(1, sizeof(*rec));
5243         if (!rec) {
5244                 fprintf(stderr, "memory allocation failed\n");
5245                 exit(-1);
5246         }
5247
5248         rec->cache.start = key->objectid;
5249         rec->cache.size = key->offset;
5250
5251         rec->generation = btrfs_header_generation(leaf);
5252
5253         rec->objectid = key->objectid;
5254         rec->type = key->type;
5255         rec->offset = key->offset;
5256
5257         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5258         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5259
5260         INIT_LIST_HEAD(&rec->list);
5261
5262         return rec;
5263 }
5264
5265 static int process_block_group_item(struct block_group_tree *block_group_cache,
5266                                     struct btrfs_key *key,
5267                                     struct extent_buffer *eb, int slot)
5268 {
5269         struct block_group_record *rec;
5270         int ret = 0;
5271
5272         rec = btrfs_new_block_group_record(eb, key, slot);
5273         ret = insert_block_group_record(block_group_cache, rec);
5274         if (ret) {
5275                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5276                         rec->objectid, rec->offset);
5277                 free(rec);
5278         }
5279
5280         return ret;
5281 }
5282
5283 struct device_extent_record *
5284 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5285                                struct btrfs_key *key, int slot)
5286 {
5287         struct device_extent_record *rec;
5288         struct btrfs_dev_extent *ptr;
5289
5290         rec = calloc(1, sizeof(*rec));
5291         if (!rec) {
5292                 fprintf(stderr, "memory allocation failed\n");
5293                 exit(-1);
5294         }
5295
5296         rec->cache.objectid = key->objectid;
5297         rec->cache.start = key->offset;
5298
5299         rec->generation = btrfs_header_generation(leaf);
5300
5301         rec->objectid = key->objectid;
5302         rec->type = key->type;
5303         rec->offset = key->offset;
5304
5305         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5306         rec->chunk_objecteid =
5307                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5308         rec->chunk_offset =
5309                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5310         rec->length = btrfs_dev_extent_length(leaf, ptr);
5311         rec->cache.size = rec->length;
5312
5313         INIT_LIST_HEAD(&rec->chunk_list);
5314         INIT_LIST_HEAD(&rec->device_list);
5315
5316         return rec;
5317 }
5318
5319 static int
5320 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5321                            struct btrfs_key *key, struct extent_buffer *eb,
5322                            int slot)
5323 {
5324         struct device_extent_record *rec;
5325         int ret;
5326
5327         rec = btrfs_new_device_extent_record(eb, key, slot);
5328         ret = insert_device_extent_record(dev_extent_cache, rec);
5329         if (ret) {
5330                 fprintf(stderr,
5331                         "Device extent[%llu, %llu, %llu] existed.\n",
5332                         rec->objectid, rec->offset, rec->length);
5333                 free(rec);
5334         }
5335
5336         return ret;
5337 }
5338
5339 static int process_extent_item(struct btrfs_root *root,
5340                                struct cache_tree *extent_cache,
5341                                struct extent_buffer *eb, int slot)
5342 {
5343         struct btrfs_extent_item *ei;
5344         struct btrfs_extent_inline_ref *iref;
5345         struct btrfs_extent_data_ref *dref;
5346         struct btrfs_shared_data_ref *sref;
5347         struct btrfs_key key;
5348         struct extent_record tmpl;
5349         unsigned long end;
5350         unsigned long ptr;
5351         int ret;
5352         int type;
5353         u32 item_size = btrfs_item_size_nr(eb, slot);
5354         u64 refs = 0;
5355         u64 offset;
5356         u64 num_bytes;
5357         int metadata = 0;
5358
5359         btrfs_item_key_to_cpu(eb, &key, slot);
5360
5361         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5362                 metadata = 1;
5363                 num_bytes = root->nodesize;
5364         } else {
5365                 num_bytes = key.offset;
5366         }
5367
5368         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5369                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5370                       key.objectid, root->sectorsize);
5371                 return -EIO;
5372         }
5373         if (item_size < sizeof(*ei)) {
5374 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5375                 struct btrfs_extent_item_v0 *ei0;
5376                 BUG_ON(item_size != sizeof(*ei0));
5377                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5378                 refs = btrfs_extent_refs_v0(eb, ei0);
5379 #else
5380                 BUG();
5381 #endif
5382                 memset(&tmpl, 0, sizeof(tmpl));
5383                 tmpl.start = key.objectid;
5384                 tmpl.nr = num_bytes;
5385                 tmpl.extent_item_refs = refs;
5386                 tmpl.metadata = metadata;
5387                 tmpl.found_rec = 1;
5388                 tmpl.max_size = num_bytes;
5389
5390                 return add_extent_rec(extent_cache, &tmpl);
5391         }
5392
5393         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5394         refs = btrfs_extent_refs(eb, ei);
5395         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5396                 metadata = 1;
5397         else
5398                 metadata = 0;
5399         if (metadata && num_bytes != root->nodesize) {
5400                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5401                       num_bytes, root->nodesize);
5402                 return -EIO;
5403         }
5404         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5405                 error("ignore invalid data extent, length %llu is not aligned to %u",
5406                       num_bytes, root->sectorsize);
5407                 return -EIO;
5408         }
5409
5410         memset(&tmpl, 0, sizeof(tmpl));
5411         tmpl.start = key.objectid;
5412         tmpl.nr = num_bytes;
5413         tmpl.extent_item_refs = refs;
5414         tmpl.metadata = metadata;
5415         tmpl.found_rec = 1;
5416         tmpl.max_size = num_bytes;
5417         add_extent_rec(extent_cache, &tmpl);
5418
5419         ptr = (unsigned long)(ei + 1);
5420         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5421             key.type == BTRFS_EXTENT_ITEM_KEY)
5422                 ptr += sizeof(struct btrfs_tree_block_info);
5423
5424         end = (unsigned long)ei + item_size;
5425         while (ptr < end) {
5426                 iref = (struct btrfs_extent_inline_ref *)ptr;
5427                 type = btrfs_extent_inline_ref_type(eb, iref);
5428                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5429                 switch (type) {
5430                 case BTRFS_TREE_BLOCK_REF_KEY:
5431                         ret = add_tree_backref(extent_cache, key.objectid,
5432                                         0, offset, 0);
5433                         if (ret < 0)
5434                                 error("add_tree_backref failed: %s",
5435                                       strerror(-ret));
5436                         break;
5437                 case BTRFS_SHARED_BLOCK_REF_KEY:
5438                         ret = add_tree_backref(extent_cache, key.objectid,
5439                                         offset, 0, 0);
5440                         if (ret < 0)
5441                                 error("add_tree_backref failed: %s",
5442                                       strerror(-ret));
5443                         break;
5444                 case BTRFS_EXTENT_DATA_REF_KEY:
5445                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5446                         add_data_backref(extent_cache, key.objectid, 0,
5447                                         btrfs_extent_data_ref_root(eb, dref),
5448                                         btrfs_extent_data_ref_objectid(eb,
5449                                                                        dref),
5450                                         btrfs_extent_data_ref_offset(eb, dref),
5451                                         btrfs_extent_data_ref_count(eb, dref),
5452                                         0, num_bytes);
5453                         break;
5454                 case BTRFS_SHARED_DATA_REF_KEY:
5455                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5456                         add_data_backref(extent_cache, key.objectid, offset,
5457                                         0, 0, 0,
5458                                         btrfs_shared_data_ref_count(eb, sref),
5459                                         0, num_bytes);
5460                         break;
5461                 default:
5462                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5463                                 key.objectid, key.type, num_bytes);
5464                         goto out;
5465                 }
5466                 ptr += btrfs_extent_inline_ref_size(type);
5467         }
5468         WARN_ON(ptr > end);
5469 out:
5470         return 0;
5471 }
5472
5473 static int check_cache_range(struct btrfs_root *root,
5474                              struct btrfs_block_group_cache *cache,
5475                              u64 offset, u64 bytes)
5476 {
5477         struct btrfs_free_space *entry;
5478         u64 *logical;
5479         u64 bytenr;
5480         int stripe_len;
5481         int i, nr, ret;
5482
5483         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5484                 bytenr = btrfs_sb_offset(i);
5485                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5486                                        cache->key.objectid, bytenr, 0,
5487                                        &logical, &nr, &stripe_len);
5488                 if (ret)
5489                         return ret;
5490
5491                 while (nr--) {
5492                         if (logical[nr] + stripe_len <= offset)
5493                                 continue;
5494                         if (offset + bytes <= logical[nr])
5495                                 continue;
5496                         if (logical[nr] == offset) {
5497                                 if (stripe_len >= bytes) {
5498                                         free(logical);
5499                                         return 0;
5500                                 }
5501                                 bytes -= stripe_len;
5502                                 offset += stripe_len;
5503                         } else if (logical[nr] < offset) {
5504                                 if (logical[nr] + stripe_len >=
5505                                     offset + bytes) {
5506                                         free(logical);
5507                                         return 0;
5508                                 }
5509                                 bytes = (offset + bytes) -
5510                                         (logical[nr] + stripe_len);
5511                                 offset = logical[nr] + stripe_len;
5512                         } else {
5513                                 /*
5514                                  * Could be tricky, the super may land in the
5515                                  * middle of the area we're checking.  First
5516                                  * check the easiest case, it's at the end.
5517                                  */
5518                                 if (logical[nr] + stripe_len >=
5519                                     bytes + offset) {
5520                                         bytes = logical[nr] - offset;
5521                                         continue;
5522                                 }
5523
5524                                 /* Check the left side */
5525                                 ret = check_cache_range(root, cache,
5526                                                         offset,
5527                                                         logical[nr] - offset);
5528                                 if (ret) {
5529                                         free(logical);
5530                                         return ret;
5531                                 }
5532
5533                                 /* Now we continue with the right side */
5534                                 bytes = (offset + bytes) -
5535                                         (logical[nr] + stripe_len);
5536                                 offset = logical[nr] + stripe_len;
5537                         }
5538                 }
5539
5540                 free(logical);
5541         }
5542
5543         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5544         if (!entry) {
5545                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5546                         offset, offset+bytes);
5547                 return -EINVAL;
5548         }
5549
5550         if (entry->offset != offset) {
5551                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5552                         entry->offset);
5553                 return -EINVAL;
5554         }
5555
5556         if (entry->bytes != bytes) {
5557                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5558                         bytes, entry->bytes, offset);
5559                 return -EINVAL;
5560         }
5561
5562         unlink_free_space(cache->free_space_ctl, entry);
5563         free(entry);
5564         return 0;
5565 }
5566
5567 static int verify_space_cache(struct btrfs_root *root,
5568                               struct btrfs_block_group_cache *cache)
5569 {
5570         struct btrfs_path *path;
5571         struct extent_buffer *leaf;
5572         struct btrfs_key key;
5573         u64 last;
5574         int ret = 0;
5575
5576         path = btrfs_alloc_path();
5577         if (!path)
5578                 return -ENOMEM;
5579
5580         root = root->fs_info->extent_root;
5581
5582         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5583
5584         key.objectid = last;
5585         key.offset = 0;
5586         key.type = BTRFS_EXTENT_ITEM_KEY;
5587
5588         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5589         if (ret < 0)
5590                 goto out;
5591         ret = 0;
5592         while (1) {
5593                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5594                         ret = btrfs_next_leaf(root, path);
5595                         if (ret < 0)
5596                                 goto out;
5597                         if (ret > 0) {
5598                                 ret = 0;
5599                                 break;
5600                         }
5601                 }
5602                 leaf = path->nodes[0];
5603                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5604                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5605                         break;
5606                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5607                     key.type != BTRFS_METADATA_ITEM_KEY) {
5608                         path->slots[0]++;
5609                         continue;
5610                 }
5611
5612                 if (last == key.objectid) {
5613                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5614                                 last = key.objectid + key.offset;
5615                         else
5616                                 last = key.objectid + root->nodesize;
5617                         path->slots[0]++;
5618                         continue;
5619                 }
5620
5621                 ret = check_cache_range(root, cache, last,
5622                                         key.objectid - last);
5623                 if (ret)
5624                         break;
5625                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5626                         last = key.objectid + key.offset;
5627                 else
5628                         last = key.objectid + root->nodesize;
5629                 path->slots[0]++;
5630         }
5631
5632         if (last < cache->key.objectid + cache->key.offset)
5633                 ret = check_cache_range(root, cache, last,
5634                                         cache->key.objectid +
5635                                         cache->key.offset - last);
5636
5637 out:
5638         btrfs_free_path(path);
5639
5640         if (!ret &&
5641             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5642                 fprintf(stderr, "There are still entries left in the space "
5643                         "cache\n");
5644                 ret = -EINVAL;
5645         }
5646
5647         return ret;
5648 }
5649
5650 static int check_space_cache(struct btrfs_root *root)
5651 {
5652         struct btrfs_block_group_cache *cache;
5653         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5654         int ret;
5655         int error = 0;
5656
5657         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5658             btrfs_super_generation(root->fs_info->super_copy) !=
5659             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5660                 printf("cache and super generation don't match, space cache "
5661                        "will be invalidated\n");
5662                 return 0;
5663         }
5664
5665         if (ctx.progress_enabled) {
5666                 ctx.tp = TASK_FREE_SPACE;
5667                 task_start(ctx.info);
5668         }
5669
5670         while (1) {
5671                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5672                 if (!cache)
5673                         break;
5674
5675                 start = cache->key.objectid + cache->key.offset;
5676                 if (!cache->free_space_ctl) {
5677                         if (btrfs_init_free_space_ctl(cache,
5678                                                       root->sectorsize)) {
5679                                 ret = -ENOMEM;
5680                                 break;
5681                         }
5682                 } else {
5683                         btrfs_remove_free_space_cache(cache);
5684                 }
5685
5686                 if (btrfs_fs_compat_ro(root->fs_info,
5687                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5688                         ret = exclude_super_stripes(root, cache);
5689                         if (ret) {
5690                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5691                                         strerror(-ret));
5692                                 error++;
5693                                 continue;
5694                         }
5695                         ret = load_free_space_tree(root->fs_info, cache);
5696                         free_excluded_extents(root, cache);
5697                         if (ret < 0) {
5698                                 fprintf(stderr, "could not load free space tree: %s\n",
5699                                         strerror(-ret));
5700                                 error++;
5701                                 continue;
5702                         }
5703                         error += ret;
5704                 } else {
5705                         ret = load_free_space_cache(root->fs_info, cache);
5706                         if (!ret)
5707                                 continue;
5708                 }
5709
5710                 ret = verify_space_cache(root, cache);
5711                 if (ret) {
5712                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5713                                 cache->key.objectid);
5714                         error++;
5715                 }
5716         }
5717
5718         task_stop(ctx.info);
5719
5720         return error ? -EINVAL : 0;
5721 }
5722
5723 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5724                         u64 num_bytes, unsigned long leaf_offset,
5725                         struct extent_buffer *eb) {
5726
5727         u64 offset = 0;
5728         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5729         char *data;
5730         unsigned long csum_offset;
5731         u32 csum;
5732         u32 csum_expected;
5733         u64 read_len;
5734         u64 data_checked = 0;
5735         u64 tmp;
5736         int ret = 0;
5737         int mirror;
5738         int num_copies;
5739
5740         if (num_bytes % root->sectorsize)
5741                 return -EINVAL;
5742
5743         data = malloc(num_bytes);
5744         if (!data)
5745                 return -ENOMEM;
5746
5747         while (offset < num_bytes) {
5748                 mirror = 0;
5749 again:
5750                 read_len = num_bytes - offset;
5751                 /* read as much space once a time */
5752                 ret = read_extent_data(root, data + offset,
5753                                 bytenr + offset, &read_len, mirror);
5754                 if (ret)
5755                         goto out;
5756                 data_checked = 0;
5757                 /* verify every 4k data's checksum */
5758                 while (data_checked < read_len) {
5759                         csum = ~(u32)0;
5760                         tmp = offset + data_checked;
5761
5762                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5763                                                csum, root->sectorsize);
5764                         btrfs_csum_final(csum, (u8 *)&csum);
5765
5766                         csum_offset = leaf_offset +
5767                                  tmp / root->sectorsize * csum_size;
5768                         read_extent_buffer(eb, (char *)&csum_expected,
5769                                            csum_offset, csum_size);
5770                         /* try another mirror */
5771                         if (csum != csum_expected) {
5772                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5773                                                 mirror, bytenr + tmp,
5774                                                 csum, csum_expected);
5775                                 num_copies = btrfs_num_copies(
5776                                                 &root->fs_info->mapping_tree,
5777                                                 bytenr, num_bytes);
5778                                 if (mirror < num_copies - 1) {
5779                                         mirror += 1;
5780                                         goto again;
5781                                 }
5782                         }
5783                         data_checked += root->sectorsize;
5784                 }
5785                 offset += read_len;
5786         }
5787 out:
5788         free(data);
5789         return ret;
5790 }
5791
5792 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5793                                u64 num_bytes)
5794 {
5795         struct btrfs_path *path;
5796         struct extent_buffer *leaf;
5797         struct btrfs_key key;
5798         int ret;
5799
5800         path = btrfs_alloc_path();
5801         if (!path) {
5802                 fprintf(stderr, "Error allocating path\n");
5803                 return -ENOMEM;
5804         }
5805
5806         key.objectid = bytenr;
5807         key.type = BTRFS_EXTENT_ITEM_KEY;
5808         key.offset = (u64)-1;
5809
5810 again:
5811         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5812                                 0, 0);
5813         if (ret < 0) {
5814                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5815                 btrfs_free_path(path);
5816                 return ret;
5817         } else if (ret) {
5818                 if (path->slots[0] > 0) {
5819                         path->slots[0]--;
5820                 } else {
5821                         ret = btrfs_prev_leaf(root, path);
5822                         if (ret < 0) {
5823                                 goto out;
5824                         } else if (ret > 0) {
5825                                 ret = 0;
5826                                 goto out;
5827                         }
5828                 }
5829         }
5830
5831         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5832
5833         /*
5834          * Block group items come before extent items if they have the same
5835          * bytenr, so walk back one more just in case.  Dear future traveller,
5836          * first congrats on mastering time travel.  Now if it's not too much
5837          * trouble could you go back to 2006 and tell Chris to make the
5838          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5839          * EXTENT_ITEM_KEY please?
5840          */
5841         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5842                 if (path->slots[0] > 0) {
5843                         path->slots[0]--;
5844                 } else {
5845                         ret = btrfs_prev_leaf(root, path);
5846                         if (ret < 0) {
5847                                 goto out;
5848                         } else if (ret > 0) {
5849                                 ret = 0;
5850                                 goto out;
5851                         }
5852                 }
5853                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5854         }
5855
5856         while (num_bytes) {
5857                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5858                         ret = btrfs_next_leaf(root, path);
5859                         if (ret < 0) {
5860                                 fprintf(stderr, "Error going to next leaf "
5861                                         "%d\n", ret);
5862                                 btrfs_free_path(path);
5863                                 return ret;
5864                         } else if (ret) {
5865                                 break;
5866                         }
5867                 }
5868                 leaf = path->nodes[0];
5869                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5870                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5871                         path->slots[0]++;
5872                         continue;
5873                 }
5874                 if (key.objectid + key.offset < bytenr) {
5875                         path->slots[0]++;
5876                         continue;
5877                 }
5878                 if (key.objectid > bytenr + num_bytes)
5879                         break;
5880
5881                 if (key.objectid == bytenr) {
5882                         if (key.offset >= num_bytes) {
5883                                 num_bytes = 0;
5884                                 break;
5885                         }
5886                         num_bytes -= key.offset;
5887                         bytenr += key.offset;
5888                 } else if (key.objectid < bytenr) {
5889                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5890                                 num_bytes = 0;
5891                                 break;
5892                         }
5893                         num_bytes = (bytenr + num_bytes) -
5894                                 (key.objectid + key.offset);
5895                         bytenr = key.objectid + key.offset;
5896                 } else {
5897                         if (key.objectid + key.offset < bytenr + num_bytes) {
5898                                 u64 new_start = key.objectid + key.offset;
5899                                 u64 new_bytes = bytenr + num_bytes - new_start;
5900
5901                                 /*
5902                                  * Weird case, the extent is in the middle of
5903                                  * our range, we'll have to search one side
5904                                  * and then the other.  Not sure if this happens
5905                                  * in real life, but no harm in coding it up
5906                                  * anyway just in case.
5907                                  */
5908                                 btrfs_release_path(path);
5909                                 ret = check_extent_exists(root, new_start,
5910                                                           new_bytes);
5911                                 if (ret) {
5912                                         fprintf(stderr, "Right section didn't "
5913                                                 "have a record\n");
5914                                         break;
5915                                 }
5916                                 num_bytes = key.objectid - bytenr;
5917                                 goto again;
5918                         }
5919                         num_bytes = key.objectid - bytenr;
5920                 }
5921                 path->slots[0]++;
5922         }
5923         ret = 0;
5924
5925 out:
5926         if (num_bytes && !ret) {
5927                 fprintf(stderr, "There are no extents for csum range "
5928                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5929                 ret = 1;
5930         }
5931
5932         btrfs_free_path(path);
5933         return ret;
5934 }
5935
5936 static int check_csums(struct btrfs_root *root)
5937 {
5938         struct btrfs_path *path;
5939         struct extent_buffer *leaf;
5940         struct btrfs_key key;
5941         u64 offset = 0, num_bytes = 0;
5942         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5943         int errors = 0;
5944         int ret;
5945         u64 data_len;
5946         unsigned long leaf_offset;
5947
5948         root = root->fs_info->csum_root;
5949         if (!extent_buffer_uptodate(root->node)) {
5950                 fprintf(stderr, "No valid csum tree found\n");
5951                 return -ENOENT;
5952         }
5953
5954         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5955         key.type = BTRFS_EXTENT_CSUM_KEY;
5956         key.offset = 0;
5957
5958         path = btrfs_alloc_path();
5959         if (!path)
5960                 return -ENOMEM;
5961
5962         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5963         if (ret < 0) {
5964                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5965                 btrfs_free_path(path);
5966                 return ret;
5967         }
5968
5969         if (ret > 0 && path->slots[0])
5970                 path->slots[0]--;
5971         ret = 0;
5972
5973         while (1) {
5974                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5975                         ret = btrfs_next_leaf(root, path);
5976                         if (ret < 0) {
5977                                 fprintf(stderr, "Error going to next leaf "
5978                                         "%d\n", ret);
5979                                 break;
5980                         }
5981                         if (ret)
5982                                 break;
5983                 }
5984                 leaf = path->nodes[0];
5985
5986                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5987                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5988                         path->slots[0]++;
5989                         continue;
5990                 }
5991
5992                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5993                               csum_size) * root->sectorsize;
5994                 if (!check_data_csum)
5995                         goto skip_csum_check;
5996                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5997                 ret = check_extent_csums(root, key.offset, data_len,
5998                                          leaf_offset, leaf);
5999                 if (ret)
6000                         break;
6001 skip_csum_check:
6002                 if (!num_bytes) {
6003                         offset = key.offset;
6004                 } else if (key.offset != offset + num_bytes) {
6005                         ret = check_extent_exists(root, offset, num_bytes);
6006                         if (ret) {
6007                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6008                                         "there is no extent record\n",
6009                                         offset, offset+num_bytes);
6010                                 errors++;
6011                         }
6012                         offset = key.offset;
6013                         num_bytes = 0;
6014                 }
6015                 num_bytes += data_len;
6016                 path->slots[0]++;
6017         }
6018
6019         btrfs_free_path(path);
6020         return errors;
6021 }
6022
6023 static int is_dropped_key(struct btrfs_key *key,
6024                           struct btrfs_key *drop_key) {
6025         if (key->objectid < drop_key->objectid)
6026                 return 1;
6027         else if (key->objectid == drop_key->objectid) {
6028                 if (key->type < drop_key->type)
6029                         return 1;
6030                 else if (key->type == drop_key->type) {
6031                         if (key->offset < drop_key->offset)
6032                                 return 1;
6033                 }
6034         }
6035         return 0;
6036 }
6037
6038 /*
6039  * Here are the rules for FULL_BACKREF.
6040  *
6041  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6042  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6043  *      FULL_BACKREF set.
6044  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6045  *    if it happened after the relocation occurred since we'll have dropped the
6046  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6047  *    have no real way to know for sure.
6048  *
6049  * We process the blocks one root at a time, and we start from the lowest root
6050  * objectid and go to the highest.  So we can just lookup the owner backref for
6051  * the record and if we don't find it then we know it doesn't exist and we have
6052  * a FULL BACKREF.
6053  *
6054  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6055  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6056  * be set or not and then we can check later once we've gathered all the refs.
6057  */
6058 static int calc_extent_flag(struct btrfs_root *root,
6059                            struct cache_tree *extent_cache,
6060                            struct extent_buffer *buf,
6061                            struct root_item_record *ri,
6062                            u64 *flags)
6063 {
6064         struct extent_record *rec;
6065         struct cache_extent *cache;
6066         struct tree_backref *tback;
6067         u64 owner = 0;
6068
6069         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6070         /* we have added this extent before */
6071         if (!cache)
6072                 return -ENOENT;
6073
6074         rec = container_of(cache, struct extent_record, cache);
6075
6076         /*
6077          * Except file/reloc tree, we can not have
6078          * FULL BACKREF MODE
6079          */
6080         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6081                 goto normal;
6082         /*
6083          * root node
6084          */
6085         if (buf->start == ri->bytenr)
6086                 goto normal;
6087
6088         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6089                 goto full_backref;
6090
6091         owner = btrfs_header_owner(buf);
6092         if (owner == ri->objectid)
6093                 goto normal;
6094
6095         tback = find_tree_backref(rec, 0, owner);
6096         if (!tback)
6097                 goto full_backref;
6098 normal:
6099         *flags = 0;
6100         if (rec->flag_block_full_backref != FLAG_UNSET &&
6101             rec->flag_block_full_backref != 0)
6102                 rec->bad_full_backref = 1;
6103         return 0;
6104 full_backref:
6105         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6106         if (rec->flag_block_full_backref != FLAG_UNSET &&
6107             rec->flag_block_full_backref != 1)
6108                 rec->bad_full_backref = 1;
6109         return 0;
6110 }
6111
6112 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6113 {
6114         fprintf(stderr, "Invalid key type(");
6115         print_key_type(stderr, 0, key_type);
6116         fprintf(stderr, ") found in root(");
6117         print_objectid(stderr, rootid, 0);
6118         fprintf(stderr, ")\n");
6119 }
6120
6121 /*
6122  * Check if the key is valid with its extent buffer.
6123  *
6124  * This is a early check in case invalid key exists in a extent buffer
6125  * This is not comprehensive yet, but should prevent wrong key/item passed
6126  * further
6127  */
6128 static int check_type_with_root(u64 rootid, u8 key_type)
6129 {
6130         switch (key_type) {
6131         /* Only valid in chunk tree */
6132         case BTRFS_DEV_ITEM_KEY:
6133         case BTRFS_CHUNK_ITEM_KEY:
6134                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6135                         goto err;
6136                 break;
6137         /* valid in csum and log tree */
6138         case BTRFS_CSUM_TREE_OBJECTID:
6139                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6140                       is_fstree(rootid)))
6141                         goto err;
6142                 break;
6143         case BTRFS_EXTENT_ITEM_KEY:
6144         case BTRFS_METADATA_ITEM_KEY:
6145         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6146                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6147                         goto err;
6148                 break;
6149         case BTRFS_ROOT_ITEM_KEY:
6150                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6151                         goto err;
6152                 break;
6153         case BTRFS_DEV_EXTENT_KEY:
6154                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6155                         goto err;
6156                 break;
6157         }
6158         return 0;
6159 err:
6160         report_mismatch_key_root(key_type, rootid);
6161         return -EINVAL;
6162 }
6163
6164 static int run_next_block(struct btrfs_root *root,
6165                           struct block_info *bits,
6166                           int bits_nr,
6167                           u64 *last,
6168                           struct cache_tree *pending,
6169                           struct cache_tree *seen,
6170                           struct cache_tree *reada,
6171                           struct cache_tree *nodes,
6172                           struct cache_tree *extent_cache,
6173                           struct cache_tree *chunk_cache,
6174                           struct rb_root *dev_cache,
6175                           struct block_group_tree *block_group_cache,
6176                           struct device_extent_tree *dev_extent_cache,
6177                           struct root_item_record *ri)
6178 {
6179         struct extent_buffer *buf;
6180         struct extent_record *rec = NULL;
6181         u64 bytenr;
6182         u32 size;
6183         u64 parent;
6184         u64 owner;
6185         u64 flags;
6186         u64 ptr;
6187         u64 gen = 0;
6188         int ret = 0;
6189         int i;
6190         int nritems;
6191         struct btrfs_key key;
6192         struct cache_extent *cache;
6193         int reada_bits;
6194
6195         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6196                                     bits_nr, &reada_bits);
6197         if (nritems == 0)
6198                 return 1;
6199
6200         if (!reada_bits) {
6201                 for(i = 0; i < nritems; i++) {
6202                         ret = add_cache_extent(reada, bits[i].start,
6203                                                bits[i].size);
6204                         if (ret == -EEXIST)
6205                                 continue;
6206
6207                         /* fixme, get the parent transid */
6208                         readahead_tree_block(root, bits[i].start,
6209                                              bits[i].size, 0);
6210                 }
6211         }
6212         *last = bits[0].start;
6213         bytenr = bits[0].start;
6214         size = bits[0].size;
6215
6216         cache = lookup_cache_extent(pending, bytenr, size);
6217         if (cache) {
6218                 remove_cache_extent(pending, cache);
6219                 free(cache);
6220         }
6221         cache = lookup_cache_extent(reada, bytenr, size);
6222         if (cache) {
6223                 remove_cache_extent(reada, cache);
6224                 free(cache);
6225         }
6226         cache = lookup_cache_extent(nodes, bytenr, size);
6227         if (cache) {
6228                 remove_cache_extent(nodes, cache);
6229                 free(cache);
6230         }
6231         cache = lookup_cache_extent(extent_cache, bytenr, size);
6232         if (cache) {
6233                 rec = container_of(cache, struct extent_record, cache);
6234                 gen = rec->parent_generation;
6235         }
6236
6237         /* fixme, get the real parent transid */
6238         buf = read_tree_block(root, bytenr, size, gen);
6239         if (!extent_buffer_uptodate(buf)) {
6240                 record_bad_block_io(root->fs_info,
6241                                     extent_cache, bytenr, size);
6242                 goto out;
6243         }
6244
6245         nritems = btrfs_header_nritems(buf);
6246
6247         flags = 0;
6248         if (!init_extent_tree) {
6249                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6250                                        btrfs_header_level(buf), 1, NULL,
6251                                        &flags);
6252                 if (ret < 0) {
6253                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6254                         if (ret < 0) {
6255                                 fprintf(stderr, "Couldn't calc extent flags\n");
6256                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6257                         }
6258                 }
6259         } else {
6260                 flags = 0;
6261                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6262                 if (ret < 0) {
6263                         fprintf(stderr, "Couldn't calc extent flags\n");
6264                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6265                 }
6266         }
6267
6268         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6269                 if (ri != NULL &&
6270                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6271                     ri->objectid == btrfs_header_owner(buf)) {
6272                         /*
6273                          * Ok we got to this block from it's original owner and
6274                          * we have FULL_BACKREF set.  Relocation can leave
6275                          * converted blocks over so this is altogether possible,
6276                          * however it's not possible if the generation > the
6277                          * last snapshot, so check for this case.
6278                          */
6279                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6280                             btrfs_header_generation(buf) > ri->last_snapshot) {
6281                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6282                                 rec->bad_full_backref = 1;
6283                         }
6284                 }
6285         } else {
6286                 if (ri != NULL &&
6287                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6288                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6289                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6290                         rec->bad_full_backref = 1;
6291                 }
6292         }
6293
6294         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6295                 rec->flag_block_full_backref = 1;
6296                 parent = bytenr;
6297                 owner = 0;
6298         } else {
6299                 rec->flag_block_full_backref = 0;
6300                 parent = 0;
6301                 owner = btrfs_header_owner(buf);
6302         }
6303
6304         ret = check_block(root, extent_cache, buf, flags);
6305         if (ret)
6306                 goto out;
6307
6308         if (btrfs_is_leaf(buf)) {
6309                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6310                 for (i = 0; i < nritems; i++) {
6311                         struct btrfs_file_extent_item *fi;
6312                         btrfs_item_key_to_cpu(buf, &key, i);
6313                         /*
6314                          * Check key type against the leaf owner.
6315                          * Could filter quite a lot of early error if
6316                          * owner is correct
6317                          */
6318                         if (check_type_with_root(btrfs_header_owner(buf),
6319                                                  key.type)) {
6320                                 fprintf(stderr, "ignoring invalid key\n");
6321                                 continue;
6322                         }
6323                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6324                                 process_extent_item(root, extent_cache, buf,
6325                                                     i);
6326                                 continue;
6327                         }
6328                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6329                                 process_extent_item(root, extent_cache, buf,
6330                                                     i);
6331                                 continue;
6332                         }
6333                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6334                                 total_csum_bytes +=
6335                                         btrfs_item_size_nr(buf, i);
6336                                 continue;
6337                         }
6338                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6339                                 process_chunk_item(chunk_cache, &key, buf, i);
6340                                 continue;
6341                         }
6342                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6343                                 process_device_item(dev_cache, &key, buf, i);
6344                                 continue;
6345                         }
6346                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6347                                 process_block_group_item(block_group_cache,
6348                                         &key, buf, i);
6349                                 continue;
6350                         }
6351                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6352                                 process_device_extent_item(dev_extent_cache,
6353                                         &key, buf, i);
6354                                 continue;
6355
6356                         }
6357                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6358 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6359                                 process_extent_ref_v0(extent_cache, buf, i);
6360 #else
6361                                 BUG();
6362 #endif
6363                                 continue;
6364                         }
6365
6366                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6367                                 ret = add_tree_backref(extent_cache,
6368                                                 key.objectid, 0, key.offset, 0);
6369                                 if (ret < 0)
6370                                         error("add_tree_backref failed: %s",
6371                                               strerror(-ret));
6372                                 continue;
6373                         }
6374                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6375                                 ret = add_tree_backref(extent_cache,
6376                                                 key.objectid, key.offset, 0, 0);
6377                                 if (ret < 0)
6378                                         error("add_tree_backref failed: %s",
6379                                               strerror(-ret));
6380                                 continue;
6381                         }
6382                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6383                                 struct btrfs_extent_data_ref *ref;
6384                                 ref = btrfs_item_ptr(buf, i,
6385                                                 struct btrfs_extent_data_ref);
6386                                 add_data_backref(extent_cache,
6387                                         key.objectid, 0,
6388                                         btrfs_extent_data_ref_root(buf, ref),
6389                                         btrfs_extent_data_ref_objectid(buf,
6390                                                                        ref),
6391                                         btrfs_extent_data_ref_offset(buf, ref),
6392                                         btrfs_extent_data_ref_count(buf, ref),
6393                                         0, root->sectorsize);
6394                                 continue;
6395                         }
6396                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6397                                 struct btrfs_shared_data_ref *ref;
6398                                 ref = btrfs_item_ptr(buf, i,
6399                                                 struct btrfs_shared_data_ref);
6400                                 add_data_backref(extent_cache,
6401                                         key.objectid, key.offset, 0, 0, 0,
6402                                         btrfs_shared_data_ref_count(buf, ref),
6403                                         0, root->sectorsize);
6404                                 continue;
6405                         }
6406                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6407                                 struct bad_item *bad;
6408
6409                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6410                                         continue;
6411                                 if (!owner)
6412                                         continue;
6413                                 bad = malloc(sizeof(struct bad_item));
6414                                 if (!bad)
6415                                         continue;
6416                                 INIT_LIST_HEAD(&bad->list);
6417                                 memcpy(&bad->key, &key,
6418                                        sizeof(struct btrfs_key));
6419                                 bad->root_id = owner;
6420                                 list_add_tail(&bad->list, &delete_items);
6421                                 continue;
6422                         }
6423                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6424                                 continue;
6425                         fi = btrfs_item_ptr(buf, i,
6426                                             struct btrfs_file_extent_item);
6427                         if (btrfs_file_extent_type(buf, fi) ==
6428                             BTRFS_FILE_EXTENT_INLINE)
6429                                 continue;
6430                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6431                                 continue;
6432
6433                         data_bytes_allocated +=
6434                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6435                         if (data_bytes_allocated < root->sectorsize) {
6436                                 abort();
6437                         }
6438                         data_bytes_referenced +=
6439                                 btrfs_file_extent_num_bytes(buf, fi);
6440                         add_data_backref(extent_cache,
6441                                 btrfs_file_extent_disk_bytenr(buf, fi),
6442                                 parent, owner, key.objectid, key.offset -
6443                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6444                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6445                 }
6446         } else {
6447                 int level;
6448                 struct btrfs_key first_key;
6449
6450                 first_key.objectid = 0;
6451
6452                 if (nritems > 0)
6453                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6454                 level = btrfs_header_level(buf);
6455                 for (i = 0; i < nritems; i++) {
6456                         struct extent_record tmpl;
6457
6458                         ptr = btrfs_node_blockptr(buf, i);
6459                         size = root->nodesize;
6460                         btrfs_node_key_to_cpu(buf, &key, i);
6461                         if (ri != NULL) {
6462                                 if ((level == ri->drop_level)
6463                                     && is_dropped_key(&key, &ri->drop_key)) {
6464                                         continue;
6465                                 }
6466                         }
6467
6468                         memset(&tmpl, 0, sizeof(tmpl));
6469                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6470                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6471                         tmpl.start = ptr;
6472                         tmpl.nr = size;
6473                         tmpl.refs = 1;
6474                         tmpl.metadata = 1;
6475                         tmpl.max_size = size;
6476                         ret = add_extent_rec(extent_cache, &tmpl);
6477                         if (ret < 0)
6478                                 goto out;
6479
6480                         ret = add_tree_backref(extent_cache, ptr, parent,
6481                                         owner, 1);
6482                         if (ret < 0) {
6483                                 error("add_tree_backref failed: %s",
6484                                       strerror(-ret));
6485                                 continue;
6486                         }
6487
6488                         if (level > 1) {
6489                                 add_pending(nodes, seen, ptr, size);
6490                         } else {
6491                                 add_pending(pending, seen, ptr, size);
6492                         }
6493                 }
6494                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6495                                       nritems) * sizeof(struct btrfs_key_ptr);
6496         }
6497         total_btree_bytes += buf->len;
6498         if (fs_root_objectid(btrfs_header_owner(buf)))
6499                 total_fs_tree_bytes += buf->len;
6500         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6501                 total_extent_tree_bytes += buf->len;
6502         if (!found_old_backref &&
6503             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6504             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6505             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6506                 found_old_backref = 1;
6507 out:
6508         free_extent_buffer(buf);
6509         return ret;
6510 }
6511
6512 static int add_root_to_pending(struct extent_buffer *buf,
6513                                struct cache_tree *extent_cache,
6514                                struct cache_tree *pending,
6515                                struct cache_tree *seen,
6516                                struct cache_tree *nodes,
6517                                u64 objectid)
6518 {
6519         struct extent_record tmpl;
6520         int ret;
6521
6522         if (btrfs_header_level(buf) > 0)
6523                 add_pending(nodes, seen, buf->start, buf->len);
6524         else
6525                 add_pending(pending, seen, buf->start, buf->len);
6526
6527         memset(&tmpl, 0, sizeof(tmpl));
6528         tmpl.start = buf->start;
6529         tmpl.nr = buf->len;
6530         tmpl.is_root = 1;
6531         tmpl.refs = 1;
6532         tmpl.metadata = 1;
6533         tmpl.max_size = buf->len;
6534         add_extent_rec(extent_cache, &tmpl);
6535
6536         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6537             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6538                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6539                                 0, 1);
6540         else
6541                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6542                                 1);
6543         return ret;
6544 }
6545
6546 /* as we fix the tree, we might be deleting blocks that
6547  * we're tracking for repair.  This hook makes sure we
6548  * remove any backrefs for blocks as we are fixing them.
6549  */
6550 static int free_extent_hook(struct btrfs_trans_handle *trans,
6551                             struct btrfs_root *root,
6552                             u64 bytenr, u64 num_bytes, u64 parent,
6553                             u64 root_objectid, u64 owner, u64 offset,
6554                             int refs_to_drop)
6555 {
6556         struct extent_record *rec;
6557         struct cache_extent *cache;
6558         int is_data;
6559         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6560
6561         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6562         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6563         if (!cache)
6564                 return 0;
6565
6566         rec = container_of(cache, struct extent_record, cache);
6567         if (is_data) {
6568                 struct data_backref *back;
6569                 back = find_data_backref(rec, parent, root_objectid, owner,
6570                                          offset, 1, bytenr, num_bytes);
6571                 if (!back)
6572                         goto out;
6573                 if (back->node.found_ref) {
6574                         back->found_ref -= refs_to_drop;
6575                         if (rec->refs)
6576                                 rec->refs -= refs_to_drop;
6577                 }
6578                 if (back->node.found_extent_tree) {
6579                         back->num_refs -= refs_to_drop;
6580                         if (rec->extent_item_refs)
6581                                 rec->extent_item_refs -= refs_to_drop;
6582                 }
6583                 if (back->found_ref == 0)
6584                         back->node.found_ref = 0;
6585                 if (back->num_refs == 0)
6586                         back->node.found_extent_tree = 0;
6587
6588                 if (!back->node.found_extent_tree && back->node.found_ref) {
6589                         list_del(&back->node.list);
6590                         free(back);
6591                 }
6592         } else {
6593                 struct tree_backref *back;
6594                 back = find_tree_backref(rec, parent, root_objectid);
6595                 if (!back)
6596                         goto out;
6597                 if (back->node.found_ref) {
6598                         if (rec->refs)
6599                                 rec->refs--;
6600                         back->node.found_ref = 0;
6601                 }
6602                 if (back->node.found_extent_tree) {
6603                         if (rec->extent_item_refs)
6604                                 rec->extent_item_refs--;
6605                         back->node.found_extent_tree = 0;
6606                 }
6607                 if (!back->node.found_extent_tree && back->node.found_ref) {
6608                         list_del(&back->node.list);
6609                         free(back);
6610                 }
6611         }
6612         maybe_free_extent_rec(extent_cache, rec);
6613 out:
6614         return 0;
6615 }
6616
6617 static int delete_extent_records(struct btrfs_trans_handle *trans,
6618                                  struct btrfs_root *root,
6619                                  struct btrfs_path *path,
6620                                  u64 bytenr, u64 new_len)
6621 {
6622         struct btrfs_key key;
6623         struct btrfs_key found_key;
6624         struct extent_buffer *leaf;
6625         int ret;
6626         int slot;
6627
6628
6629         key.objectid = bytenr;
6630         key.type = (u8)-1;
6631         key.offset = (u64)-1;
6632
6633         while(1) {
6634                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6635                                         &key, path, 0, 1);
6636                 if (ret < 0)
6637                         break;
6638
6639                 if (ret > 0) {
6640                         ret = 0;
6641                         if (path->slots[0] == 0)
6642                                 break;
6643                         path->slots[0]--;
6644                 }
6645                 ret = 0;
6646
6647                 leaf = path->nodes[0];
6648                 slot = path->slots[0];
6649
6650                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6651                 if (found_key.objectid != bytenr)
6652                         break;
6653
6654                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6655                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6656                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6657                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6658                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6659                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6660                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6661                         btrfs_release_path(path);
6662                         if (found_key.type == 0) {
6663                                 if (found_key.offset == 0)
6664                                         break;
6665                                 key.offset = found_key.offset - 1;
6666                                 key.type = found_key.type;
6667                         }
6668                         key.type = found_key.type - 1;
6669                         key.offset = (u64)-1;
6670                         continue;
6671                 }
6672
6673                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6674                         found_key.objectid, found_key.type, found_key.offset);
6675
6676                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6677                 if (ret)
6678                         break;
6679                 btrfs_release_path(path);
6680
6681                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6682                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6683                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6684                                 found_key.offset : root->nodesize;
6685
6686                         ret = btrfs_update_block_group(trans, root, bytenr,
6687                                                        bytes, 0, 0);
6688                         if (ret)
6689                                 break;
6690                 }
6691         }
6692
6693         btrfs_release_path(path);
6694         return ret;
6695 }
6696
6697 /*
6698  * for a single backref, this will allocate a new extent
6699  * and add the backref to it.
6700  */
6701 static int record_extent(struct btrfs_trans_handle *trans,
6702                          struct btrfs_fs_info *info,
6703                          struct btrfs_path *path,
6704                          struct extent_record *rec,
6705                          struct extent_backref *back,
6706                          int allocated, u64 flags)
6707 {
6708         int ret;
6709         struct btrfs_root *extent_root = info->extent_root;
6710         struct extent_buffer *leaf;
6711         struct btrfs_key ins_key;
6712         struct btrfs_extent_item *ei;
6713         struct tree_backref *tback;
6714         struct data_backref *dback;
6715         struct btrfs_tree_block_info *bi;
6716
6717         if (!back->is_data)
6718                 rec->max_size = max_t(u64, rec->max_size,
6719                                     info->extent_root->nodesize);
6720
6721         if (!allocated) {
6722                 u32 item_size = sizeof(*ei);
6723
6724                 if (!back->is_data)
6725                         item_size += sizeof(*bi);
6726
6727                 ins_key.objectid = rec->start;
6728                 ins_key.offset = rec->max_size;
6729                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6730
6731                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6732                                         &ins_key, item_size);
6733                 if (ret)
6734                         goto fail;
6735
6736                 leaf = path->nodes[0];
6737                 ei = btrfs_item_ptr(leaf, path->slots[0],
6738                                     struct btrfs_extent_item);
6739
6740                 btrfs_set_extent_refs(leaf, ei, 0);
6741                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6742
6743                 if (back->is_data) {
6744                         btrfs_set_extent_flags(leaf, ei,
6745                                                BTRFS_EXTENT_FLAG_DATA);
6746                 } else {
6747                         struct btrfs_disk_key copy_key;;
6748
6749                         tback = to_tree_backref(back);
6750                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6751                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6752                                              sizeof(*bi));
6753
6754                         btrfs_set_disk_key_objectid(&copy_key,
6755                                                     rec->info_objectid);
6756                         btrfs_set_disk_key_type(&copy_key, 0);
6757                         btrfs_set_disk_key_offset(&copy_key, 0);
6758
6759                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6760                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6761
6762                         btrfs_set_extent_flags(leaf, ei,
6763                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6764                 }
6765
6766                 btrfs_mark_buffer_dirty(leaf);
6767                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6768                                                rec->max_size, 1, 0);
6769                 if (ret)
6770                         goto fail;
6771                 btrfs_release_path(path);
6772         }
6773
6774         if (back->is_data) {
6775                 u64 parent;
6776                 int i;
6777
6778                 dback = to_data_backref(back);
6779                 if (back->full_backref)
6780                         parent = dback->parent;
6781                 else
6782                         parent = 0;
6783
6784                 for (i = 0; i < dback->found_ref; i++) {
6785                         /* if parent != 0, we're doing a full backref
6786                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6787                          * just makes the backref allocator create a data
6788                          * backref
6789                          */
6790                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6791                                                    rec->start, rec->max_size,
6792                                                    parent,
6793                                                    dback->root,
6794                                                    parent ?
6795                                                    BTRFS_FIRST_FREE_OBJECTID :
6796                                                    dback->owner,
6797                                                    dback->offset);
6798                         if (ret)
6799                                 break;
6800                 }
6801                 fprintf(stderr, "adding new data backref"
6802                                 " on %llu %s %llu owner %llu"
6803                                 " offset %llu found %d\n",
6804                                 (unsigned long long)rec->start,
6805                                 back->full_backref ?
6806                                 "parent" : "root",
6807                                 back->full_backref ?
6808                                 (unsigned long long)parent :
6809                                 (unsigned long long)dback->root,
6810                                 (unsigned long long)dback->owner,
6811                                 (unsigned long long)dback->offset,
6812                                 dback->found_ref);
6813         } else {
6814                 u64 parent;
6815
6816                 tback = to_tree_backref(back);
6817                 if (back->full_backref)
6818                         parent = tback->parent;
6819                 else
6820                         parent = 0;
6821
6822                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6823                                            rec->start, rec->max_size,
6824                                            parent, tback->root, 0, 0);
6825                 fprintf(stderr, "adding new tree backref on "
6826                         "start %llu len %llu parent %llu root %llu\n",
6827                         rec->start, rec->max_size, parent, tback->root);
6828         }
6829 fail:
6830         btrfs_release_path(path);
6831         return ret;
6832 }
6833
6834 static struct extent_entry *find_entry(struct list_head *entries,
6835                                        u64 bytenr, u64 bytes)
6836 {
6837         struct extent_entry *entry = NULL;
6838
6839         list_for_each_entry(entry, entries, list) {
6840                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6841                         return entry;
6842         }
6843
6844         return NULL;
6845 }
6846
6847 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6848 {
6849         struct extent_entry *entry, *best = NULL, *prev = NULL;
6850
6851         list_for_each_entry(entry, entries, list) {
6852                 if (!prev) {
6853                         prev = entry;
6854                         continue;
6855                 }
6856
6857                 /*
6858                  * If there are as many broken entries as entries then we know
6859                  * not to trust this particular entry.
6860                  */
6861                 if (entry->broken == entry->count)
6862                         continue;
6863
6864                 /*
6865                  * If our current entry == best then we can't be sure our best
6866                  * is really the best, so we need to keep searching.
6867                  */
6868                 if (best && best->count == entry->count) {
6869                         prev = entry;
6870                         best = NULL;
6871                         continue;
6872                 }
6873
6874                 /* Prev == entry, not good enough, have to keep searching */
6875                 if (!prev->broken && prev->count == entry->count)
6876                         continue;
6877
6878                 if (!best)
6879                         best = (prev->count > entry->count) ? prev : entry;
6880                 else if (best->count < entry->count)
6881                         best = entry;
6882                 prev = entry;
6883         }
6884
6885         return best;
6886 }
6887
6888 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6889                       struct data_backref *dback, struct extent_entry *entry)
6890 {
6891         struct btrfs_trans_handle *trans;
6892         struct btrfs_root *root;
6893         struct btrfs_file_extent_item *fi;
6894         struct extent_buffer *leaf;
6895         struct btrfs_key key;
6896         u64 bytenr, bytes;
6897         int ret, err;
6898
6899         key.objectid = dback->root;
6900         key.type = BTRFS_ROOT_ITEM_KEY;
6901         key.offset = (u64)-1;
6902         root = btrfs_read_fs_root(info, &key);
6903         if (IS_ERR(root)) {
6904                 fprintf(stderr, "Couldn't find root for our ref\n");
6905                 return -EINVAL;
6906         }
6907
6908         /*
6909          * The backref points to the original offset of the extent if it was
6910          * split, so we need to search down to the offset we have and then walk
6911          * forward until we find the backref we're looking for.
6912          */
6913         key.objectid = dback->owner;
6914         key.type = BTRFS_EXTENT_DATA_KEY;
6915         key.offset = dback->offset;
6916         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6917         if (ret < 0) {
6918                 fprintf(stderr, "Error looking up ref %d\n", ret);
6919                 return ret;
6920         }
6921
6922         while (1) {
6923                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6924                         ret = btrfs_next_leaf(root, path);
6925                         if (ret) {
6926                                 fprintf(stderr, "Couldn't find our ref, next\n");
6927                                 return -EINVAL;
6928                         }
6929                 }
6930                 leaf = path->nodes[0];
6931                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6932                 if (key.objectid != dback->owner ||
6933                     key.type != BTRFS_EXTENT_DATA_KEY) {
6934                         fprintf(stderr, "Couldn't find our ref, search\n");
6935                         return -EINVAL;
6936                 }
6937                 fi = btrfs_item_ptr(leaf, path->slots[0],
6938                                     struct btrfs_file_extent_item);
6939                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6940                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6941
6942                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6943                         break;
6944                 path->slots[0]++;
6945         }
6946
6947         btrfs_release_path(path);
6948
6949         trans = btrfs_start_transaction(root, 1);
6950         if (IS_ERR(trans))
6951                 return PTR_ERR(trans);
6952
6953         /*
6954          * Ok we have the key of the file extent we want to fix, now we can cow
6955          * down to the thing and fix it.
6956          */
6957         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6958         if (ret < 0) {
6959                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6960                         key.objectid, key.type, key.offset, ret);
6961                 goto out;
6962         }
6963         if (ret > 0) {
6964                 fprintf(stderr, "Well that's odd, we just found this key "
6965                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6966                         key.offset);
6967                 ret = -EINVAL;
6968                 goto out;
6969         }
6970         leaf = path->nodes[0];
6971         fi = btrfs_item_ptr(leaf, path->slots[0],
6972                             struct btrfs_file_extent_item);
6973
6974         if (btrfs_file_extent_compression(leaf, fi) &&
6975             dback->disk_bytenr != entry->bytenr) {
6976                 fprintf(stderr, "Ref doesn't match the record start and is "
6977                         "compressed, please take a btrfs-image of this file "
6978                         "system and send it to a btrfs developer so they can "
6979                         "complete this functionality for bytenr %Lu\n",
6980                         dback->disk_bytenr);
6981                 ret = -EINVAL;
6982                 goto out;
6983         }
6984
6985         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6986                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6987         } else if (dback->disk_bytenr > entry->bytenr) {
6988                 u64 off_diff, offset;
6989
6990                 off_diff = dback->disk_bytenr - entry->bytenr;
6991                 offset = btrfs_file_extent_offset(leaf, fi);
6992                 if (dback->disk_bytenr + offset +
6993                     btrfs_file_extent_num_bytes(leaf, fi) >
6994                     entry->bytenr + entry->bytes) {
6995                         fprintf(stderr, "Ref is past the entry end, please "
6996                                 "take a btrfs-image of this file system and "
6997                                 "send it to a btrfs developer, ref %Lu\n",
6998                                 dback->disk_bytenr);
6999                         ret = -EINVAL;
7000                         goto out;
7001                 }
7002                 offset += off_diff;
7003                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7004                 btrfs_set_file_extent_offset(leaf, fi, offset);
7005         } else if (dback->disk_bytenr < entry->bytenr) {
7006                 u64 offset;
7007
7008                 offset = btrfs_file_extent_offset(leaf, fi);
7009                 if (dback->disk_bytenr + offset < entry->bytenr) {
7010                         fprintf(stderr, "Ref is before the entry start, please"
7011                                 " take a btrfs-image of this file system and "
7012                                 "send it to a btrfs developer, ref %Lu\n",
7013                                 dback->disk_bytenr);
7014                         ret = -EINVAL;
7015                         goto out;
7016                 }
7017
7018                 offset += dback->disk_bytenr;
7019                 offset -= entry->bytenr;
7020                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7021                 btrfs_set_file_extent_offset(leaf, fi, offset);
7022         }
7023
7024         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7025
7026         /*
7027          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7028          * only do this if we aren't using compression, otherwise it's a
7029          * trickier case.
7030          */
7031         if (!btrfs_file_extent_compression(leaf, fi))
7032                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7033         else
7034                 printf("ram bytes may be wrong?\n");
7035         btrfs_mark_buffer_dirty(leaf);
7036 out:
7037         err = btrfs_commit_transaction(trans, root);
7038         btrfs_release_path(path);
7039         return ret ? ret : err;
7040 }
7041
7042 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7043                            struct extent_record *rec)
7044 {
7045         struct extent_backref *back;
7046         struct data_backref *dback;
7047         struct extent_entry *entry, *best = NULL;
7048         LIST_HEAD(entries);
7049         int nr_entries = 0;
7050         int broken_entries = 0;
7051         int ret = 0;
7052         short mismatch = 0;
7053
7054         /*
7055          * Metadata is easy and the backrefs should always agree on bytenr and
7056          * size, if not we've got bigger issues.
7057          */
7058         if (rec->metadata)
7059                 return 0;
7060
7061         list_for_each_entry(back, &rec->backrefs, list) {
7062                 if (back->full_backref || !back->is_data)
7063                         continue;
7064
7065                 dback = to_data_backref(back);
7066
7067                 /*
7068                  * We only pay attention to backrefs that we found a real
7069                  * backref for.
7070                  */
7071                 if (dback->found_ref == 0)
7072                         continue;
7073
7074                 /*
7075                  * For now we only catch when the bytes don't match, not the
7076                  * bytenr.  We can easily do this at the same time, but I want
7077                  * to have a fs image to test on before we just add repair
7078                  * functionality willy-nilly so we know we won't screw up the
7079                  * repair.
7080                  */
7081
7082                 entry = find_entry(&entries, dback->disk_bytenr,
7083                                    dback->bytes);
7084                 if (!entry) {
7085                         entry = malloc(sizeof(struct extent_entry));
7086                         if (!entry) {
7087                                 ret = -ENOMEM;
7088                                 goto out;
7089                         }
7090                         memset(entry, 0, sizeof(*entry));
7091                         entry->bytenr = dback->disk_bytenr;
7092                         entry->bytes = dback->bytes;
7093                         list_add_tail(&entry->list, &entries);
7094                         nr_entries++;
7095                 }
7096
7097                 /*
7098                  * If we only have on entry we may think the entries agree when
7099                  * in reality they don't so we have to do some extra checking.
7100                  */
7101                 if (dback->disk_bytenr != rec->start ||
7102                     dback->bytes != rec->nr || back->broken)
7103                         mismatch = 1;
7104
7105                 if (back->broken) {
7106                         entry->broken++;
7107                         broken_entries++;
7108                 }
7109
7110                 entry->count++;
7111         }
7112
7113         /* Yay all the backrefs agree, carry on good sir */
7114         if (nr_entries <= 1 && !mismatch)
7115                 goto out;
7116
7117         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7118                 "%Lu\n", rec->start);
7119
7120         /*
7121          * First we want to see if the backrefs can agree amongst themselves who
7122          * is right, so figure out which one of the entries has the highest
7123          * count.
7124          */
7125         best = find_most_right_entry(&entries);
7126
7127         /*
7128          * Ok so we may have an even split between what the backrefs think, so
7129          * this is where we use the extent ref to see what it thinks.
7130          */
7131         if (!best) {
7132                 entry = find_entry(&entries, rec->start, rec->nr);
7133                 if (!entry && (!broken_entries || !rec->found_rec)) {
7134                         fprintf(stderr, "Backrefs don't agree with each other "
7135                                 "and extent record doesn't agree with anybody,"
7136                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7137                                 rec->start, rec->nr);
7138                         ret = -EINVAL;
7139                         goto out;
7140                 } else if (!entry) {
7141                         /*
7142                          * Ok our backrefs were broken, we'll assume this is the
7143                          * correct value and add an entry for this range.
7144                          */
7145                         entry = malloc(sizeof(struct extent_entry));
7146                         if (!entry) {
7147                                 ret = -ENOMEM;
7148                                 goto out;
7149                         }
7150                         memset(entry, 0, sizeof(*entry));
7151                         entry->bytenr = rec->start;
7152                         entry->bytes = rec->nr;
7153                         list_add_tail(&entry->list, &entries);
7154                         nr_entries++;
7155                 }
7156                 entry->count++;
7157                 best = find_most_right_entry(&entries);
7158                 if (!best) {
7159                         fprintf(stderr, "Backrefs and extent record evenly "
7160                                 "split on who is right, this is going to "
7161                                 "require user input to fix bytenr %Lu bytes "
7162                                 "%Lu\n", rec->start, rec->nr);
7163                         ret = -EINVAL;
7164                         goto out;
7165                 }
7166         }
7167
7168         /*
7169          * I don't think this can happen currently as we'll abort() if we catch
7170          * this case higher up, but in case somebody removes that we still can't
7171          * deal with it properly here yet, so just bail out of that's the case.
7172          */
7173         if (best->bytenr != rec->start) {
7174                 fprintf(stderr, "Extent start and backref starts don't match, "
7175                         "please use btrfs-image on this file system and send "
7176                         "it to a btrfs developer so they can make fsck fix "
7177                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7178                         rec->start, rec->nr);
7179                 ret = -EINVAL;
7180                 goto out;
7181         }
7182
7183         /*
7184          * Ok great we all agreed on an extent record, let's go find the real
7185          * references and fix up the ones that don't match.
7186          */
7187         list_for_each_entry(back, &rec->backrefs, list) {
7188                 if (back->full_backref || !back->is_data)
7189                         continue;
7190
7191                 dback = to_data_backref(back);
7192
7193                 /*
7194                  * Still ignoring backrefs that don't have a real ref attached
7195                  * to them.
7196                  */
7197                 if (dback->found_ref == 0)
7198                         continue;
7199
7200                 if (dback->bytes == best->bytes &&
7201                     dback->disk_bytenr == best->bytenr)
7202                         continue;
7203
7204                 ret = repair_ref(info, path, dback, best);
7205                 if (ret)
7206                         goto out;
7207         }
7208
7209         /*
7210          * Ok we messed with the actual refs, which means we need to drop our
7211          * entire cache and go back and rescan.  I know this is a huge pain and
7212          * adds a lot of extra work, but it's the only way to be safe.  Once all
7213          * the backrefs agree we may not need to do anything to the extent
7214          * record itself.
7215          */
7216         ret = -EAGAIN;
7217 out:
7218         while (!list_empty(&entries)) {
7219                 entry = list_entry(entries.next, struct extent_entry, list);
7220                 list_del_init(&entry->list);
7221                 free(entry);
7222         }
7223         return ret;
7224 }
7225
7226 static int process_duplicates(struct btrfs_root *root,
7227                               struct cache_tree *extent_cache,
7228                               struct extent_record *rec)
7229 {
7230         struct extent_record *good, *tmp;
7231         struct cache_extent *cache;
7232         int ret;
7233
7234         /*
7235          * If we found a extent record for this extent then return, or if we
7236          * have more than one duplicate we are likely going to need to delete
7237          * something.
7238          */
7239         if (rec->found_rec || rec->num_duplicates > 1)
7240                 return 0;
7241
7242         /* Shouldn't happen but just in case */
7243         BUG_ON(!rec->num_duplicates);
7244
7245         /*
7246          * So this happens if we end up with a backref that doesn't match the
7247          * actual extent entry.  So either the backref is bad or the extent
7248          * entry is bad.  Either way we want to have the extent_record actually
7249          * reflect what we found in the extent_tree, so we need to take the
7250          * duplicate out and use that as the extent_record since the only way we
7251          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7252          */
7253         remove_cache_extent(extent_cache, &rec->cache);
7254
7255         good = to_extent_record(rec->dups.next);
7256         list_del_init(&good->list);
7257         INIT_LIST_HEAD(&good->backrefs);
7258         INIT_LIST_HEAD(&good->dups);
7259         good->cache.start = good->start;
7260         good->cache.size = good->nr;
7261         good->content_checked = 0;
7262         good->owner_ref_checked = 0;
7263         good->num_duplicates = 0;
7264         good->refs = rec->refs;
7265         list_splice_init(&rec->backrefs, &good->backrefs);
7266         while (1) {
7267                 cache = lookup_cache_extent(extent_cache, good->start,
7268                                             good->nr);
7269                 if (!cache)
7270                         break;
7271                 tmp = container_of(cache, struct extent_record, cache);
7272
7273                 /*
7274                  * If we find another overlapping extent and it's found_rec is
7275                  * set then it's a duplicate and we need to try and delete
7276                  * something.
7277                  */
7278                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7279                         if (list_empty(&good->list))
7280                                 list_add_tail(&good->list,
7281                                               &duplicate_extents);
7282                         good->num_duplicates += tmp->num_duplicates + 1;
7283                         list_splice_init(&tmp->dups, &good->dups);
7284                         list_del_init(&tmp->list);
7285                         list_add_tail(&tmp->list, &good->dups);
7286                         remove_cache_extent(extent_cache, &tmp->cache);
7287                         continue;
7288                 }
7289
7290                 /*
7291                  * Ok we have another non extent item backed extent rec, so lets
7292                  * just add it to this extent and carry on like we did above.
7293                  */
7294                 good->refs += tmp->refs;
7295                 list_splice_init(&tmp->backrefs, &good->backrefs);
7296                 remove_cache_extent(extent_cache, &tmp->cache);
7297                 free(tmp);
7298         }
7299         ret = insert_cache_extent(extent_cache, &good->cache);
7300         BUG_ON(ret);
7301         free(rec);
7302         return good->num_duplicates ? 0 : 1;
7303 }
7304
7305 static int delete_duplicate_records(struct btrfs_root *root,
7306                                     struct extent_record *rec)
7307 {
7308         struct btrfs_trans_handle *trans;
7309         LIST_HEAD(delete_list);
7310         struct btrfs_path *path;
7311         struct extent_record *tmp, *good, *n;
7312         int nr_del = 0;
7313         int ret = 0, err;
7314         struct btrfs_key key;
7315
7316         path = btrfs_alloc_path();
7317         if (!path) {
7318                 ret = -ENOMEM;
7319                 goto out;
7320         }
7321
7322         good = rec;
7323         /* Find the record that covers all of the duplicates. */
7324         list_for_each_entry(tmp, &rec->dups, list) {
7325                 if (good->start < tmp->start)
7326                         continue;
7327                 if (good->nr > tmp->nr)
7328                         continue;
7329
7330                 if (tmp->start + tmp->nr < good->start + good->nr) {
7331                         fprintf(stderr, "Ok we have overlapping extents that "
7332                                 "aren't completely covered by each other, this "
7333                                 "is going to require more careful thought.  "
7334                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7335                                 tmp->start, tmp->nr, good->start, good->nr);
7336                         abort();
7337                 }
7338                 good = tmp;
7339         }
7340
7341         if (good != rec)
7342                 list_add_tail(&rec->list, &delete_list);
7343
7344         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7345                 if (tmp == good)
7346                         continue;
7347                 list_move_tail(&tmp->list, &delete_list);
7348         }
7349
7350         root = root->fs_info->extent_root;
7351         trans = btrfs_start_transaction(root, 1);
7352         if (IS_ERR(trans)) {
7353                 ret = PTR_ERR(trans);
7354                 goto out;
7355         }
7356
7357         list_for_each_entry(tmp, &delete_list, list) {
7358                 if (tmp->found_rec == 0)
7359                         continue;
7360                 key.objectid = tmp->start;
7361                 key.type = BTRFS_EXTENT_ITEM_KEY;
7362                 key.offset = tmp->nr;
7363
7364                 /* Shouldn't happen but just in case */
7365                 if (tmp->metadata) {
7366                         fprintf(stderr, "Well this shouldn't happen, extent "
7367                                 "record overlaps but is metadata? "
7368                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7369                         abort();
7370                 }
7371
7372                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7373                 if (ret) {
7374                         if (ret > 0)
7375                                 ret = -EINVAL;
7376                         break;
7377                 }
7378                 ret = btrfs_del_item(trans, root, path);
7379                 if (ret)
7380                         break;
7381                 btrfs_release_path(path);
7382                 nr_del++;
7383         }
7384         err = btrfs_commit_transaction(trans, root);
7385         if (err && !ret)
7386                 ret = err;
7387 out:
7388         while (!list_empty(&delete_list)) {
7389                 tmp = to_extent_record(delete_list.next);
7390                 list_del_init(&tmp->list);
7391                 if (tmp == rec)
7392                         continue;
7393                 free(tmp);
7394         }
7395
7396         while (!list_empty(&rec->dups)) {
7397                 tmp = to_extent_record(rec->dups.next);
7398                 list_del_init(&tmp->list);
7399                 free(tmp);
7400         }
7401
7402         btrfs_free_path(path);
7403
7404         if (!ret && !nr_del)
7405                 rec->num_duplicates = 0;
7406
7407         return ret ? ret : nr_del;
7408 }
7409
7410 static int find_possible_backrefs(struct btrfs_fs_info *info,
7411                                   struct btrfs_path *path,
7412                                   struct cache_tree *extent_cache,
7413                                   struct extent_record *rec)
7414 {
7415         struct btrfs_root *root;
7416         struct extent_backref *back;
7417         struct data_backref *dback;
7418         struct cache_extent *cache;
7419         struct btrfs_file_extent_item *fi;
7420         struct btrfs_key key;
7421         u64 bytenr, bytes;
7422         int ret;
7423
7424         list_for_each_entry(back, &rec->backrefs, list) {
7425                 /* Don't care about full backrefs (poor unloved backrefs) */
7426                 if (back->full_backref || !back->is_data)
7427                         continue;
7428
7429                 dback = to_data_backref(back);
7430
7431                 /* We found this one, we don't need to do a lookup */
7432                 if (dback->found_ref)
7433                         continue;
7434
7435                 key.objectid = dback->root;
7436                 key.type = BTRFS_ROOT_ITEM_KEY;
7437                 key.offset = (u64)-1;
7438
7439                 root = btrfs_read_fs_root(info, &key);
7440
7441                 /* No root, definitely a bad ref, skip */
7442                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7443                         continue;
7444                 /* Other err, exit */
7445                 if (IS_ERR(root))
7446                         return PTR_ERR(root);
7447
7448                 key.objectid = dback->owner;
7449                 key.type = BTRFS_EXTENT_DATA_KEY;
7450                 key.offset = dback->offset;
7451                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7452                 if (ret) {
7453                         btrfs_release_path(path);
7454                         if (ret < 0)
7455                                 return ret;
7456                         /* Didn't find it, we can carry on */
7457                         ret = 0;
7458                         continue;
7459                 }
7460
7461                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7462                                     struct btrfs_file_extent_item);
7463                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7464                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7465                 btrfs_release_path(path);
7466                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7467                 if (cache) {
7468                         struct extent_record *tmp;
7469                         tmp = container_of(cache, struct extent_record, cache);
7470
7471                         /*
7472                          * If we found an extent record for the bytenr for this
7473                          * particular backref then we can't add it to our
7474                          * current extent record.  We only want to add backrefs
7475                          * that don't have a corresponding extent item in the
7476                          * extent tree since they likely belong to this record
7477                          * and we need to fix it if it doesn't match bytenrs.
7478                          */
7479                         if  (tmp->found_rec)
7480                                 continue;
7481                 }
7482
7483                 dback->found_ref += 1;
7484                 dback->disk_bytenr = bytenr;
7485                 dback->bytes = bytes;
7486
7487                 /*
7488                  * Set this so the verify backref code knows not to trust the
7489                  * values in this backref.
7490                  */
7491                 back->broken = 1;
7492         }
7493
7494         return 0;
7495 }
7496
7497 /*
7498  * Record orphan data ref into corresponding root.
7499  *
7500  * Return 0 if the extent item contains data ref and recorded.
7501  * Return 1 if the extent item contains no useful data ref.
7502  *   In that case, it may contain only shared_dataref or metadata backref,
7503  *   or the file extent exists (this should be handled by the extent bytenr
7504  *   recovery routine).
7505  * Return <0 if something goes wrong.
7506  */
7507 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7508                                       struct extent_record *rec)
7509 {
7510         struct btrfs_key key;
7511         struct btrfs_root *dest_root;
7512         struct extent_backref *back;
7513         struct data_backref *dback;
7514         struct orphan_data_extent *orphan;
7515         struct btrfs_path *path;
7516         int recorded_data_ref = 0;
7517         int ret = 0;
7518
7519         if (rec->metadata)
7520                 return 1;
7521         path = btrfs_alloc_path();
7522         if (!path)
7523                 return -ENOMEM;
7524         list_for_each_entry(back, &rec->backrefs, list) {
7525                 if (back->full_backref || !back->is_data ||
7526                     !back->found_extent_tree)
7527                         continue;
7528                 dback = to_data_backref(back);
7529                 if (dback->found_ref)
7530                         continue;
7531                 key.objectid = dback->root;
7532                 key.type = BTRFS_ROOT_ITEM_KEY;
7533                 key.offset = (u64)-1;
7534
7535                 dest_root = btrfs_read_fs_root(fs_info, &key);
7536
7537                 /* For non-exist root we just skip it */
7538                 if (IS_ERR(dest_root) || !dest_root)
7539                         continue;
7540
7541                 key.objectid = dback->owner;
7542                 key.type = BTRFS_EXTENT_DATA_KEY;
7543                 key.offset = dback->offset;
7544
7545                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7546                 /*
7547                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7548                  * we need to record it for inode/file extent rebuild.
7549                  * For ret > 0, we record it only for file extent rebuild.
7550                  * For ret == 0, the file extent exists but only bytenr
7551                  * mismatch, let the original bytenr fix routine to handle,
7552                  * don't record it.
7553                  */
7554                 if (ret == 0)
7555                         continue;
7556                 ret = 0;
7557                 orphan = malloc(sizeof(*orphan));
7558                 if (!orphan) {
7559                         ret = -ENOMEM;
7560                         goto out;
7561                 }
7562                 INIT_LIST_HEAD(&orphan->list);
7563                 orphan->root = dback->root;
7564                 orphan->objectid = dback->owner;
7565                 orphan->offset = dback->offset;
7566                 orphan->disk_bytenr = rec->cache.start;
7567                 orphan->disk_len = rec->cache.size;
7568                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7569                 recorded_data_ref = 1;
7570         }
7571 out:
7572         btrfs_free_path(path);
7573         if (!ret)
7574                 return !recorded_data_ref;
7575         else
7576                 return ret;
7577 }
7578
7579 /*
7580  * when an incorrect extent item is found, this will delete
7581  * all of the existing entries for it and recreate them
7582  * based on what the tree scan found.
7583  */
7584 static int fixup_extent_refs(struct btrfs_fs_info *info,
7585                              struct cache_tree *extent_cache,
7586                              struct extent_record *rec)
7587 {
7588         struct btrfs_trans_handle *trans = NULL;
7589         int ret;
7590         struct btrfs_path *path;
7591         struct list_head *cur = rec->backrefs.next;
7592         struct cache_extent *cache;
7593         struct extent_backref *back;
7594         int allocated = 0;
7595         u64 flags = 0;
7596
7597         if (rec->flag_block_full_backref)
7598                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7599
7600         path = btrfs_alloc_path();
7601         if (!path)
7602                 return -ENOMEM;
7603
7604         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7605                 /*
7606                  * Sometimes the backrefs themselves are so broken they don't
7607                  * get attached to any meaningful rec, so first go back and
7608                  * check any of our backrefs that we couldn't find and throw
7609                  * them into the list if we find the backref so that
7610                  * verify_backrefs can figure out what to do.
7611                  */
7612                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7613                 if (ret < 0)
7614                         goto out;
7615         }
7616
7617         /* step one, make sure all of the backrefs agree */
7618         ret = verify_backrefs(info, path, rec);
7619         if (ret < 0)
7620                 goto out;
7621
7622         trans = btrfs_start_transaction(info->extent_root, 1);
7623         if (IS_ERR(trans)) {
7624                 ret = PTR_ERR(trans);
7625                 goto out;
7626         }
7627
7628         /* step two, delete all the existing records */
7629         ret = delete_extent_records(trans, info->extent_root, path,
7630                                     rec->start, rec->max_size);
7631
7632         if (ret < 0)
7633                 goto out;
7634
7635         /* was this block corrupt?  If so, don't add references to it */
7636         cache = lookup_cache_extent(info->corrupt_blocks,
7637                                     rec->start, rec->max_size);
7638         if (cache) {
7639                 ret = 0;
7640                 goto out;
7641         }
7642
7643         /* step three, recreate all the refs we did find */
7644         while(cur != &rec->backrefs) {
7645                 back = to_extent_backref(cur);
7646                 cur = cur->next;
7647
7648                 /*
7649                  * if we didn't find any references, don't create a
7650                  * new extent record
7651                  */
7652                 if (!back->found_ref)
7653                         continue;
7654
7655                 rec->bad_full_backref = 0;
7656                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7657                 allocated = 1;
7658
7659                 if (ret)
7660                         goto out;
7661         }
7662 out:
7663         if (trans) {
7664                 int err = btrfs_commit_transaction(trans, info->extent_root);
7665                 if (!ret)
7666                         ret = err;
7667         }
7668
7669         btrfs_free_path(path);
7670         return ret;
7671 }
7672
7673 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7674                               struct extent_record *rec)
7675 {
7676         struct btrfs_trans_handle *trans;
7677         struct btrfs_root *root = fs_info->extent_root;
7678         struct btrfs_path *path;
7679         struct btrfs_extent_item *ei;
7680         struct btrfs_key key;
7681         u64 flags;
7682         int ret = 0;
7683
7684         key.objectid = rec->start;
7685         if (rec->metadata) {
7686                 key.type = BTRFS_METADATA_ITEM_KEY;
7687                 key.offset = rec->info_level;
7688         } else {
7689                 key.type = BTRFS_EXTENT_ITEM_KEY;
7690                 key.offset = rec->max_size;
7691         }
7692
7693         path = btrfs_alloc_path();
7694         if (!path)
7695                 return -ENOMEM;
7696
7697         trans = btrfs_start_transaction(root, 0);
7698         if (IS_ERR(trans)) {
7699                 btrfs_free_path(path);
7700                 return PTR_ERR(trans);
7701         }
7702
7703         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7704         if (ret < 0) {
7705                 btrfs_free_path(path);
7706                 btrfs_commit_transaction(trans, root);
7707                 return ret;
7708         } else if (ret) {
7709                 fprintf(stderr, "Didn't find extent for %llu\n",
7710                         (unsigned long long)rec->start);
7711                 btrfs_free_path(path);
7712                 btrfs_commit_transaction(trans, root);
7713                 return -ENOENT;
7714         }
7715
7716         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7717                             struct btrfs_extent_item);
7718         flags = btrfs_extent_flags(path->nodes[0], ei);
7719         if (rec->flag_block_full_backref) {
7720                 fprintf(stderr, "setting full backref on %llu\n",
7721                         (unsigned long long)key.objectid);
7722                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7723         } else {
7724                 fprintf(stderr, "clearing full backref on %llu\n",
7725                         (unsigned long long)key.objectid);
7726                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7727         }
7728         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7729         btrfs_mark_buffer_dirty(path->nodes[0]);
7730         btrfs_free_path(path);
7731         return btrfs_commit_transaction(trans, root);
7732 }
7733
7734 /* right now we only prune from the extent allocation tree */
7735 static int prune_one_block(struct btrfs_trans_handle *trans,
7736                            struct btrfs_fs_info *info,
7737                            struct btrfs_corrupt_block *corrupt)
7738 {
7739         int ret;
7740         struct btrfs_path path;
7741         struct extent_buffer *eb;
7742         u64 found;
7743         int slot;
7744         int nritems;
7745         int level = corrupt->level + 1;
7746
7747         btrfs_init_path(&path);
7748 again:
7749         /* we want to stop at the parent to our busted block */
7750         path.lowest_level = level;
7751
7752         ret = btrfs_search_slot(trans, info->extent_root,
7753                                 &corrupt->key, &path, -1, 1);
7754
7755         if (ret < 0)
7756                 goto out;
7757
7758         eb = path.nodes[level];
7759         if (!eb) {
7760                 ret = -ENOENT;
7761                 goto out;
7762         }
7763
7764         /*
7765          * hopefully the search gave us the block we want to prune,
7766          * lets try that first
7767          */
7768         slot = path.slots[level];
7769         found =  btrfs_node_blockptr(eb, slot);
7770         if (found == corrupt->cache.start)
7771                 goto del_ptr;
7772
7773         nritems = btrfs_header_nritems(eb);
7774
7775         /* the search failed, lets scan this node and hope we find it */
7776         for (slot = 0; slot < nritems; slot++) {
7777                 found =  btrfs_node_blockptr(eb, slot);
7778                 if (found == corrupt->cache.start)
7779                         goto del_ptr;
7780         }
7781         /*
7782          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7783          * to this block
7784          */
7785         if (eb == info->extent_root->node) {
7786                 ret = -ENOENT;
7787                 goto out;
7788         } else {
7789                 level++;
7790                 btrfs_release_path(&path);
7791                 goto again;
7792         }
7793
7794 del_ptr:
7795         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7796         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7797
7798 out:
7799         btrfs_release_path(&path);
7800         return ret;
7801 }
7802
7803 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7804 {
7805         struct btrfs_trans_handle *trans = NULL;
7806         struct cache_extent *cache;
7807         struct btrfs_corrupt_block *corrupt;
7808
7809         while (1) {
7810                 cache = search_cache_extent(info->corrupt_blocks, 0);
7811                 if (!cache)
7812                         break;
7813                 if (!trans) {
7814                         trans = btrfs_start_transaction(info->extent_root, 1);
7815                         if (IS_ERR(trans))
7816                                 return PTR_ERR(trans);
7817                 }
7818                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7819                 prune_one_block(trans, info, corrupt);
7820                 remove_cache_extent(info->corrupt_blocks, cache);
7821         }
7822         if (trans)
7823                 return btrfs_commit_transaction(trans, info->extent_root);
7824         return 0;
7825 }
7826
7827 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7828 {
7829         struct btrfs_block_group_cache *cache;
7830         u64 start, end;
7831         int ret;
7832
7833         while (1) {
7834                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7835                                             &start, &end, EXTENT_DIRTY);
7836                 if (ret)
7837                         break;
7838                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7839                                    GFP_NOFS);
7840         }
7841
7842         start = 0;
7843         while (1) {
7844                 cache = btrfs_lookup_first_block_group(fs_info, start);
7845                 if (!cache)
7846                         break;
7847                 if (cache->cached)
7848                         cache->cached = 0;
7849                 start = cache->key.objectid + cache->key.offset;
7850         }
7851 }
7852
7853 static int check_extent_refs(struct btrfs_root *root,
7854                              struct cache_tree *extent_cache)
7855 {
7856         struct extent_record *rec;
7857         struct cache_extent *cache;
7858         int err = 0;
7859         int ret = 0;
7860         int fixed = 0;
7861         int had_dups = 0;
7862         int recorded = 0;
7863
7864         if (repair) {
7865                 /*
7866                  * if we're doing a repair, we have to make sure
7867                  * we don't allocate from the problem extents.
7868                  * In the worst case, this will be all the
7869                  * extents in the FS
7870                  */
7871                 cache = search_cache_extent(extent_cache, 0);
7872                 while(cache) {
7873                         rec = container_of(cache, struct extent_record, cache);
7874                         set_extent_dirty(root->fs_info->excluded_extents,
7875                                          rec->start,
7876                                          rec->start + rec->max_size - 1,
7877                                          GFP_NOFS);
7878                         cache = next_cache_extent(cache);
7879                 }
7880
7881                 /* pin down all the corrupted blocks too */
7882                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7883                 while(cache) {
7884                         set_extent_dirty(root->fs_info->excluded_extents,
7885                                          cache->start,
7886                                          cache->start + cache->size - 1,
7887                                          GFP_NOFS);
7888                         cache = next_cache_extent(cache);
7889                 }
7890                 prune_corrupt_blocks(root->fs_info);
7891                 reset_cached_block_groups(root->fs_info);
7892         }
7893
7894         reset_cached_block_groups(root->fs_info);
7895
7896         /*
7897          * We need to delete any duplicate entries we find first otherwise we
7898          * could mess up the extent tree when we have backrefs that actually
7899          * belong to a different extent item and not the weird duplicate one.
7900          */
7901         while (repair && !list_empty(&duplicate_extents)) {
7902                 rec = to_extent_record(duplicate_extents.next);
7903                 list_del_init(&rec->list);
7904
7905                 /* Sometimes we can find a backref before we find an actual
7906                  * extent, so we need to process it a little bit to see if there
7907                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7908                  * if this is a backref screwup.  If we need to delete stuff
7909                  * process_duplicates() will return 0, otherwise it will return
7910                  * 1 and we
7911                  */
7912                 if (process_duplicates(root, extent_cache, rec))
7913                         continue;
7914                 ret = delete_duplicate_records(root, rec);
7915                 if (ret < 0)
7916                         return ret;
7917                 /*
7918                  * delete_duplicate_records will return the number of entries
7919                  * deleted, so if it's greater than 0 then we know we actually
7920                  * did something and we need to remove.
7921                  */
7922                 if (ret)
7923                         had_dups = 1;
7924         }
7925
7926         if (had_dups)
7927                 return -EAGAIN;
7928
7929         while(1) {
7930                 int cur_err = 0;
7931
7932                 fixed = 0;
7933                 recorded = 0;
7934                 cache = search_cache_extent(extent_cache, 0);
7935                 if (!cache)
7936                         break;
7937                 rec = container_of(cache, struct extent_record, cache);
7938                 if (rec->num_duplicates) {
7939                         fprintf(stderr, "extent item %llu has multiple extent "
7940                                 "items\n", (unsigned long long)rec->start);
7941                         err = 1;
7942                         cur_err = 1;
7943                 }
7944
7945                 if (rec->refs != rec->extent_item_refs) {
7946                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7947                                 (unsigned long long)rec->start,
7948                                 (unsigned long long)rec->nr);
7949                         fprintf(stderr, "extent item %llu, found %llu\n",
7950                                 (unsigned long long)rec->extent_item_refs,
7951                                 (unsigned long long)rec->refs);
7952                         ret = record_orphan_data_extents(root->fs_info, rec);
7953                         if (ret < 0)
7954                                 goto repair_abort;
7955                         if (ret == 0) {
7956                                 recorded = 1;
7957                         } else {
7958                                 /*
7959                                  * we can't use the extent to repair file
7960                                  * extent, let the fallback method handle it.
7961                                  */
7962                                 if (!fixed && repair) {
7963                                         ret = fixup_extent_refs(
7964                                                         root->fs_info,
7965                                                         extent_cache, rec);
7966                                         if (ret)
7967                                                 goto repair_abort;
7968                                         fixed = 1;
7969                                 }
7970                         }
7971                         err = 1;
7972                         cur_err = 1;
7973                 }
7974                 if (all_backpointers_checked(rec, 1)) {
7975                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7976                                 (unsigned long long)rec->start,
7977                                 (unsigned long long)rec->nr);
7978
7979                         if (!fixed && !recorded && repair) {
7980                                 ret = fixup_extent_refs(root->fs_info,
7981                                                         extent_cache, rec);
7982                                 if (ret)
7983                                         goto repair_abort;
7984                                 fixed = 1;
7985                         }
7986                         cur_err = 1;
7987                         err = 1;
7988                 }
7989                 if (!rec->owner_ref_checked) {
7990                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7991                                 (unsigned long long)rec->start,
7992                                 (unsigned long long)rec->nr);
7993                         if (!fixed && !recorded && repair) {
7994                                 ret = fixup_extent_refs(root->fs_info,
7995                                                         extent_cache, rec);
7996                                 if (ret)
7997                                         goto repair_abort;
7998                                 fixed = 1;
7999                         }
8000                         err = 1;
8001                         cur_err = 1;
8002                 }
8003                 if (rec->bad_full_backref) {
8004                         fprintf(stderr, "bad full backref, on [%llu]\n",
8005                                 (unsigned long long)rec->start);
8006                         if (repair) {
8007                                 ret = fixup_extent_flags(root->fs_info, rec);
8008                                 if (ret)
8009                                         goto repair_abort;
8010                                 fixed = 1;
8011                         }
8012                         err = 1;
8013                         cur_err = 1;
8014                 }
8015                 /*
8016                  * Although it's not a extent ref's problem, we reuse this
8017                  * routine for error reporting.
8018                  * No repair function yet.
8019                  */
8020                 if (rec->crossing_stripes) {
8021                         fprintf(stderr,
8022                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8023                                 rec->start, rec->start + rec->max_size);
8024                         err = 1;
8025                         cur_err = 1;
8026                 }
8027
8028                 if (rec->wrong_chunk_type) {
8029                         fprintf(stderr,
8030                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8031                                 rec->start, rec->start + rec->max_size);
8032                         err = 1;
8033                         cur_err = 1;
8034                 }
8035
8036                 remove_cache_extent(extent_cache, cache);
8037                 free_all_extent_backrefs(rec);
8038                 if (!init_extent_tree && repair && (!cur_err || fixed))
8039                         clear_extent_dirty(root->fs_info->excluded_extents,
8040                                            rec->start,
8041                                            rec->start + rec->max_size - 1,
8042                                            GFP_NOFS);
8043                 free(rec);
8044         }
8045 repair_abort:
8046         if (repair) {
8047                 if (ret && ret != -EAGAIN) {
8048                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8049                         exit(1);
8050                 } else if (!ret) {
8051                         struct btrfs_trans_handle *trans;
8052
8053                         root = root->fs_info->extent_root;
8054                         trans = btrfs_start_transaction(root, 1);
8055                         if (IS_ERR(trans)) {
8056                                 ret = PTR_ERR(trans);
8057                                 goto repair_abort;
8058                         }
8059
8060                         btrfs_fix_block_accounting(trans, root);
8061                         ret = btrfs_commit_transaction(trans, root);
8062                         if (ret)
8063                                 goto repair_abort;
8064                 }
8065                 if (err)
8066                         fprintf(stderr, "repaired damaged extent references\n");
8067                 return ret;
8068         }
8069         return err;
8070 }
8071
8072 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8073 {
8074         u64 stripe_size;
8075
8076         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8077                 stripe_size = length;
8078                 stripe_size /= num_stripes;
8079         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8080                 stripe_size = length * 2;
8081                 stripe_size /= num_stripes;
8082         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8083                 stripe_size = length;
8084                 stripe_size /= (num_stripes - 1);
8085         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8086                 stripe_size = length;
8087                 stripe_size /= (num_stripes - 2);
8088         } else {
8089                 stripe_size = length;
8090         }
8091         return stripe_size;
8092 }
8093
8094 /*
8095  * Check the chunk with its block group/dev list ref:
8096  * Return 0 if all refs seems valid.
8097  * Return 1 if part of refs seems valid, need later check for rebuild ref
8098  * like missing block group and needs to search extent tree to rebuild them.
8099  * Return -1 if essential refs are missing and unable to rebuild.
8100  */
8101 static int check_chunk_refs(struct chunk_record *chunk_rec,
8102                             struct block_group_tree *block_group_cache,
8103                             struct device_extent_tree *dev_extent_cache,
8104                             int silent)
8105 {
8106         struct cache_extent *block_group_item;
8107         struct block_group_record *block_group_rec;
8108         struct cache_extent *dev_extent_item;
8109         struct device_extent_record *dev_extent_rec;
8110         u64 devid;
8111         u64 offset;
8112         u64 length;
8113         int metadump_v2 = 0;
8114         int i;
8115         int ret = 0;
8116
8117         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8118                                                chunk_rec->offset,
8119                                                chunk_rec->length);
8120         if (block_group_item) {
8121                 block_group_rec = container_of(block_group_item,
8122                                                struct block_group_record,
8123                                                cache);
8124                 if (chunk_rec->length != block_group_rec->offset ||
8125                     chunk_rec->offset != block_group_rec->objectid ||
8126                     (!metadump_v2 &&
8127                      chunk_rec->type_flags != block_group_rec->flags)) {
8128                         if (!silent)
8129                                 fprintf(stderr,
8130                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8131                                         chunk_rec->objectid,
8132                                         chunk_rec->type,
8133                                         chunk_rec->offset,
8134                                         chunk_rec->length,
8135                                         chunk_rec->offset,
8136                                         chunk_rec->type_flags,
8137                                         block_group_rec->objectid,
8138                                         block_group_rec->type,
8139                                         block_group_rec->offset,
8140                                         block_group_rec->offset,
8141                                         block_group_rec->objectid,
8142                                         block_group_rec->flags);
8143                         ret = -1;
8144                 } else {
8145                         list_del_init(&block_group_rec->list);
8146                         chunk_rec->bg_rec = block_group_rec;
8147                 }
8148         } else {
8149                 if (!silent)
8150                         fprintf(stderr,
8151                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8152                                 chunk_rec->objectid,
8153                                 chunk_rec->type,
8154                                 chunk_rec->offset,
8155                                 chunk_rec->length,
8156                                 chunk_rec->offset,
8157                                 chunk_rec->type_flags);
8158                 ret = 1;
8159         }
8160
8161         if (metadump_v2)
8162                 return ret;
8163
8164         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8165                                     chunk_rec->num_stripes);
8166         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8167                 devid = chunk_rec->stripes[i].devid;
8168                 offset = chunk_rec->stripes[i].offset;
8169                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8170                                                        devid, offset, length);
8171                 if (dev_extent_item) {
8172                         dev_extent_rec = container_of(dev_extent_item,
8173                                                 struct device_extent_record,
8174                                                 cache);
8175                         if (dev_extent_rec->objectid != devid ||
8176                             dev_extent_rec->offset != offset ||
8177                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8178                             dev_extent_rec->length != length) {
8179                                 if (!silent)
8180                                         fprintf(stderr,
8181                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8182                                                 chunk_rec->objectid,
8183                                                 chunk_rec->type,
8184                                                 chunk_rec->offset,
8185                                                 chunk_rec->stripes[i].devid,
8186                                                 chunk_rec->stripes[i].offset,
8187                                                 dev_extent_rec->objectid,
8188                                                 dev_extent_rec->offset,
8189                                                 dev_extent_rec->length);
8190                                 ret = -1;
8191                         } else {
8192                                 list_move(&dev_extent_rec->chunk_list,
8193                                           &chunk_rec->dextents);
8194                         }
8195                 } else {
8196                         if (!silent)
8197                                 fprintf(stderr,
8198                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8199                                         chunk_rec->objectid,
8200                                         chunk_rec->type,
8201                                         chunk_rec->offset,
8202                                         chunk_rec->stripes[i].devid,
8203                                         chunk_rec->stripes[i].offset);
8204                         ret = -1;
8205                 }
8206         }
8207         return ret;
8208 }
8209
8210 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8211 int check_chunks(struct cache_tree *chunk_cache,
8212                  struct block_group_tree *block_group_cache,
8213                  struct device_extent_tree *dev_extent_cache,
8214                  struct list_head *good, struct list_head *bad,
8215                  struct list_head *rebuild, int silent)
8216 {
8217         struct cache_extent *chunk_item;
8218         struct chunk_record *chunk_rec;
8219         struct block_group_record *bg_rec;
8220         struct device_extent_record *dext_rec;
8221         int err;
8222         int ret = 0;
8223
8224         chunk_item = first_cache_extent(chunk_cache);
8225         while (chunk_item) {
8226                 chunk_rec = container_of(chunk_item, struct chunk_record,
8227                                          cache);
8228                 err = check_chunk_refs(chunk_rec, block_group_cache,
8229                                        dev_extent_cache, silent);
8230                 if (err < 0)
8231                         ret = err;
8232                 if (err == 0 && good)
8233                         list_add_tail(&chunk_rec->list, good);
8234                 if (err > 0 && rebuild)
8235                         list_add_tail(&chunk_rec->list, rebuild);
8236                 if (err < 0 && bad)
8237                         list_add_tail(&chunk_rec->list, bad);
8238                 chunk_item = next_cache_extent(chunk_item);
8239         }
8240
8241         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8242                 if (!silent)
8243                         fprintf(stderr,
8244                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8245                                 bg_rec->objectid,
8246                                 bg_rec->offset,
8247                                 bg_rec->flags);
8248                 if (!ret)
8249                         ret = 1;
8250         }
8251
8252         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8253                             chunk_list) {
8254                 if (!silent)
8255                         fprintf(stderr,
8256                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8257                                 dext_rec->objectid,
8258                                 dext_rec->offset,
8259                                 dext_rec->length);
8260                 if (!ret)
8261                         ret = 1;
8262         }
8263         return ret;
8264 }
8265
8266
8267 static int check_device_used(struct device_record *dev_rec,
8268                              struct device_extent_tree *dext_cache)
8269 {
8270         struct cache_extent *cache;
8271         struct device_extent_record *dev_extent_rec;
8272         u64 total_byte = 0;
8273
8274         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8275         while (cache) {
8276                 dev_extent_rec = container_of(cache,
8277                                               struct device_extent_record,
8278                                               cache);
8279                 if (dev_extent_rec->objectid != dev_rec->devid)
8280                         break;
8281
8282                 list_del_init(&dev_extent_rec->device_list);
8283                 total_byte += dev_extent_rec->length;
8284                 cache = next_cache_extent(cache);
8285         }
8286
8287         if (total_byte != dev_rec->byte_used) {
8288                 fprintf(stderr,
8289                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8290                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8291                         dev_rec->type, dev_rec->offset);
8292                 return -1;
8293         } else {
8294                 return 0;
8295         }
8296 }
8297
8298 /* check btrfs_dev_item -> btrfs_dev_extent */
8299 static int check_devices(struct rb_root *dev_cache,
8300                          struct device_extent_tree *dev_extent_cache)
8301 {
8302         struct rb_node *dev_node;
8303         struct device_record *dev_rec;
8304         struct device_extent_record *dext_rec;
8305         int err;
8306         int ret = 0;
8307
8308         dev_node = rb_first(dev_cache);
8309         while (dev_node) {
8310                 dev_rec = container_of(dev_node, struct device_record, node);
8311                 err = check_device_used(dev_rec, dev_extent_cache);
8312                 if (err)
8313                         ret = err;
8314
8315                 dev_node = rb_next(dev_node);
8316         }
8317         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8318                             device_list) {
8319                 fprintf(stderr,
8320                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8321                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8322                 if (!ret)
8323                         ret = 1;
8324         }
8325         return ret;
8326 }
8327
8328 static int add_root_item_to_list(struct list_head *head,
8329                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8330                                   u8 level, u8 drop_level,
8331                                   int level_size, struct btrfs_key *drop_key)
8332 {
8333
8334         struct root_item_record *ri_rec;
8335         ri_rec = malloc(sizeof(*ri_rec));
8336         if (!ri_rec)
8337                 return -ENOMEM;
8338         ri_rec->bytenr = bytenr;
8339         ri_rec->objectid = objectid;
8340         ri_rec->level = level;
8341         ri_rec->level_size = level_size;
8342         ri_rec->drop_level = drop_level;
8343         ri_rec->last_snapshot = last_snapshot;
8344         if (drop_key)
8345                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8346         list_add_tail(&ri_rec->list, head);
8347
8348         return 0;
8349 }
8350
8351 static void free_root_item_list(struct list_head *list)
8352 {
8353         struct root_item_record *ri_rec;
8354
8355         while (!list_empty(list)) {
8356                 ri_rec = list_first_entry(list, struct root_item_record,
8357                                           list);
8358                 list_del_init(&ri_rec->list);
8359                 free(ri_rec);
8360         }
8361 }
8362
8363 static int deal_root_from_list(struct list_head *list,
8364                                struct btrfs_root *root,
8365                                struct block_info *bits,
8366                                int bits_nr,
8367                                struct cache_tree *pending,
8368                                struct cache_tree *seen,
8369                                struct cache_tree *reada,
8370                                struct cache_tree *nodes,
8371                                struct cache_tree *extent_cache,
8372                                struct cache_tree *chunk_cache,
8373                                struct rb_root *dev_cache,
8374                                struct block_group_tree *block_group_cache,
8375                                struct device_extent_tree *dev_extent_cache)
8376 {
8377         int ret = 0;
8378         u64 last;
8379
8380         while (!list_empty(list)) {
8381                 struct root_item_record *rec;
8382                 struct extent_buffer *buf;
8383                 rec = list_entry(list->next,
8384                                  struct root_item_record, list);
8385                 last = 0;
8386                 buf = read_tree_block(root->fs_info->tree_root,
8387                                       rec->bytenr, rec->level_size, 0);
8388                 if (!extent_buffer_uptodate(buf)) {
8389                         free_extent_buffer(buf);
8390                         ret = -EIO;
8391                         break;
8392                 }
8393                 ret = add_root_to_pending(buf, extent_cache, pending,
8394                                     seen, nodes, rec->objectid);
8395                 if (ret < 0)
8396                         break;
8397                 /*
8398                  * To rebuild extent tree, we need deal with snapshot
8399                  * one by one, otherwise we deal with node firstly which
8400                  * can maximize readahead.
8401                  */
8402                 while (1) {
8403                         ret = run_next_block(root, bits, bits_nr, &last,
8404                                              pending, seen, reada, nodes,
8405                                              extent_cache, chunk_cache,
8406                                              dev_cache, block_group_cache,
8407                                              dev_extent_cache, rec);
8408                         if (ret != 0)
8409                                 break;
8410                 }
8411                 free_extent_buffer(buf);
8412                 list_del(&rec->list);
8413                 free(rec);
8414                 if (ret < 0)
8415                         break;
8416         }
8417         while (ret >= 0) {
8418                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8419                                      reada, nodes, extent_cache, chunk_cache,
8420                                      dev_cache, block_group_cache,
8421                                      dev_extent_cache, NULL);
8422                 if (ret != 0) {
8423                         if (ret > 0)
8424                                 ret = 0;
8425                         break;
8426                 }
8427         }
8428         return ret;
8429 }
8430
8431 static int check_chunks_and_extents(struct btrfs_root *root)
8432 {
8433         struct rb_root dev_cache;
8434         struct cache_tree chunk_cache;
8435         struct block_group_tree block_group_cache;
8436         struct device_extent_tree dev_extent_cache;
8437         struct cache_tree extent_cache;
8438         struct cache_tree seen;
8439         struct cache_tree pending;
8440         struct cache_tree reada;
8441         struct cache_tree nodes;
8442         struct extent_io_tree excluded_extents;
8443         struct cache_tree corrupt_blocks;
8444         struct btrfs_path path;
8445         struct btrfs_key key;
8446         struct btrfs_key found_key;
8447         int ret, err = 0;
8448         struct block_info *bits;
8449         int bits_nr;
8450         struct extent_buffer *leaf;
8451         int slot;
8452         struct btrfs_root_item ri;
8453         struct list_head dropping_trees;
8454         struct list_head normal_trees;
8455         struct btrfs_root *root1;
8456         u64 objectid;
8457         u32 level_size;
8458         u8 level;
8459
8460         dev_cache = RB_ROOT;
8461         cache_tree_init(&chunk_cache);
8462         block_group_tree_init(&block_group_cache);
8463         device_extent_tree_init(&dev_extent_cache);
8464
8465         cache_tree_init(&extent_cache);
8466         cache_tree_init(&seen);
8467         cache_tree_init(&pending);
8468         cache_tree_init(&nodes);
8469         cache_tree_init(&reada);
8470         cache_tree_init(&corrupt_blocks);
8471         extent_io_tree_init(&excluded_extents);
8472         INIT_LIST_HEAD(&dropping_trees);
8473         INIT_LIST_HEAD(&normal_trees);
8474
8475         if (repair) {
8476                 root->fs_info->excluded_extents = &excluded_extents;
8477                 root->fs_info->fsck_extent_cache = &extent_cache;
8478                 root->fs_info->free_extent_hook = free_extent_hook;
8479                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8480         }
8481
8482         bits_nr = 1024;
8483         bits = malloc(bits_nr * sizeof(struct block_info));
8484         if (!bits) {
8485                 perror("malloc");
8486                 exit(1);
8487         }
8488
8489         if (ctx.progress_enabled) {
8490                 ctx.tp = TASK_EXTENTS;
8491                 task_start(ctx.info);
8492         }
8493
8494 again:
8495         root1 = root->fs_info->tree_root;
8496         level = btrfs_header_level(root1->node);
8497         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8498                                     root1->node->start, 0, level, 0,
8499                                     root1->nodesize, NULL);
8500         if (ret < 0)
8501                 goto out;
8502         root1 = root->fs_info->chunk_root;
8503         level = btrfs_header_level(root1->node);
8504         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8505                                     root1->node->start, 0, level, 0,
8506                                     root1->nodesize, NULL);
8507         if (ret < 0)
8508                 goto out;
8509         btrfs_init_path(&path);
8510         key.offset = 0;
8511         key.objectid = 0;
8512         key.type = BTRFS_ROOT_ITEM_KEY;
8513         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8514                                         &key, &path, 0, 0);
8515         if (ret < 0)
8516                 goto out;
8517         while(1) {
8518                 leaf = path.nodes[0];
8519                 slot = path.slots[0];
8520                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8521                         ret = btrfs_next_leaf(root, &path);
8522                         if (ret != 0)
8523                                 break;
8524                         leaf = path.nodes[0];
8525                         slot = path.slots[0];
8526                 }
8527                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8528                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8529                         unsigned long offset;
8530                         u64 last_snapshot;
8531
8532                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8533                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8534                         last_snapshot = btrfs_root_last_snapshot(&ri);
8535                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8536                                 level = btrfs_root_level(&ri);
8537                                 level_size = root->nodesize;
8538                                 ret = add_root_item_to_list(&normal_trees,
8539                                                 found_key.objectid,
8540                                                 btrfs_root_bytenr(&ri),
8541                                                 last_snapshot, level,
8542                                                 0, level_size, NULL);
8543                                 if (ret < 0)
8544                                         goto out;
8545                         } else {
8546                                 level = btrfs_root_level(&ri);
8547                                 level_size = root->nodesize;
8548                                 objectid = found_key.objectid;
8549                                 btrfs_disk_key_to_cpu(&found_key,
8550                                                       &ri.drop_progress);
8551                                 ret = add_root_item_to_list(&dropping_trees,
8552                                                 objectid,
8553                                                 btrfs_root_bytenr(&ri),
8554                                                 last_snapshot, level,
8555                                                 ri.drop_level,
8556                                                 level_size, &found_key);
8557                                 if (ret < 0)
8558                                         goto out;
8559                         }
8560                 }
8561                 path.slots[0]++;
8562         }
8563         btrfs_release_path(&path);
8564
8565         /*
8566          * check_block can return -EAGAIN if it fixes something, please keep
8567          * this in mind when dealing with return values from these functions, if
8568          * we get -EAGAIN we want to fall through and restart the loop.
8569          */
8570         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8571                                   &seen, &reada, &nodes, &extent_cache,
8572                                   &chunk_cache, &dev_cache, &block_group_cache,
8573                                   &dev_extent_cache);
8574         if (ret < 0) {
8575                 if (ret == -EAGAIN)
8576                         goto loop;
8577                 goto out;
8578         }
8579         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8580                                   &pending, &seen, &reada, &nodes,
8581                                   &extent_cache, &chunk_cache, &dev_cache,
8582                                   &block_group_cache, &dev_extent_cache);
8583         if (ret < 0) {
8584                 if (ret == -EAGAIN)
8585                         goto loop;
8586                 goto out;
8587         }
8588
8589         ret = check_chunks(&chunk_cache, &block_group_cache,
8590                            &dev_extent_cache, NULL, NULL, NULL, 0);
8591         if (ret) {
8592                 if (ret == -EAGAIN)
8593                         goto loop;
8594                 err = ret;
8595         }
8596
8597         ret = check_extent_refs(root, &extent_cache);
8598         if (ret < 0) {
8599                 if (ret == -EAGAIN)
8600                         goto loop;
8601                 goto out;
8602         }
8603
8604         ret = check_devices(&dev_cache, &dev_extent_cache);
8605         if (ret && err)
8606                 ret = err;
8607
8608 out:
8609         task_stop(ctx.info);
8610         if (repair) {
8611                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8612                 extent_io_tree_cleanup(&excluded_extents);
8613                 root->fs_info->fsck_extent_cache = NULL;
8614                 root->fs_info->free_extent_hook = NULL;
8615                 root->fs_info->corrupt_blocks = NULL;
8616                 root->fs_info->excluded_extents = NULL;
8617         }
8618         free(bits);
8619         free_chunk_cache_tree(&chunk_cache);
8620         free_device_cache_tree(&dev_cache);
8621         free_block_group_tree(&block_group_cache);
8622         free_device_extent_tree(&dev_extent_cache);
8623         free_extent_cache_tree(&seen);
8624         free_extent_cache_tree(&pending);
8625         free_extent_cache_tree(&reada);
8626         free_extent_cache_tree(&nodes);
8627         return ret;
8628 loop:
8629         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8630         free_extent_cache_tree(&seen);
8631         free_extent_cache_tree(&pending);
8632         free_extent_cache_tree(&reada);
8633         free_extent_cache_tree(&nodes);
8634         free_chunk_cache_tree(&chunk_cache);
8635         free_block_group_tree(&block_group_cache);
8636         free_device_cache_tree(&dev_cache);
8637         free_device_extent_tree(&dev_extent_cache);
8638         free_extent_record_cache(root->fs_info, &extent_cache);
8639         free_root_item_list(&normal_trees);
8640         free_root_item_list(&dropping_trees);
8641         extent_io_tree_cleanup(&excluded_extents);
8642         goto again;
8643 }
8644
8645 /*
8646  * Check backrefs of a tree block given by @bytenr or @eb.
8647  *
8648  * @root:       the root containing the @bytenr or @eb
8649  * @eb:         tree block extent buffer, can be NULL
8650  * @bytenr:     bytenr of the tree block to search
8651  * @level:      tree level of the tree block
8652  * @owner:      owner of the tree block
8653  *
8654  * Return >0 for any error found and output error message
8655  * Return 0 for no error found
8656  */
8657 static int check_tree_block_ref(struct btrfs_root *root,
8658                                 struct extent_buffer *eb, u64 bytenr,
8659                                 int level, u64 owner)
8660 {
8661         struct btrfs_key key;
8662         struct btrfs_root *extent_root = root->fs_info->extent_root;
8663         struct btrfs_path path;
8664         struct btrfs_extent_item *ei;
8665         struct btrfs_extent_inline_ref *iref;
8666         struct extent_buffer *leaf;
8667         unsigned long end;
8668         unsigned long ptr;
8669         int slot;
8670         int skinny_level;
8671         int type;
8672         u32 nodesize = root->nodesize;
8673         u32 item_size;
8674         u64 offset;
8675         int found_ref = 0;
8676         int err = 0;
8677         int ret;
8678
8679         btrfs_init_path(&path);
8680         key.objectid = bytenr;
8681         if (btrfs_fs_incompat(root->fs_info,
8682                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8683                 key.type = BTRFS_METADATA_ITEM_KEY;
8684         else
8685                 key.type = BTRFS_EXTENT_ITEM_KEY;
8686         key.offset = (u64)-1;
8687
8688         /* Search for the backref in extent tree */
8689         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8690         if (ret < 0) {
8691                 err |= BACKREF_MISSING;
8692                 goto out;
8693         }
8694         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8695         if (ret) {
8696                 err |= BACKREF_MISSING;
8697                 goto out;
8698         }
8699
8700         leaf = path.nodes[0];
8701         slot = path.slots[0];
8702         btrfs_item_key_to_cpu(leaf, &key, slot);
8703
8704         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8705
8706         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8707                 skinny_level = (int)key.offset;
8708                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8709         } else {
8710                 struct btrfs_tree_block_info *info;
8711
8712                 info = (struct btrfs_tree_block_info *)(ei + 1);
8713                 skinny_level = btrfs_tree_block_level(leaf, info);
8714                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8715         }
8716
8717         if (eb) {
8718                 u64 header_gen;
8719                 u64 extent_gen;
8720
8721                 if (!(btrfs_extent_flags(leaf, ei) &
8722                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8723                         error(
8724                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8725                                 key.objectid, nodesize,
8726                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8727                         err = BACKREF_MISMATCH;
8728                 }
8729                 header_gen = btrfs_header_generation(eb);
8730                 extent_gen = btrfs_extent_generation(leaf, ei);
8731                 if (header_gen != extent_gen) {
8732                         error(
8733         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8734                                 key.objectid, nodesize, header_gen,
8735                                 extent_gen);
8736                         err = BACKREF_MISMATCH;
8737                 }
8738                 if (level != skinny_level) {
8739                         error(
8740                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8741                                 key.objectid, nodesize, level, skinny_level);
8742                         err = BACKREF_MISMATCH;
8743                 }
8744                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8745                         error(
8746                         "extent[%llu %u] is referred by other roots than %llu",
8747                                 key.objectid, nodesize, root->objectid);
8748                         err = BACKREF_MISMATCH;
8749                 }
8750         }
8751
8752         /*
8753          * Iterate the extent/metadata item to find the exact backref
8754          */
8755         item_size = btrfs_item_size_nr(leaf, slot);
8756         ptr = (unsigned long)iref;
8757         end = (unsigned long)ei + item_size;
8758         while (ptr < end) {
8759                 iref = (struct btrfs_extent_inline_ref *)ptr;
8760                 type = btrfs_extent_inline_ref_type(leaf, iref);
8761                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8762
8763                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8764                         (offset == root->objectid || offset == owner)) {
8765                         found_ref = 1;
8766                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8767                         /* Check if the backref points to valid referencer */
8768                         found_ref = !check_tree_block_ref(root, NULL, offset,
8769                                                           level + 1, owner);
8770                 }
8771
8772                 if (found_ref)
8773                         break;
8774                 ptr += btrfs_extent_inline_ref_size(type);
8775         }
8776
8777         /*
8778          * Inlined extent item doesn't have what we need, check
8779          * TREE_BLOCK_REF_KEY
8780          */
8781         if (!found_ref) {
8782                 btrfs_release_path(&path);
8783                 key.objectid = bytenr;
8784                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8785                 key.offset = root->objectid;
8786
8787                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8788                 if (!ret)
8789                         found_ref = 1;
8790         }
8791         if (!found_ref)
8792                 err |= BACKREF_MISSING;
8793 out:
8794         btrfs_release_path(&path);
8795         if (eb && (err & BACKREF_MISSING))
8796                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8797                         bytenr, nodesize, owner, level);
8798         return err;
8799 }
8800
8801 /*
8802  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8803  *
8804  * Return >0 any error found and output error message
8805  * Return 0 for no error found
8806  */
8807 static int check_extent_data_item(struct btrfs_root *root,
8808                                   struct extent_buffer *eb, int slot)
8809 {
8810         struct btrfs_file_extent_item *fi;
8811         struct btrfs_path path;
8812         struct btrfs_root *extent_root = root->fs_info->extent_root;
8813         struct btrfs_key fi_key;
8814         struct btrfs_key dbref_key;
8815         struct extent_buffer *leaf;
8816         struct btrfs_extent_item *ei;
8817         struct btrfs_extent_inline_ref *iref;
8818         struct btrfs_extent_data_ref *dref;
8819         u64 owner;
8820         u64 file_extent_gen;
8821         u64 disk_bytenr;
8822         u64 disk_num_bytes;
8823         u64 extent_num_bytes;
8824         u64 extent_flags;
8825         u64 extent_gen;
8826         u32 item_size;
8827         unsigned long end;
8828         unsigned long ptr;
8829         int type;
8830         u64 ref_root;
8831         int found_dbackref = 0;
8832         int err = 0;
8833         int ret;
8834
8835         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8836         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8837         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8838
8839         /* Nothing to check for hole and inline data extents */
8840         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8841             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8842                 return 0;
8843
8844         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8845         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8846         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8847
8848         /* Check unaligned disk_num_bytes and num_bytes */
8849         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8850                 error(
8851 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8852                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8853                         root->sectorsize);
8854                 err |= BYTES_UNALIGNED;
8855         } else {
8856                 data_bytes_allocated += disk_num_bytes;
8857         }
8858         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8859                 error(
8860 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8861                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8862                         root->sectorsize);
8863                 err |= BYTES_UNALIGNED;
8864         } else {
8865                 data_bytes_referenced += extent_num_bytes;
8866         }
8867         owner = btrfs_header_owner(eb);
8868
8869         /* Check the extent item of the file extent in extent tree */
8870         btrfs_init_path(&path);
8871         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8872         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8873         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8874
8875         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8876         if (ret) {
8877                 err |= BACKREF_MISSING;
8878                 goto error;
8879         }
8880
8881         leaf = path.nodes[0];
8882         slot = path.slots[0];
8883         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8884
8885         extent_flags = btrfs_extent_flags(leaf, ei);
8886         extent_gen = btrfs_extent_generation(leaf, ei);
8887
8888         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8889                 error(
8890                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8891                     disk_bytenr, disk_num_bytes,
8892                     BTRFS_EXTENT_FLAG_DATA);
8893                 err |= BACKREF_MISMATCH;
8894         }
8895
8896         if (file_extent_gen < extent_gen) {
8897                 error(
8898 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8899                         disk_bytenr, disk_num_bytes, file_extent_gen,
8900                         extent_gen);
8901                 err |= BACKREF_MISMATCH;
8902         }
8903
8904         /* Check data backref inside that extent item */
8905         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8906         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8907         ptr = (unsigned long)iref;
8908         end = (unsigned long)ei + item_size;
8909         while (ptr < end) {
8910                 iref = (struct btrfs_extent_inline_ref *)ptr;
8911                 type = btrfs_extent_inline_ref_type(leaf, iref);
8912                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8913
8914                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8915                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8916                         if (ref_root == owner || ref_root == root->objectid)
8917                                 found_dbackref = 1;
8918                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8919                         found_dbackref = !check_tree_block_ref(root, NULL,
8920                                 btrfs_extent_inline_ref_offset(leaf, iref),
8921                                 0, owner);
8922                 }
8923
8924                 if (found_dbackref)
8925                         break;
8926                 ptr += btrfs_extent_inline_ref_size(type);
8927         }
8928
8929         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8930         if (!found_dbackref) {
8931                 btrfs_release_path(&path);
8932
8933                 btrfs_init_path(&path);
8934                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8935                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8936                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8937                                 fi_key.objectid, fi_key.offset);
8938
8939                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8940                                         &dbref_key, &path, 0, 0);
8941                 if (!ret)
8942                         found_dbackref = 1;
8943         }
8944
8945         if (!found_dbackref)
8946                 err |= BACKREF_MISSING;
8947 error:
8948         btrfs_release_path(&path);
8949         if (err & BACKREF_MISSING) {
8950                 error("data extent[%llu %llu] backref lost",
8951                       disk_bytenr, disk_num_bytes);
8952         }
8953         return err;
8954 }
8955
8956 /*
8957  * Get real tree block level for the case like shared block
8958  * Return >= 0 as tree level
8959  * Return <0 for error
8960  */
8961 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8962 {
8963         struct extent_buffer *eb;
8964         struct btrfs_path path;
8965         struct btrfs_key key;
8966         struct btrfs_extent_item *ei;
8967         u64 flags;
8968         u64 transid;
8969         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8970         u8 backref_level;
8971         u8 header_level;
8972         int ret;
8973
8974         /* Search extent tree for extent generation and level */
8975         key.objectid = bytenr;
8976         key.type = BTRFS_METADATA_ITEM_KEY;
8977         key.offset = (u64)-1;
8978
8979         btrfs_init_path(&path);
8980         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8981         if (ret < 0)
8982                 goto release_out;
8983         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8984         if (ret < 0)
8985                 goto release_out;
8986         if (ret > 0) {
8987                 ret = -ENOENT;
8988                 goto release_out;
8989         }
8990
8991         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8992         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8993                             struct btrfs_extent_item);
8994         flags = btrfs_extent_flags(path.nodes[0], ei);
8995         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8996                 ret = -ENOENT;
8997                 goto release_out;
8998         }
8999
9000         /* Get transid for later read_tree_block() check */
9001         transid = btrfs_extent_generation(path.nodes[0], ei);
9002
9003         /* Get backref level as one source */
9004         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9005                 backref_level = key.offset;
9006         } else {
9007                 struct btrfs_tree_block_info *info;
9008
9009                 info = (struct btrfs_tree_block_info *)(ei + 1);
9010                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9011         }
9012         btrfs_release_path(&path);
9013
9014         /* Get level from tree block as an alternative source */
9015         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9016         if (!extent_buffer_uptodate(eb)) {
9017                 free_extent_buffer(eb);
9018                 return -EIO;
9019         }
9020         header_level = btrfs_header_level(eb);
9021         free_extent_buffer(eb);
9022
9023         if (header_level != backref_level)
9024                 return -EIO;
9025         return header_level;
9026
9027 release_out:
9028         btrfs_release_path(&path);
9029         return ret;
9030 }
9031
9032 /*
9033  * Check if a tree block backref is valid (points to a valid tree block)
9034  * if level == -1, level will be resolved
9035  * Return >0 for any error found and print error message
9036  */
9037 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9038                                     u64 bytenr, int level)
9039 {
9040         struct btrfs_root *root;
9041         struct btrfs_key key;
9042         struct btrfs_path path;
9043         struct extent_buffer *eb;
9044         struct extent_buffer *node;
9045         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9046         int err = 0;
9047         int ret;
9048
9049         /* Query level for level == -1 special case */
9050         if (level == -1)
9051                 level = query_tree_block_level(fs_info, bytenr);
9052         if (level < 0) {
9053                 err |= REFERENCER_MISSING;
9054                 goto out;
9055         }
9056
9057         key.objectid = root_id;
9058         key.type = BTRFS_ROOT_ITEM_KEY;
9059         key.offset = (u64)-1;
9060
9061         root = btrfs_read_fs_root(fs_info, &key);
9062         if (IS_ERR(root)) {
9063                 err |= REFERENCER_MISSING;
9064                 goto out;
9065         }
9066
9067         /* Read out the tree block to get item/node key */
9068         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9069         if (!extent_buffer_uptodate(eb)) {
9070                 err |= REFERENCER_MISSING;
9071                 free_extent_buffer(eb);
9072                 goto out;
9073         }
9074
9075         /* Empty tree, no need to check key */
9076         if (!btrfs_header_nritems(eb) && !level) {
9077                 free_extent_buffer(eb);
9078                 goto out;
9079         }
9080
9081         if (level)
9082                 btrfs_node_key_to_cpu(eb, &key, 0);
9083         else
9084                 btrfs_item_key_to_cpu(eb, &key, 0);
9085
9086         free_extent_buffer(eb);
9087
9088         btrfs_init_path(&path);
9089         path.lowest_level = level;
9090         /* Search with the first key, to ensure we can reach it */
9091         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9092         if (ret < 0) {
9093                 err |= REFERENCER_MISSING;
9094                 goto release_out;
9095         }
9096
9097         node = path.nodes[level];
9098         if (btrfs_header_bytenr(node) != bytenr) {
9099                 error(
9100         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9101                         bytenr, nodesize, bytenr,
9102                         btrfs_header_bytenr(node));
9103                 err |= REFERENCER_MISMATCH;
9104         }
9105         if (btrfs_header_level(node) != level) {
9106                 error(
9107         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9108                         bytenr, nodesize, level,
9109                         btrfs_header_level(node));
9110                 err |= REFERENCER_MISMATCH;
9111         }
9112
9113 release_out:
9114         btrfs_release_path(&path);
9115 out:
9116         if (err & REFERENCER_MISSING) {
9117                 if (level < 0)
9118                         error("extent [%llu %d] lost referencer (owner: %llu)",
9119                                 bytenr, nodesize, root_id);
9120                 else
9121                         error(
9122                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9123                                 bytenr, nodesize, root_id, level);
9124         }
9125
9126         return err;
9127 }
9128
9129 /*
9130  * Check referencer for shared block backref
9131  * If level == -1, this function will resolve the level.
9132  */
9133 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9134                                      u64 parent, u64 bytenr, int level)
9135 {
9136         struct extent_buffer *eb;
9137         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9138         u32 nr;
9139         int found_parent = 0;
9140         int i;
9141
9142         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9143         if (!extent_buffer_uptodate(eb))
9144                 goto out;
9145
9146         if (level == -1)
9147                 level = query_tree_block_level(fs_info, bytenr);
9148         if (level < 0)
9149                 goto out;
9150
9151         if (level + 1 != btrfs_header_level(eb))
9152                 goto out;
9153
9154         nr = btrfs_header_nritems(eb);
9155         for (i = 0; i < nr; i++) {
9156                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9157                         found_parent = 1;
9158                         break;
9159                 }
9160         }
9161 out:
9162         free_extent_buffer(eb);
9163         if (!found_parent) {
9164                 error(
9165         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9166                         bytenr, nodesize, parent, level);
9167                 return REFERENCER_MISSING;
9168         }
9169         return 0;
9170 }
9171
9172 /*
9173  * Check referencer for normal (inlined) data ref
9174  * If len == 0, it will be resolved by searching in extent tree
9175  */
9176 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9177                                      u64 root_id, u64 objectid, u64 offset,
9178                                      u64 bytenr, u64 len, u32 count)
9179 {
9180         struct btrfs_root *root;
9181         struct btrfs_root *extent_root = fs_info->extent_root;
9182         struct btrfs_key key;
9183         struct btrfs_path path;
9184         struct extent_buffer *leaf;
9185         struct btrfs_file_extent_item *fi;
9186         u32 found_count = 0;
9187         int slot;
9188         int ret = 0;
9189
9190         if (!len) {
9191                 key.objectid = bytenr;
9192                 key.type = BTRFS_EXTENT_ITEM_KEY;
9193                 key.offset = (u64)-1;
9194
9195                 btrfs_init_path(&path);
9196                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9197                 if (ret < 0)
9198                         goto out;
9199                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9200                 if (ret)
9201                         goto out;
9202                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9203                 if (key.objectid != bytenr ||
9204                     key.type != BTRFS_EXTENT_ITEM_KEY)
9205                         goto out;
9206                 len = key.offset;
9207                 btrfs_release_path(&path);
9208         }
9209         key.objectid = root_id;
9210         key.type = BTRFS_ROOT_ITEM_KEY;
9211         key.offset = (u64)-1;
9212         btrfs_init_path(&path);
9213
9214         root = btrfs_read_fs_root(fs_info, &key);
9215         if (IS_ERR(root))
9216                 goto out;
9217
9218         key.objectid = objectid;
9219         key.type = BTRFS_EXTENT_DATA_KEY;
9220         /*
9221          * It can be nasty as data backref offset is
9222          * file offset - file extent offset, which is smaller or
9223          * equal to original backref offset.  The only special case is
9224          * overflow.  So we need to special check and do further search.
9225          */
9226         key.offset = offset & (1ULL << 63) ? 0 : offset;
9227
9228         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9229         if (ret < 0)
9230                 goto out;
9231
9232         /*
9233          * Search afterwards to get correct one
9234          * NOTE: As we must do a comprehensive check on the data backref to
9235          * make sure the dref count also matches, we must iterate all file
9236          * extents for that inode.
9237          */
9238         while (1) {
9239                 leaf = path.nodes[0];
9240                 slot = path.slots[0];
9241
9242                 btrfs_item_key_to_cpu(leaf, &key, slot);
9243                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9244                         break;
9245                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9246                 /*
9247                  * Except normal disk bytenr and disk num bytes, we still
9248                  * need to do extra check on dbackref offset as
9249                  * dbackref offset = file_offset - file_extent_offset
9250                  */
9251                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9252                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9253                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9254                     offset)
9255                         found_count++;
9256
9257                 ret = btrfs_next_item(root, &path);
9258                 if (ret)
9259                         break;
9260         }
9261 out:
9262         btrfs_release_path(&path);
9263         if (found_count != count) {
9264                 error(
9265 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9266                         bytenr, len, root_id, objectid, offset, count, found_count);
9267                 return REFERENCER_MISSING;
9268         }
9269         return 0;
9270 }
9271
9272 /*
9273  * Check if the referencer of a shared data backref exists
9274  */
9275 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9276                                      u64 parent, u64 bytenr)
9277 {
9278         struct extent_buffer *eb;
9279         struct btrfs_key key;
9280         struct btrfs_file_extent_item *fi;
9281         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9282         u32 nr;
9283         int found_parent = 0;
9284         int i;
9285
9286         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9287         if (!extent_buffer_uptodate(eb))
9288                 goto out;
9289
9290         nr = btrfs_header_nritems(eb);
9291         for (i = 0; i < nr; i++) {
9292                 btrfs_item_key_to_cpu(eb, &key, i);
9293                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9294                         continue;
9295
9296                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9297                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9298                         continue;
9299
9300                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9301                         found_parent = 1;
9302                         break;
9303                 }
9304         }
9305
9306 out:
9307         free_extent_buffer(eb);
9308         if (!found_parent) {
9309                 error("shared extent %llu referencer lost (parent: %llu)",
9310                         bytenr, parent);
9311                 return REFERENCER_MISSING;
9312         }
9313         return 0;
9314 }
9315
9316 /*
9317  * This function will check a given extent item, including its backref and
9318  * itself (like crossing stripe boundary and type)
9319  *
9320  * Since we don't use extent_record anymore, introduce new error bit
9321  */
9322 static int check_extent_item(struct btrfs_fs_info *fs_info,
9323                              struct extent_buffer *eb, int slot)
9324 {
9325         struct btrfs_extent_item *ei;
9326         struct btrfs_extent_inline_ref *iref;
9327         struct btrfs_extent_data_ref *dref;
9328         unsigned long end;
9329         unsigned long ptr;
9330         int type;
9331         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9332         u32 item_size = btrfs_item_size_nr(eb, slot);
9333         u64 flags;
9334         u64 offset;
9335         int metadata = 0;
9336         int level;
9337         struct btrfs_key key;
9338         int ret;
9339         int err = 0;
9340
9341         btrfs_item_key_to_cpu(eb, &key, slot);
9342         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9343                 bytes_used += key.offset;
9344         else
9345                 bytes_used += nodesize;
9346
9347         if (item_size < sizeof(*ei)) {
9348                 /*
9349                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9350                  * old thing when on disk format is still un-determined.
9351                  * No need to care about it anymore
9352                  */
9353                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9354                 return -ENOTTY;
9355         }
9356
9357         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9358         flags = btrfs_extent_flags(eb, ei);
9359
9360         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9361                 metadata = 1;
9362         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9363                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9364                       key.objectid, key.objectid + nodesize);
9365                 err |= CROSSING_STRIPE_BOUNDARY;
9366         }
9367
9368         ptr = (unsigned long)(ei + 1);
9369
9370         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9371                 /* Old EXTENT_ITEM metadata */
9372                 struct btrfs_tree_block_info *info;
9373
9374                 info = (struct btrfs_tree_block_info *)ptr;
9375                 level = btrfs_tree_block_level(eb, info);
9376                 ptr += sizeof(struct btrfs_tree_block_info);
9377         } else {
9378                 /* New METADATA_ITEM */
9379                 level = key.offset;
9380         }
9381         end = (unsigned long)ei + item_size;
9382
9383         if (ptr >= end) {
9384                 err |= ITEM_SIZE_MISMATCH;
9385                 goto out;
9386         }
9387
9388         /* Now check every backref in this extent item */
9389 next:
9390         iref = (struct btrfs_extent_inline_ref *)ptr;
9391         type = btrfs_extent_inline_ref_type(eb, iref);
9392         offset = btrfs_extent_inline_ref_offset(eb, iref);
9393         switch (type) {
9394         case BTRFS_TREE_BLOCK_REF_KEY:
9395                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9396                                                level);
9397                 err |= ret;
9398                 break;
9399         case BTRFS_SHARED_BLOCK_REF_KEY:
9400                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9401                                                  level);
9402                 err |= ret;
9403                 break;
9404         case BTRFS_EXTENT_DATA_REF_KEY:
9405                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9406                 ret = check_extent_data_backref(fs_info,
9407                                 btrfs_extent_data_ref_root(eb, dref),
9408                                 btrfs_extent_data_ref_objectid(eb, dref),
9409                                 btrfs_extent_data_ref_offset(eb, dref),
9410                                 key.objectid, key.offset,
9411                                 btrfs_extent_data_ref_count(eb, dref));
9412                 err |= ret;
9413                 break;
9414         case BTRFS_SHARED_DATA_REF_KEY:
9415                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9416                 err |= ret;
9417                 break;
9418         default:
9419                 error("extent[%llu %d %llu] has unknown ref type: %d",
9420                         key.objectid, key.type, key.offset, type);
9421                 err |= UNKNOWN_TYPE;
9422                 goto out;
9423         }
9424
9425         ptr += btrfs_extent_inline_ref_size(type);
9426         if (ptr < end)
9427                 goto next;
9428
9429 out:
9430         return err;
9431 }
9432
9433 /*
9434  * Check if a dev extent item is referred correctly by its chunk
9435  */
9436 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9437                                  struct extent_buffer *eb, int slot)
9438 {
9439         struct btrfs_root *chunk_root = fs_info->chunk_root;
9440         struct btrfs_dev_extent *ptr;
9441         struct btrfs_path path;
9442         struct btrfs_key chunk_key;
9443         struct btrfs_key devext_key;
9444         struct btrfs_chunk *chunk;
9445         struct extent_buffer *l;
9446         int num_stripes;
9447         u64 length;
9448         int i;
9449         int found_chunk = 0;
9450         int ret;
9451
9452         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9453         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9454         length = btrfs_dev_extent_length(eb, ptr);
9455
9456         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9457         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9458         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9459
9460         btrfs_init_path(&path);
9461         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9462         if (ret)
9463                 goto out;
9464
9465         l = path.nodes[0];
9466         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9467         if (btrfs_chunk_length(l, chunk) != length)
9468                 goto out;
9469
9470         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9471         for (i = 0; i < num_stripes; i++) {
9472                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9473                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9474
9475                 if (devid == devext_key.objectid &&
9476                     offset == devext_key.offset) {
9477                         found_chunk = 1;
9478                         break;
9479                 }
9480         }
9481 out:
9482         btrfs_release_path(&path);
9483         if (!found_chunk) {
9484                 error(
9485                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9486                         devext_key.objectid, devext_key.offset, length);
9487                 return REFERENCER_MISSING;
9488         }
9489         return 0;
9490 }
9491
9492 /*
9493  * Check if the used space is correct with the dev item
9494  */
9495 static int check_dev_item(struct btrfs_fs_info *fs_info,
9496                           struct extent_buffer *eb, int slot)
9497 {
9498         struct btrfs_root *dev_root = fs_info->dev_root;
9499         struct btrfs_dev_item *dev_item;
9500         struct btrfs_path path;
9501         struct btrfs_key key;
9502         struct btrfs_dev_extent *ptr;
9503         u64 dev_id;
9504         u64 used;
9505         u64 total = 0;
9506         int ret;
9507
9508         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9509         dev_id = btrfs_device_id(eb, dev_item);
9510         used = btrfs_device_bytes_used(eb, dev_item);
9511
9512         key.objectid = dev_id;
9513         key.type = BTRFS_DEV_EXTENT_KEY;
9514         key.offset = 0;
9515
9516         btrfs_init_path(&path);
9517         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9518         if (ret < 0) {
9519                 btrfs_item_key_to_cpu(eb, &key, slot);
9520                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9521                         key.objectid, key.type, key.offset);
9522                 btrfs_release_path(&path);
9523                 return REFERENCER_MISSING;
9524         }
9525
9526         /* Iterate dev_extents to calculate the used space of a device */
9527         while (1) {
9528                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9529
9530                 if (key.objectid > dev_id)
9531                         break;
9532                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9533                         goto next;
9534
9535                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9536                                      struct btrfs_dev_extent);
9537                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9538 next:
9539                 ret = btrfs_next_item(dev_root, &path);
9540                 if (ret)
9541                         break;
9542         }
9543         btrfs_release_path(&path);
9544
9545         if (used != total) {
9546                 btrfs_item_key_to_cpu(eb, &key, slot);
9547                 error(
9548 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9549                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9550                         BTRFS_DEV_EXTENT_KEY, dev_id);
9551                 return ACCOUNTING_MISMATCH;
9552         }
9553         return 0;
9554 }
9555
9556 /*
9557  * Check a block group item with its referener (chunk) and its used space
9558  * with extent/metadata item
9559  */
9560 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9561                                   struct extent_buffer *eb, int slot)
9562 {
9563         struct btrfs_root *extent_root = fs_info->extent_root;
9564         struct btrfs_root *chunk_root = fs_info->chunk_root;
9565         struct btrfs_block_group_item *bi;
9566         struct btrfs_block_group_item bg_item;
9567         struct btrfs_path path;
9568         struct btrfs_key bg_key;
9569         struct btrfs_key chunk_key;
9570         struct btrfs_key extent_key;
9571         struct btrfs_chunk *chunk;
9572         struct extent_buffer *leaf;
9573         struct btrfs_extent_item *ei;
9574         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9575         u64 flags;
9576         u64 bg_flags;
9577         u64 used;
9578         u64 total = 0;
9579         int ret;
9580         int err = 0;
9581
9582         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9583         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9584         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9585         used = btrfs_block_group_used(&bg_item);
9586         bg_flags = btrfs_block_group_flags(&bg_item);
9587
9588         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9589         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9590         chunk_key.offset = bg_key.objectid;
9591
9592         btrfs_init_path(&path);
9593         /* Search for the referencer chunk */
9594         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9595         if (ret) {
9596                 error(
9597                 "block group[%llu %llu] did not find the related chunk item",
9598                         bg_key.objectid, bg_key.offset);
9599                 err |= REFERENCER_MISSING;
9600         } else {
9601                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9602                                         struct btrfs_chunk);
9603                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9604                                                 bg_key.offset) {
9605                         error(
9606         "block group[%llu %llu] related chunk item length does not match",
9607                                 bg_key.objectid, bg_key.offset);
9608                         err |= REFERENCER_MISMATCH;
9609                 }
9610         }
9611         btrfs_release_path(&path);
9612
9613         /* Search from the block group bytenr */
9614         extent_key.objectid = bg_key.objectid;
9615         extent_key.type = 0;
9616         extent_key.offset = 0;
9617
9618         btrfs_init_path(&path);
9619         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9620         if (ret < 0)
9621                 goto out;
9622
9623         /* Iterate extent tree to account used space */
9624         while (1) {
9625                 leaf = path.nodes[0];
9626                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9627                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9628                         break;
9629
9630                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9631                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9632                         goto next;
9633                 if (extent_key.objectid < bg_key.objectid)
9634                         goto next;
9635
9636                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9637                         total += nodesize;
9638                 else
9639                         total += extent_key.offset;
9640
9641                 ei = btrfs_item_ptr(leaf, path.slots[0],
9642                                     struct btrfs_extent_item);
9643                 flags = btrfs_extent_flags(leaf, ei);
9644                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9645                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9646                                 error(
9647                         "bad extent[%llu, %llu) type mismatch with chunk",
9648                                         extent_key.objectid,
9649                                         extent_key.objectid + extent_key.offset);
9650                                 err |= CHUNK_TYPE_MISMATCH;
9651                         }
9652                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9653                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9654                                     BTRFS_BLOCK_GROUP_METADATA))) {
9655                                 error(
9656                         "bad extent[%llu, %llu) type mismatch with chunk",
9657                                         extent_key.objectid,
9658                                         extent_key.objectid + nodesize);
9659                                 err |= CHUNK_TYPE_MISMATCH;
9660                         }
9661                 }
9662 next:
9663                 ret = btrfs_next_item(extent_root, &path);
9664                 if (ret)
9665                         break;
9666         }
9667
9668 out:
9669         btrfs_release_path(&path);
9670
9671         if (total != used) {
9672                 error(
9673                 "block group[%llu %llu] used %llu but extent items used %llu",
9674                         bg_key.objectid, bg_key.offset, used, total);
9675                 err |= ACCOUNTING_MISMATCH;
9676         }
9677         return err;
9678 }
9679
9680 /*
9681  * Check a chunk item.
9682  * Including checking all referred dev_extents and block group
9683  */
9684 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9685                             struct extent_buffer *eb, int slot)
9686 {
9687         struct btrfs_root *extent_root = fs_info->extent_root;
9688         struct btrfs_root *dev_root = fs_info->dev_root;
9689         struct btrfs_path path;
9690         struct btrfs_key chunk_key;
9691         struct btrfs_key bg_key;
9692         struct btrfs_key devext_key;
9693         struct btrfs_chunk *chunk;
9694         struct extent_buffer *leaf;
9695         struct btrfs_block_group_item *bi;
9696         struct btrfs_block_group_item bg_item;
9697         struct btrfs_dev_extent *ptr;
9698         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9699         u64 length;
9700         u64 chunk_end;
9701         u64 type;
9702         u64 profile;
9703         int num_stripes;
9704         u64 offset;
9705         u64 objectid;
9706         int i;
9707         int ret;
9708         int err = 0;
9709
9710         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9711         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9712         length = btrfs_chunk_length(eb, chunk);
9713         chunk_end = chunk_key.offset + length;
9714         if (!IS_ALIGNED(length, sectorsize)) {
9715                 error("chunk[%llu %llu) not aligned to %u",
9716                         chunk_key.offset, chunk_end, sectorsize);
9717                 err |= BYTES_UNALIGNED;
9718                 goto out;
9719         }
9720
9721         type = btrfs_chunk_type(eb, chunk);
9722         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9723         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9724                 error("chunk[%llu %llu) has no chunk type",
9725                         chunk_key.offset, chunk_end);
9726                 err |= UNKNOWN_TYPE;
9727         }
9728         if (profile && (profile & (profile - 1))) {
9729                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9730                         chunk_key.offset, chunk_end, profile);
9731                 err |= UNKNOWN_TYPE;
9732         }
9733
9734         bg_key.objectid = chunk_key.offset;
9735         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9736         bg_key.offset = length;
9737
9738         btrfs_init_path(&path);
9739         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9740         if (ret) {
9741                 error(
9742                 "chunk[%llu %llu) did not find the related block group item",
9743                         chunk_key.offset, chunk_end);
9744                 err |= REFERENCER_MISSING;
9745         } else{
9746                 leaf = path.nodes[0];
9747                 bi = btrfs_item_ptr(leaf, path.slots[0],
9748                                     struct btrfs_block_group_item);
9749                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9750                                    sizeof(bg_item));
9751                 if (btrfs_block_group_flags(&bg_item) != type) {
9752                         error(
9753 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9754                                 chunk_key.offset, chunk_end, type,
9755                                 btrfs_block_group_flags(&bg_item));
9756                         err |= REFERENCER_MISSING;
9757                 }
9758         }
9759
9760         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9761         for (i = 0; i < num_stripes; i++) {
9762                 btrfs_release_path(&path);
9763                 btrfs_init_path(&path);
9764                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9765                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9766                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9767
9768                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9769                                         0, 0);
9770                 if (ret)
9771                         goto not_match_dev;
9772
9773                 leaf = path.nodes[0];
9774                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9775                                      struct btrfs_dev_extent);
9776                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9777                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9778                 if (objectid != chunk_key.objectid ||
9779                     offset != chunk_key.offset ||
9780                     btrfs_dev_extent_length(leaf, ptr) != length)
9781                         goto not_match_dev;
9782                 continue;
9783 not_match_dev:
9784                 err |= BACKREF_MISSING;
9785                 error(
9786                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9787                         chunk_key.objectid, chunk_end, i);
9788                 continue;
9789         }
9790         btrfs_release_path(&path);
9791 out:
9792         return err;
9793 }
9794
9795 /*
9796  * Main entry function to check known items and update related accounting info
9797  */
9798 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9799 {
9800         struct btrfs_fs_info *fs_info = root->fs_info;
9801         struct btrfs_key key;
9802         int slot = 0;
9803         int type;
9804         struct btrfs_extent_data_ref *dref;
9805         int ret;
9806         int err = 0;
9807
9808 next:
9809         btrfs_item_key_to_cpu(eb, &key, slot);
9810         type = key.type;
9811
9812         switch (type) {
9813         case BTRFS_EXTENT_DATA_KEY:
9814                 ret = check_extent_data_item(root, eb, slot);
9815                 err |= ret;
9816                 break;
9817         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9818                 ret = check_block_group_item(fs_info, eb, slot);
9819                 err |= ret;
9820                 break;
9821         case BTRFS_DEV_ITEM_KEY:
9822                 ret = check_dev_item(fs_info, eb, slot);
9823                 err |= ret;
9824                 break;
9825         case BTRFS_CHUNK_ITEM_KEY:
9826                 ret = check_chunk_item(fs_info, eb, slot);
9827                 err |= ret;
9828                 break;
9829         case BTRFS_DEV_EXTENT_KEY:
9830                 ret = check_dev_extent_item(fs_info, eb, slot);
9831                 err |= ret;
9832                 break;
9833         case BTRFS_EXTENT_ITEM_KEY:
9834         case BTRFS_METADATA_ITEM_KEY:
9835                 ret = check_extent_item(fs_info, eb, slot);
9836                 err |= ret;
9837                 break;
9838         case BTRFS_EXTENT_CSUM_KEY:
9839                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9840                 break;
9841         case BTRFS_TREE_BLOCK_REF_KEY:
9842                 ret = check_tree_block_backref(fs_info, key.offset,
9843                                                key.objectid, -1);
9844                 err |= ret;
9845                 break;
9846         case BTRFS_EXTENT_DATA_REF_KEY:
9847                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9848                 ret = check_extent_data_backref(fs_info,
9849                                 btrfs_extent_data_ref_root(eb, dref),
9850                                 btrfs_extent_data_ref_objectid(eb, dref),
9851                                 btrfs_extent_data_ref_offset(eb, dref),
9852                                 key.objectid, 0,
9853                                 btrfs_extent_data_ref_count(eb, dref));
9854                 err |= ret;
9855                 break;
9856         case BTRFS_SHARED_BLOCK_REF_KEY:
9857                 ret = check_shared_block_backref(fs_info, key.offset,
9858                                                  key.objectid, -1);
9859                 err |= ret;
9860                 break;
9861         case BTRFS_SHARED_DATA_REF_KEY:
9862                 ret = check_shared_data_backref(fs_info, key.offset,
9863                                                 key.objectid);
9864                 err |= ret;
9865                 break;
9866         default:
9867                 break;
9868         }
9869
9870         if (++slot < btrfs_header_nritems(eb))
9871                 goto next;
9872
9873         return err;
9874 }
9875
9876 /*
9877  * Helper function for later fs/subvol tree check.  To determine if a tree
9878  * block should be checked.
9879  * This function will ensure only the direct referencer with lowest rootid to
9880  * check a fs/subvolume tree block.
9881  *
9882  * Backref check at extent tree would detect errors like missing subvolume
9883  * tree, so we can do aggressive check to reduce duplicated checks.
9884  */
9885 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9886 {
9887         struct btrfs_root *extent_root = root->fs_info->extent_root;
9888         struct btrfs_key key;
9889         struct btrfs_path path;
9890         struct extent_buffer *leaf;
9891         int slot;
9892         struct btrfs_extent_item *ei;
9893         unsigned long ptr;
9894         unsigned long end;
9895         int type;
9896         u32 item_size;
9897         u64 offset;
9898         struct btrfs_extent_inline_ref *iref;
9899         int ret;
9900
9901         btrfs_init_path(&path);
9902         key.objectid = btrfs_header_bytenr(eb);
9903         key.type = BTRFS_METADATA_ITEM_KEY;
9904         key.offset = (u64)-1;
9905
9906         /*
9907          * Any failure in backref resolving means we can't determine
9908          * whom the tree block belongs to.
9909          * So in that case, we need to check that tree block
9910          */
9911         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9912         if (ret < 0)
9913                 goto need_check;
9914
9915         ret = btrfs_previous_extent_item(extent_root, &path,
9916                                          btrfs_header_bytenr(eb));
9917         if (ret)
9918                 goto need_check;
9919
9920         leaf = path.nodes[0];
9921         slot = path.slots[0];
9922         btrfs_item_key_to_cpu(leaf, &key, slot);
9923         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9924
9925         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9926                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9927         } else {
9928                 struct btrfs_tree_block_info *info;
9929
9930                 info = (struct btrfs_tree_block_info *)(ei + 1);
9931                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9932         }
9933
9934         item_size = btrfs_item_size_nr(leaf, slot);
9935         ptr = (unsigned long)iref;
9936         end = (unsigned long)ei + item_size;
9937         while (ptr < end) {
9938                 iref = (struct btrfs_extent_inline_ref *)ptr;
9939                 type = btrfs_extent_inline_ref_type(leaf, iref);
9940                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9941
9942                 /*
9943                  * We only check the tree block if current root is
9944                  * the lowest referencer of it.
9945                  */
9946                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9947                     offset < root->objectid) {
9948                         btrfs_release_path(&path);
9949                         return 0;
9950                 }
9951
9952                 ptr += btrfs_extent_inline_ref_size(type);
9953         }
9954         /*
9955          * Normally we should also check keyed tree block ref, but that may be
9956          * very time consuming.  Inlined ref should already make us skip a lot
9957          * of refs now.  So skip search keyed tree block ref.
9958          */
9959
9960 need_check:
9961         btrfs_release_path(&path);
9962         return 1;
9963 }
9964
9965 /*
9966  * Traversal function for tree block. We will do:
9967  * 1) Skip shared fs/subvolume tree blocks
9968  * 2) Update related bytes accounting
9969  * 3) Pre-order traversal
9970  */
9971 static int traverse_tree_block(struct btrfs_root *root,
9972                                 struct extent_buffer *node)
9973 {
9974         struct extent_buffer *eb;
9975         struct btrfs_key key;
9976         struct btrfs_key drop_key;
9977         int level;
9978         u64 nr;
9979         int i;
9980         int err = 0;
9981         int ret;
9982
9983         /*
9984          * Skip shared fs/subvolume tree block, in that case they will
9985          * be checked by referencer with lowest rootid
9986          */
9987         if (is_fstree(root->objectid) && !should_check(root, node))
9988                 return 0;
9989
9990         /* Update bytes accounting */
9991         total_btree_bytes += node->len;
9992         if (fs_root_objectid(btrfs_header_owner(node)))
9993                 total_fs_tree_bytes += node->len;
9994         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9995                 total_extent_tree_bytes += node->len;
9996         if (!found_old_backref &&
9997             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9998             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9999             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10000                 found_old_backref = 1;
10001
10002         /* pre-order tranversal, check itself first */
10003         level = btrfs_header_level(node);
10004         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10005                                    btrfs_header_level(node),
10006                                    btrfs_header_owner(node));
10007         err |= ret;
10008         if (err)
10009                 error(
10010         "check %s failed root %llu bytenr %llu level %d, force continue check",
10011                         level ? "node":"leaf", root->objectid,
10012                         btrfs_header_bytenr(node), btrfs_header_level(node));
10013
10014         if (!level) {
10015                 btree_space_waste += btrfs_leaf_free_space(root, node);
10016                 ret = check_leaf_items(root, node);
10017                 err |= ret;
10018                 return err;
10019         }
10020
10021         nr = btrfs_header_nritems(node);
10022         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10023         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10024                 sizeof(struct btrfs_key_ptr);
10025
10026         /* Then check all its children */
10027         for (i = 0; i < nr; i++) {
10028                 u64 blocknr = btrfs_node_blockptr(node, i);
10029
10030                 btrfs_node_key_to_cpu(node, &key, i);
10031                 if (level == root->root_item.drop_level &&
10032                     is_dropped_key(&key, &drop_key))
10033                         continue;
10034
10035                 /*
10036                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10037                  * to call the function itself.
10038                  */
10039                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10040                 if (extent_buffer_uptodate(eb)) {
10041                         ret = traverse_tree_block(root, eb);
10042                         err |= ret;
10043                 }
10044                 free_extent_buffer(eb);
10045         }
10046
10047         return err;
10048 }
10049
10050 /*
10051  * Low memory usage version check_chunks_and_extents.
10052  */
10053 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10054 {
10055         struct btrfs_path path;
10056         struct btrfs_key key;
10057         struct btrfs_root *root1;
10058         struct btrfs_root *cur_root;
10059         int err = 0;
10060         int ret;
10061
10062         root1 = root->fs_info->chunk_root;
10063         ret = traverse_tree_block(root1, root1->node);
10064         err |= ret;
10065
10066         root1 = root->fs_info->tree_root;
10067         ret = traverse_tree_block(root1, root1->node);
10068         err |= ret;
10069
10070         btrfs_init_path(&path);
10071         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10072         key.offset = 0;
10073         key.type = BTRFS_ROOT_ITEM_KEY;
10074
10075         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10076         if (ret) {
10077                 error("cannot find extent treet in tree_root");
10078                 goto out;
10079         }
10080
10081         while (1) {
10082                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10083                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10084                         goto next;
10085                 key.offset = (u64)-1;
10086
10087                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10088                 if (IS_ERR(cur_root) || !cur_root) {
10089                         error("failed to read tree: %lld", key.objectid);
10090                         goto next;
10091                 }
10092
10093                 ret = traverse_tree_block(cur_root, cur_root->node);
10094                 err |= ret;
10095
10096 next:
10097                 ret = btrfs_next_item(root1, &path);
10098                 if (ret)
10099                         goto out;
10100         }
10101
10102 out:
10103         btrfs_release_path(&path);
10104         return err;
10105 }
10106
10107 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10108                            struct btrfs_root *root, int overwrite)
10109 {
10110         struct extent_buffer *c;
10111         struct extent_buffer *old = root->node;
10112         int level;
10113         int ret;
10114         struct btrfs_disk_key disk_key = {0,0,0};
10115
10116         level = 0;
10117
10118         if (overwrite) {
10119                 c = old;
10120                 extent_buffer_get(c);
10121                 goto init;
10122         }
10123         c = btrfs_alloc_free_block(trans, root,
10124                                    root->nodesize,
10125                                    root->root_key.objectid,
10126                                    &disk_key, level, 0, 0);
10127         if (IS_ERR(c)) {
10128                 c = old;
10129                 extent_buffer_get(c);
10130                 overwrite = 1;
10131         }
10132 init:
10133         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10134         btrfs_set_header_level(c, level);
10135         btrfs_set_header_bytenr(c, c->start);
10136         btrfs_set_header_generation(c, trans->transid);
10137         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10138         btrfs_set_header_owner(c, root->root_key.objectid);
10139
10140         write_extent_buffer(c, root->fs_info->fsid,
10141                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10142
10143         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10144                             btrfs_header_chunk_tree_uuid(c),
10145                             BTRFS_UUID_SIZE);
10146
10147         btrfs_mark_buffer_dirty(c);
10148         /*
10149          * this case can happen in the following case:
10150          *
10151          * 1.overwrite previous root.
10152          *
10153          * 2.reinit reloc data root, this is because we skip pin
10154          * down reloc data tree before which means we can allocate
10155          * same block bytenr here.
10156          */
10157         if (old->start == c->start) {
10158                 btrfs_set_root_generation(&root->root_item,
10159                                           trans->transid);
10160                 root->root_item.level = btrfs_header_level(root->node);
10161                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10162                                         &root->root_key, &root->root_item);
10163                 if (ret) {
10164                         free_extent_buffer(c);
10165                         return ret;
10166                 }
10167         }
10168         free_extent_buffer(old);
10169         root->node = c;
10170         add_root_to_dirty_list(root);
10171         return 0;
10172 }
10173
10174 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10175                                 struct extent_buffer *eb, int tree_root)
10176 {
10177         struct extent_buffer *tmp;
10178         struct btrfs_root_item *ri;
10179         struct btrfs_key key;
10180         u64 bytenr;
10181         u32 nodesize;
10182         int level = btrfs_header_level(eb);
10183         int nritems;
10184         int ret;
10185         int i;
10186
10187         /*
10188          * If we have pinned this block before, don't pin it again.
10189          * This can not only avoid forever loop with broken filesystem
10190          * but also give us some speedups.
10191          */
10192         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10193                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10194                 return 0;
10195
10196         btrfs_pin_extent(fs_info, eb->start, eb->len);
10197
10198         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10199         nritems = btrfs_header_nritems(eb);
10200         for (i = 0; i < nritems; i++) {
10201                 if (level == 0) {
10202                         btrfs_item_key_to_cpu(eb, &key, i);
10203                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10204                                 continue;
10205                         /* Skip the extent root and reloc roots */
10206                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10207                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10208                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10209                                 continue;
10210                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10211                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10212
10213                         /*
10214                          * If at any point we start needing the real root we
10215                          * will have to build a stump root for the root we are
10216                          * in, but for now this doesn't actually use the root so
10217                          * just pass in extent_root.
10218                          */
10219                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10220                                               nodesize, 0);
10221                         if (!extent_buffer_uptodate(tmp)) {
10222                                 fprintf(stderr, "Error reading root block\n");
10223                                 return -EIO;
10224                         }
10225                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10226                         free_extent_buffer(tmp);
10227                         if (ret)
10228                                 return ret;
10229                 } else {
10230                         bytenr = btrfs_node_blockptr(eb, i);
10231
10232                         /* If we aren't the tree root don't read the block */
10233                         if (level == 1 && !tree_root) {
10234                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10235                                 continue;
10236                         }
10237
10238                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10239                                               nodesize, 0);
10240                         if (!extent_buffer_uptodate(tmp)) {
10241                                 fprintf(stderr, "Error reading tree block\n");
10242                                 return -EIO;
10243                         }
10244                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10245                         free_extent_buffer(tmp);
10246                         if (ret)
10247                                 return ret;
10248                 }
10249         }
10250
10251         return 0;
10252 }
10253
10254 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10255 {
10256         int ret;
10257
10258         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10259         if (ret)
10260                 return ret;
10261
10262         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10263 }
10264
10265 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10266 {
10267         struct btrfs_block_group_cache *cache;
10268         struct btrfs_path *path;
10269         struct extent_buffer *leaf;
10270         struct btrfs_chunk *chunk;
10271         struct btrfs_key key;
10272         int ret;
10273         u64 start;
10274
10275         path = btrfs_alloc_path();
10276         if (!path)
10277                 return -ENOMEM;
10278
10279         key.objectid = 0;
10280         key.type = BTRFS_CHUNK_ITEM_KEY;
10281         key.offset = 0;
10282
10283         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10284         if (ret < 0) {
10285                 btrfs_free_path(path);
10286                 return ret;
10287         }
10288
10289         /*
10290          * We do this in case the block groups were screwed up and had alloc
10291          * bits that aren't actually set on the chunks.  This happens with
10292          * restored images every time and could happen in real life I guess.
10293          */
10294         fs_info->avail_data_alloc_bits = 0;
10295         fs_info->avail_metadata_alloc_bits = 0;
10296         fs_info->avail_system_alloc_bits = 0;
10297
10298         /* First we need to create the in-memory block groups */
10299         while (1) {
10300                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10301                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10302                         if (ret < 0) {
10303                                 btrfs_free_path(path);
10304                                 return ret;
10305                         }
10306                         if (ret) {
10307                                 ret = 0;
10308                                 break;
10309                         }
10310                 }
10311                 leaf = path->nodes[0];
10312                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10313                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10314                         path->slots[0]++;
10315                         continue;
10316                 }
10317
10318                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10319                                        struct btrfs_chunk);
10320                 btrfs_add_block_group(fs_info, 0,
10321                                       btrfs_chunk_type(leaf, chunk),
10322                                       key.objectid, key.offset,
10323                                       btrfs_chunk_length(leaf, chunk));
10324                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10325                                  key.offset + btrfs_chunk_length(leaf, chunk),
10326                                  GFP_NOFS);
10327                 path->slots[0]++;
10328         }
10329         start = 0;
10330         while (1) {
10331                 cache = btrfs_lookup_first_block_group(fs_info, start);
10332                 if (!cache)
10333                         break;
10334                 cache->cached = 1;
10335                 start = cache->key.objectid + cache->key.offset;
10336         }
10337
10338         btrfs_free_path(path);
10339         return 0;
10340 }
10341
10342 static int reset_balance(struct btrfs_trans_handle *trans,
10343                          struct btrfs_fs_info *fs_info)
10344 {
10345         struct btrfs_root *root = fs_info->tree_root;
10346         struct btrfs_path *path;
10347         struct extent_buffer *leaf;
10348         struct btrfs_key key;
10349         int del_slot, del_nr = 0;
10350         int ret;
10351         int found = 0;
10352
10353         path = btrfs_alloc_path();
10354         if (!path)
10355                 return -ENOMEM;
10356
10357         key.objectid = BTRFS_BALANCE_OBJECTID;
10358         key.type = BTRFS_BALANCE_ITEM_KEY;
10359         key.offset = 0;
10360
10361         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10362         if (ret) {
10363                 if (ret > 0)
10364                         ret = 0;
10365                 if (!ret)
10366                         goto reinit_data_reloc;
10367                 else
10368                         goto out;
10369         }
10370
10371         ret = btrfs_del_item(trans, root, path);
10372         if (ret)
10373                 goto out;
10374         btrfs_release_path(path);
10375
10376         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10377         key.type = BTRFS_ROOT_ITEM_KEY;
10378         key.offset = 0;
10379
10380         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10381         if (ret < 0)
10382                 goto out;
10383         while (1) {
10384                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10385                         if (!found)
10386                                 break;
10387
10388                         if (del_nr) {
10389                                 ret = btrfs_del_items(trans, root, path,
10390                                                       del_slot, del_nr);
10391                                 del_nr = 0;
10392                                 if (ret)
10393                                         goto out;
10394                         }
10395                         key.offset++;
10396                         btrfs_release_path(path);
10397
10398                         found = 0;
10399                         ret = btrfs_search_slot(trans, root, &key, path,
10400                                                 -1, 1);
10401                         if (ret < 0)
10402                                 goto out;
10403                         continue;
10404                 }
10405                 found = 1;
10406                 leaf = path->nodes[0];
10407                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10408                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10409                         break;
10410                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10411                         path->slots[0]++;
10412                         continue;
10413                 }
10414                 if (!del_nr) {
10415                         del_slot = path->slots[0];
10416                         del_nr = 1;
10417                 } else {
10418                         del_nr++;
10419                 }
10420                 path->slots[0]++;
10421         }
10422
10423         if (del_nr) {
10424                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10425                 if (ret)
10426                         goto out;
10427         }
10428         btrfs_release_path(path);
10429
10430 reinit_data_reloc:
10431         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10432         key.type = BTRFS_ROOT_ITEM_KEY;
10433         key.offset = (u64)-1;
10434         root = btrfs_read_fs_root(fs_info, &key);
10435         if (IS_ERR(root)) {
10436                 fprintf(stderr, "Error reading data reloc tree\n");
10437                 ret = PTR_ERR(root);
10438                 goto out;
10439         }
10440         record_root_in_trans(trans, root);
10441         ret = btrfs_fsck_reinit_root(trans, root, 0);
10442         if (ret)
10443                 goto out;
10444         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10445 out:
10446         btrfs_free_path(path);
10447         return ret;
10448 }
10449
10450 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10451                               struct btrfs_fs_info *fs_info)
10452 {
10453         u64 start = 0;
10454         int ret;
10455
10456         /*
10457          * The only reason we don't do this is because right now we're just
10458          * walking the trees we find and pinning down their bytes, we don't look
10459          * at any of the leaves.  In order to do mixed groups we'd have to check
10460          * the leaves of any fs roots and pin down the bytes for any file
10461          * extents we find.  Not hard but why do it if we don't have to?
10462          */
10463         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10464                 fprintf(stderr, "We don't support re-initing the extent tree "
10465                         "for mixed block groups yet, please notify a btrfs "
10466                         "developer you want to do this so they can add this "
10467                         "functionality.\n");
10468                 return -EINVAL;
10469         }
10470
10471         /*
10472          * first we need to walk all of the trees except the extent tree and pin
10473          * down the bytes that are in use so we don't overwrite any existing
10474          * metadata.
10475          */
10476         ret = pin_metadata_blocks(fs_info);
10477         if (ret) {
10478                 fprintf(stderr, "error pinning down used bytes\n");
10479                 return ret;
10480         }
10481
10482         /*
10483          * Need to drop all the block groups since we're going to recreate all
10484          * of them again.
10485          */
10486         btrfs_free_block_groups(fs_info);
10487         ret = reset_block_groups(fs_info);
10488         if (ret) {
10489                 fprintf(stderr, "error resetting the block groups\n");
10490                 return ret;
10491         }
10492
10493         /* Ok we can allocate now, reinit the extent root */
10494         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10495         if (ret) {
10496                 fprintf(stderr, "extent root initialization failed\n");
10497                 /*
10498                  * When the transaction code is updated we should end the
10499                  * transaction, but for now progs only knows about commit so
10500                  * just return an error.
10501                  */
10502                 return ret;
10503         }
10504
10505         /*
10506          * Now we have all the in-memory block groups setup so we can make
10507          * allocations properly, and the metadata we care about is safe since we
10508          * pinned all of it above.
10509          */
10510         while (1) {
10511                 struct btrfs_block_group_cache *cache;
10512
10513                 cache = btrfs_lookup_first_block_group(fs_info, start);
10514                 if (!cache)
10515                         break;
10516                 start = cache->key.objectid + cache->key.offset;
10517                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10518                                         &cache->key, &cache->item,
10519                                         sizeof(cache->item));
10520                 if (ret) {
10521                         fprintf(stderr, "Error adding block group\n");
10522                         return ret;
10523                 }
10524                 btrfs_extent_post_op(trans, fs_info->extent_root);
10525         }
10526
10527         ret = reset_balance(trans, fs_info);
10528         if (ret)
10529                 fprintf(stderr, "error resetting the pending balance\n");
10530
10531         return ret;
10532 }
10533
10534 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10535 {
10536         struct btrfs_path *path;
10537         struct btrfs_trans_handle *trans;
10538         struct btrfs_key key;
10539         int ret;
10540
10541         printf("Recowing metadata block %llu\n", eb->start);
10542         key.objectid = btrfs_header_owner(eb);
10543         key.type = BTRFS_ROOT_ITEM_KEY;
10544         key.offset = (u64)-1;
10545
10546         root = btrfs_read_fs_root(root->fs_info, &key);
10547         if (IS_ERR(root)) {
10548                 fprintf(stderr, "Couldn't find owner root %llu\n",
10549                         key.objectid);
10550                 return PTR_ERR(root);
10551         }
10552
10553         path = btrfs_alloc_path();
10554         if (!path)
10555                 return -ENOMEM;
10556
10557         trans = btrfs_start_transaction(root, 1);
10558         if (IS_ERR(trans)) {
10559                 btrfs_free_path(path);
10560                 return PTR_ERR(trans);
10561         }
10562
10563         path->lowest_level = btrfs_header_level(eb);
10564         if (path->lowest_level)
10565                 btrfs_node_key_to_cpu(eb, &key, 0);
10566         else
10567                 btrfs_item_key_to_cpu(eb, &key, 0);
10568
10569         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10570         btrfs_commit_transaction(trans, root);
10571         btrfs_free_path(path);
10572         return ret;
10573 }
10574
10575 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10576 {
10577         struct btrfs_path *path;
10578         struct btrfs_trans_handle *trans;
10579         struct btrfs_key key;
10580         int ret;
10581
10582         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10583                bad->key.type, bad->key.offset);
10584         key.objectid = bad->root_id;
10585         key.type = BTRFS_ROOT_ITEM_KEY;
10586         key.offset = (u64)-1;
10587
10588         root = btrfs_read_fs_root(root->fs_info, &key);
10589         if (IS_ERR(root)) {
10590                 fprintf(stderr, "Couldn't find owner root %llu\n",
10591                         key.objectid);
10592                 return PTR_ERR(root);
10593         }
10594
10595         path = btrfs_alloc_path();
10596         if (!path)
10597                 return -ENOMEM;
10598
10599         trans = btrfs_start_transaction(root, 1);
10600         if (IS_ERR(trans)) {
10601                 btrfs_free_path(path);
10602                 return PTR_ERR(trans);
10603         }
10604
10605         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10606         if (ret) {
10607                 if (ret > 0)
10608                         ret = 0;
10609                 goto out;
10610         }
10611         ret = btrfs_del_item(trans, root, path);
10612 out:
10613         btrfs_commit_transaction(trans, root);
10614         btrfs_free_path(path);
10615         return ret;
10616 }
10617
10618 static int zero_log_tree(struct btrfs_root *root)
10619 {
10620         struct btrfs_trans_handle *trans;
10621         int ret;
10622
10623         trans = btrfs_start_transaction(root, 1);
10624         if (IS_ERR(trans)) {
10625                 ret = PTR_ERR(trans);
10626                 return ret;
10627         }
10628         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10629         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10630         ret = btrfs_commit_transaction(trans, root);
10631         return ret;
10632 }
10633
10634 static int populate_csum(struct btrfs_trans_handle *trans,
10635                          struct btrfs_root *csum_root, char *buf, u64 start,
10636                          u64 len)
10637 {
10638         u64 offset = 0;
10639         u64 sectorsize;
10640         int ret = 0;
10641
10642         while (offset < len) {
10643                 sectorsize = csum_root->sectorsize;
10644                 ret = read_extent_data(csum_root, buf, start + offset,
10645                                        &sectorsize, 0);
10646                 if (ret)
10647                         break;
10648                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10649                                             start + offset, buf, sectorsize);
10650                 if (ret)
10651                         break;
10652                 offset += sectorsize;
10653         }
10654         return ret;
10655 }
10656
10657 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10658                                       struct btrfs_root *csum_root,
10659                                       struct btrfs_root *cur_root)
10660 {
10661         struct btrfs_path *path;
10662         struct btrfs_key key;
10663         struct extent_buffer *node;
10664         struct btrfs_file_extent_item *fi;
10665         char *buf = NULL;
10666         u64 start = 0;
10667         u64 len = 0;
10668         int slot = 0;
10669         int ret = 0;
10670
10671         path = btrfs_alloc_path();
10672         if (!path)
10673                 return -ENOMEM;
10674         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10675         if (!buf) {
10676                 ret = -ENOMEM;
10677                 goto out;
10678         }
10679
10680         key.objectid = 0;
10681         key.offset = 0;
10682         key.type = 0;
10683
10684         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10685         if (ret < 0)
10686                 goto out;
10687         /* Iterate all regular file extents and fill its csum */
10688         while (1) {
10689                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10690
10691                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10692                         goto next;
10693                 node = path->nodes[0];
10694                 slot = path->slots[0];
10695                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10696                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10697                         goto next;
10698                 start = btrfs_file_extent_disk_bytenr(node, fi);
10699                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10700
10701                 ret = populate_csum(trans, csum_root, buf, start, len);
10702                 if (ret == -EEXIST)
10703                         ret = 0;
10704                 if (ret < 0)
10705                         goto out;
10706 next:
10707                 /*
10708                  * TODO: if next leaf is corrupted, jump to nearest next valid
10709                  * leaf.
10710                  */
10711                 ret = btrfs_next_item(cur_root, path);
10712                 if (ret < 0)
10713                         goto out;
10714                 if (ret > 0) {
10715                         ret = 0;
10716                         goto out;
10717                 }
10718         }
10719
10720 out:
10721         btrfs_free_path(path);
10722         free(buf);
10723         return ret;
10724 }
10725
10726 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10727                                   struct btrfs_root *csum_root)
10728 {
10729         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10730         struct btrfs_path *path;
10731         struct btrfs_root *tree_root = fs_info->tree_root;
10732         struct btrfs_root *cur_root;
10733         struct extent_buffer *node;
10734         struct btrfs_key key;
10735         int slot = 0;
10736         int ret = 0;
10737
10738         path = btrfs_alloc_path();
10739         if (!path)
10740                 return -ENOMEM;
10741
10742         key.objectid = BTRFS_FS_TREE_OBJECTID;
10743         key.offset = 0;
10744         key.type = BTRFS_ROOT_ITEM_KEY;
10745
10746         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10747         if (ret < 0)
10748                 goto out;
10749         if (ret > 0) {
10750                 ret = -ENOENT;
10751                 goto out;
10752         }
10753
10754         while (1) {
10755                 node = path->nodes[0];
10756                 slot = path->slots[0];
10757                 btrfs_item_key_to_cpu(node, &key, slot);
10758                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10759                         goto out;
10760                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10761                         goto next;
10762                 if (!is_fstree(key.objectid))
10763                         goto next;
10764                 key.offset = (u64)-1;
10765
10766                 cur_root = btrfs_read_fs_root(fs_info, &key);
10767                 if (IS_ERR(cur_root) || !cur_root) {
10768                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10769                                 key.objectid);
10770                         goto out;
10771                 }
10772                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10773                                 cur_root);
10774                 if (ret < 0)
10775                         goto out;
10776 next:
10777                 ret = btrfs_next_item(tree_root, path);
10778                 if (ret > 0) {
10779                         ret = 0;
10780                         goto out;
10781                 }
10782                 if (ret < 0)
10783                         goto out;
10784         }
10785
10786 out:
10787         btrfs_free_path(path);
10788         return ret;
10789 }
10790
10791 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10792                                       struct btrfs_root *csum_root)
10793 {
10794         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10795         struct btrfs_path *path;
10796         struct btrfs_extent_item *ei;
10797         struct extent_buffer *leaf;
10798         char *buf;
10799         struct btrfs_key key;
10800         int ret;
10801
10802         path = btrfs_alloc_path();
10803         if (!path)
10804                 return -ENOMEM;
10805
10806         key.objectid = 0;
10807         key.type = BTRFS_EXTENT_ITEM_KEY;
10808         key.offset = 0;
10809
10810         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10811         if (ret < 0) {
10812                 btrfs_free_path(path);
10813                 return ret;
10814         }
10815
10816         buf = malloc(csum_root->sectorsize);
10817         if (!buf) {
10818                 btrfs_free_path(path);
10819                 return -ENOMEM;
10820         }
10821
10822         while (1) {
10823                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10824                         ret = btrfs_next_leaf(extent_root, path);
10825                         if (ret < 0)
10826                                 break;
10827                         if (ret) {
10828                                 ret = 0;
10829                                 break;
10830                         }
10831                 }
10832                 leaf = path->nodes[0];
10833
10834                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10835                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10836                         path->slots[0]++;
10837                         continue;
10838                 }
10839
10840                 ei = btrfs_item_ptr(leaf, path->slots[0],
10841                                     struct btrfs_extent_item);
10842                 if (!(btrfs_extent_flags(leaf, ei) &
10843                       BTRFS_EXTENT_FLAG_DATA)) {
10844                         path->slots[0]++;
10845                         continue;
10846                 }
10847
10848                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10849                                     key.offset);
10850                 if (ret)
10851                         break;
10852                 path->slots[0]++;
10853         }
10854
10855         btrfs_free_path(path);
10856         free(buf);
10857         return ret;
10858 }
10859
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case, we
 * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
 * will use fs/subvol trees to init the csum tree.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
10876
10877 static void free_roots_info_cache(void)
10878 {
10879         if (!roots_info_cache)
10880                 return;
10881
10882         while (!cache_tree_empty(roots_info_cache)) {
10883                 struct cache_extent *entry;
10884                 struct root_item_info *rii;
10885
10886                 entry = first_cache_extent(roots_info_cache);
10887                 if (!entry)
10888                         break;
10889                 remove_cache_extent(roots_info_cache, entry);
10890                 rii = container_of(entry, struct root_item_info, cache_extent);
10891                 free(rii);
10892         }
10893
10894         free(roots_info_cache);
10895         roots_info_cache = NULL;
10896 }
10897
10898 static int build_roots_info_cache(struct btrfs_fs_info *info)
10899 {
10900         int ret = 0;
10901         struct btrfs_key key;
10902         struct extent_buffer *leaf;
10903         struct btrfs_path *path;
10904
10905         if (!roots_info_cache) {
10906                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10907                 if (!roots_info_cache)
10908                         return -ENOMEM;
10909                 cache_tree_init(roots_info_cache);
10910         }
10911
10912         path = btrfs_alloc_path();
10913         if (!path)
10914                 return -ENOMEM;
10915
10916         key.objectid = 0;
10917         key.type = BTRFS_EXTENT_ITEM_KEY;
10918         key.offset = 0;
10919
10920         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10921         if (ret < 0)
10922                 goto out;
10923         leaf = path->nodes[0];
10924
10925         while (1) {
10926                 struct btrfs_key found_key;
10927                 struct btrfs_extent_item *ei;
10928                 struct btrfs_extent_inline_ref *iref;
10929                 int slot = path->slots[0];
10930                 int type;
10931                 u64 flags;
10932                 u64 root_id;
10933                 u8 level;
10934                 struct cache_extent *entry;
10935                 struct root_item_info *rii;
10936
10937                 if (slot >= btrfs_header_nritems(leaf)) {
10938                         ret = btrfs_next_leaf(info->extent_root, path);
10939                         if (ret < 0) {
10940                                 break;
10941                         } else if (ret) {
10942                                 ret = 0;
10943                                 break;
10944                         }
10945                         leaf = path->nodes[0];
10946                         slot = path->slots[0];
10947                 }
10948
10949                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10950
10951                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10952                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10953                         goto next;
10954
10955                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10956                 flags = btrfs_extent_flags(leaf, ei);
10957
10958                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10959                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10960                         goto next;
10961
10962                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10963                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10964                         level = found_key.offset;
10965                 } else {
10966                         struct btrfs_tree_block_info *binfo;
10967
10968                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10969                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10970                         level = btrfs_tree_block_level(leaf, binfo);
10971                 }
10972
10973                 /*
10974                  * For a root extent, it must be of the following type and the
10975                  * first (and only one) iref in the item.
10976                  */
10977                 type = btrfs_extent_inline_ref_type(leaf, iref);
10978                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10979                         goto next;
10980
10981                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10982                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10983                 if (!entry) {
10984                         rii = malloc(sizeof(struct root_item_info));
10985                         if (!rii) {
10986                                 ret = -ENOMEM;
10987                                 goto out;
10988                         }
10989                         rii->cache_extent.start = root_id;
10990                         rii->cache_extent.size = 1;
10991                         rii->level = (u8)-1;
10992                         entry = &rii->cache_extent;
10993                         ret = insert_cache_extent(roots_info_cache, entry);
10994                         ASSERT(ret == 0);
10995                 } else {
10996                         rii = container_of(entry, struct root_item_info,
10997                                            cache_extent);
10998                 }
10999
11000                 ASSERT(rii->cache_extent.start == root_id);
11001                 ASSERT(rii->cache_extent.size == 1);
11002
11003                 if (level > rii->level || rii->level == (u8)-1) {
11004                         rii->level = level;
11005                         rii->bytenr = found_key.objectid;
11006                         rii->gen = btrfs_extent_generation(leaf, ei);
11007                         rii->node_count = 1;
11008                 } else if (level == rii->level) {
11009                         rii->node_count++;
11010                 }
11011 next:
11012                 path->slots[0]++;
11013         }
11014
11015 out:
11016         btrfs_free_path(path);
11017
11018         return ret;
11019 }
11020
11021 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11022                                   struct btrfs_path *path,
11023                                   const struct btrfs_key *root_key,
11024                                   const int read_only_mode)
11025 {
11026         const u64 root_id = root_key->objectid;
11027         struct cache_extent *entry;
11028         struct root_item_info *rii;
11029         struct btrfs_root_item ri;
11030         unsigned long offset;
11031
11032         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11033         if (!entry) {
11034                 fprintf(stderr,
11035                         "Error: could not find extent items for root %llu\n",
11036                         root_key->objectid);
11037                 return -ENOENT;
11038         }
11039
11040         rii = container_of(entry, struct root_item_info, cache_extent);
11041         ASSERT(rii->cache_extent.start == root_id);
11042         ASSERT(rii->cache_extent.size == 1);
11043
11044         if (rii->node_count != 1) {
11045                 fprintf(stderr,
11046                         "Error: could not find btree root extent for root %llu\n",
11047                         root_id);
11048                 return -ENOENT;
11049         }
11050
11051         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11052         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11053
11054         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11055             btrfs_root_level(&ri) != rii->level ||
11056             btrfs_root_generation(&ri) != rii->gen) {
11057
11058                 /*
11059                  * If we're in repair mode but our caller told us to not update
11060                  * the root item, i.e. just check if it needs to be updated, don't
11061                  * print this message, since the caller will call us again shortly
11062                  * for the same root item without read only mode (the caller will
11063                  * open a transaction first).
11064                  */
11065                 if (!(read_only_mode && repair))
11066                         fprintf(stderr,
11067                                 "%sroot item for root %llu,"
11068                                 " current bytenr %llu, current gen %llu, current level %u,"
11069                                 " new bytenr %llu, new gen %llu, new level %u\n",
11070                                 (read_only_mode ? "" : "fixing "),
11071                                 root_id,
11072                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11073                                 btrfs_root_level(&ri),
11074                                 rii->bytenr, rii->gen, rii->level);
11075
11076                 if (btrfs_root_generation(&ri) > rii->gen) {
11077                         fprintf(stderr,
11078                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11079                                 root_id, btrfs_root_generation(&ri), rii->gen);
11080                         return -EINVAL;
11081                 }
11082
11083                 if (!read_only_mode) {
11084                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11085                         btrfs_set_root_level(&ri, rii->level);
11086                         btrfs_set_root_generation(&ri, rii->gen);
11087                         write_extent_buffer(path->nodes[0], &ri,
11088                                             offset, sizeof(ri));
11089                 }
11090
11091                 return 1;
11092         }
11093
11094         return 0;
11095 }
11096
11097 /*
11098  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11099  * caused read-only snapshots to be corrupted if they were created at a moment
11100  * when the source subvolume/snapshot had orphan items. The issue was that the
11101  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11102  * node instead of the post orphan cleanup root node.
11103  * So this function, and its callees, just detects and fixes those cases. Even
11104  * though the regression was for read-only snapshots, this function applies to
11105  * any snapshot/subvolume root.
11106  * This must be run before any other repair code - not doing it so, makes other
11107  * repair code delete or modify backrefs in the extent tree for example, which
11108  * will result in an inconsistent fs after repairing the root items.
11109  */
11110 static int repair_root_items(struct btrfs_fs_info *info)
11111 {
11112         struct btrfs_path *path = NULL;
11113         struct btrfs_key key;
11114         struct extent_buffer *leaf;
11115         struct btrfs_trans_handle *trans = NULL;
11116         int ret = 0;
11117         int bad_roots = 0;
11118         int need_trans = 0;
11119
11120         ret = build_roots_info_cache(info);
11121         if (ret)
11122                 goto out;
11123
11124         path = btrfs_alloc_path();
11125         if (!path) {
11126                 ret = -ENOMEM;
11127                 goto out;
11128         }
11129
11130         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11131         key.type = BTRFS_ROOT_ITEM_KEY;
11132         key.offset = 0;
11133
11134 again:
11135         /*
11136          * Avoid opening and committing transactions if a leaf doesn't have
11137          * any root items that need to be fixed, so that we avoid rotating
11138          * backup roots unnecessarily.
11139          */
11140         if (need_trans) {
11141                 trans = btrfs_start_transaction(info->tree_root, 1);
11142                 if (IS_ERR(trans)) {
11143                         ret = PTR_ERR(trans);
11144                         goto out;
11145                 }
11146         }
11147
11148         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11149                                 0, trans ? 1 : 0);
11150         if (ret < 0)
11151                 goto out;
11152         leaf = path->nodes[0];
11153
11154         while (1) {
11155                 struct btrfs_key found_key;
11156
11157                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11158                         int no_more_keys = find_next_key(path, &key);
11159
11160                         btrfs_release_path(path);
11161                         if (trans) {
11162                                 ret = btrfs_commit_transaction(trans,
11163                                                                info->tree_root);
11164                                 trans = NULL;
11165                                 if (ret < 0)
11166                                         goto out;
11167                         }
11168                         need_trans = 0;
11169                         if (no_more_keys)
11170                                 break;
11171                         goto again;
11172                 }
11173
11174                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11175
11176                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11177                         goto next;
11178                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11179                         goto next;
11180
11181                 ret = maybe_repair_root_item(info, path, &found_key,
11182                                              trans ? 0 : 1);
11183                 if (ret < 0)
11184                         goto out;
11185                 if (ret) {
11186                         if (!trans && repair) {
11187                                 need_trans = 1;
11188                                 key = found_key;
11189                                 btrfs_release_path(path);
11190                                 goto again;
11191                         }
11192                         bad_roots++;
11193                 }
11194 next:
11195                 path->slots[0]++;
11196         }
11197         ret = 0;
11198 out:
11199         free_roots_info_cache();
11200         btrfs_free_path(path);
11201         if (trans)
11202                 btrfs_commit_transaction(trans, info->tree_root);
11203         if (ret < 0)
11204                 return ret;
11205
11206         return bad_roots;
11207 }
11208
/*
 * NULL-terminated table of help strings for the "btrfs check" command: the
 * synopsis line comes first, followed by description text and then one or
 * more lines per command line option.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	NULL
};
11238
11239 int cmd_check(int argc, char **argv)
11240 {
11241         struct cache_tree root_cache;
11242         struct btrfs_root *root;
11243         struct btrfs_fs_info *info;
11244         u64 bytenr = 0;
11245         u64 subvolid = 0;
11246         u64 tree_root_bytenr = 0;
11247         u64 chunk_root_bytenr = 0;
11248         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11249         int ret;
11250         u64 num;
11251         int init_csum_tree = 0;
11252         int readonly = 0;
11253         int qgroup_report = 0;
11254         int qgroups_repaired = 0;
11255         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11256
11257         while(1) {
11258                 int c;
11259                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11260                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11261                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11262                         GETOPT_VAL_MODE };
11263                 static const struct option long_options[] = {
11264                         { "super", required_argument, NULL, 's' },
11265                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11266                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11267                         { "init-csum-tree", no_argument, NULL,
11268                                 GETOPT_VAL_INIT_CSUM },
11269                         { "init-extent-tree", no_argument, NULL,
11270                                 GETOPT_VAL_INIT_EXTENT },
11271                         { "check-data-csum", no_argument, NULL,
11272                                 GETOPT_VAL_CHECK_CSUM },
11273                         { "backup", no_argument, NULL, 'b' },
11274                         { "subvol-extents", required_argument, NULL, 'E' },
11275                         { "qgroup-report", no_argument, NULL, 'Q' },
11276                         { "tree-root", required_argument, NULL, 'r' },
11277                         { "chunk-root", required_argument, NULL,
11278                                 GETOPT_VAL_CHUNK_TREE },
11279                         { "progress", no_argument, NULL, 'p' },
11280                         { "mode", required_argument, NULL,
11281                                 GETOPT_VAL_MODE },
11282                         { NULL, 0, NULL, 0}
11283                 };
11284
11285                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11286                 if (c < 0)
11287                         break;
11288                 switch(c) {
11289                         case 'a': /* ignored */ break;
11290                         case 'b':
11291                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11292                                 break;
11293                         case 's':
11294                                 num = arg_strtou64(optarg);
11295                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11296                                         error(
11297                                         "super mirror should be less than %d",
11298                                                 BTRFS_SUPER_MIRROR_MAX);
11299                                         exit(1);
11300                                 }
11301                                 bytenr = btrfs_sb_offset(((int)num));
11302                                 printf("using SB copy %llu, bytenr %llu\n", num,
11303                                        (unsigned long long)bytenr);
11304                                 break;
11305                         case 'Q':
11306                                 qgroup_report = 1;
11307                                 break;
11308                         case 'E':
11309                                 subvolid = arg_strtou64(optarg);
11310                                 break;
11311                         case 'r':
11312                                 tree_root_bytenr = arg_strtou64(optarg);
11313                                 break;
11314                         case GETOPT_VAL_CHUNK_TREE:
11315                                 chunk_root_bytenr = arg_strtou64(optarg);
11316                                 break;
11317                         case 'p':
11318                                 ctx.progress_enabled = true;
11319                                 break;
11320                         case '?':
11321                         case 'h':
11322                                 usage(cmd_check_usage);
11323                         case GETOPT_VAL_REPAIR:
11324                                 printf("enabling repair mode\n");
11325                                 repair = 1;
11326                                 ctree_flags |= OPEN_CTREE_WRITES;
11327                                 break;
11328                         case GETOPT_VAL_READONLY:
11329                                 readonly = 1;
11330                                 break;
11331                         case GETOPT_VAL_INIT_CSUM:
11332                                 printf("Creating a new CRC tree\n");
11333                                 init_csum_tree = 1;
11334                                 repair = 1;
11335                                 ctree_flags |= OPEN_CTREE_WRITES;
11336                                 break;
11337                         case GETOPT_VAL_INIT_EXTENT:
11338                                 init_extent_tree = 1;
11339                                 ctree_flags |= (OPEN_CTREE_WRITES |
11340                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11341                                 repair = 1;
11342                                 break;
11343                         case GETOPT_VAL_CHECK_CSUM:
11344                                 check_data_csum = 1;
11345                                 break;
11346                         case GETOPT_VAL_MODE:
11347                                 check_mode = parse_check_mode(optarg);
11348                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11349                                         error("unknown mode: %s", optarg);
11350                                         exit(1);
11351                                 }
11352                                 break;
11353                 }
11354         }
11355
11356         if (check_argc_exact(argc - optind, 1))
11357                 usage(cmd_check_usage);
11358
11359         if (ctx.progress_enabled) {
11360                 ctx.tp = TASK_NOTHING;
11361                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11362         }
11363
11364         /* This check is the only reason for --readonly to exist */
11365         if (readonly && repair) {
11366                 error("repair options are not compatible with --readonly");
11367                 exit(1);
11368         }
11369
11370         /*
11371          * Not supported yet
11372          */
11373         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11374                 error("low memory mode doesn't support repair yet");
11375                 exit(1);
11376         }
11377
11378         radix_tree_init();
11379         cache_tree_init(&root_cache);
11380
11381         if((ret = check_mounted(argv[optind])) < 0) {
11382                 error("could not check mount status: %s", strerror(-ret));
11383                 goto err_out;
11384         } else if(ret) {
11385                 error("%s is currently mounted, aborting", argv[optind]);
11386                 ret = -EBUSY;
11387                 goto err_out;
11388         }
11389
11390         /* only allow partial opening under repair mode */
11391         if (repair)
11392                 ctree_flags |= OPEN_CTREE_PARTIAL;
11393
11394         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11395                                   chunk_root_bytenr, ctree_flags);
11396         if (!info) {
11397                 error("cannot open file system");
11398                 ret = -EIO;
11399                 goto err_out;
11400         }
11401
11402         global_info = info;
11403         root = info->fs_root;
11404
11405         /*
11406          * repair mode will force us to commit transaction which
11407          * will make us fail to load log tree when mounting.
11408          */
11409         if (repair && btrfs_super_log_root(info->super_copy)) {
11410                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11411                 if (!ret) {
11412                         ret = 1;
11413                         goto close_out;
11414                 }
11415                 ret = zero_log_tree(root);
11416                 if (ret) {
11417                         error("failed to zero log tree: %d", ret);
11418                         goto close_out;
11419                 }
11420         }
11421
11422         uuid_unparse(info->super_copy->fsid, uuidbuf);
11423         if (qgroup_report) {
11424                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11425                        uuidbuf);
11426                 ret = qgroup_verify_all(info);
11427                 if (ret == 0)
11428                         report_qgroups(1);
11429                 goto close_out;
11430         }
11431         if (subvolid) {
11432                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11433                        subvolid, argv[optind], uuidbuf);
11434                 ret = print_extent_state(info, subvolid);
11435                 goto close_out;
11436         }
11437         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11438
11439         if (!extent_buffer_uptodate(info->tree_root->node) ||
11440             !extent_buffer_uptodate(info->dev_root->node) ||
11441             !extent_buffer_uptodate(info->chunk_root->node)) {
11442                 error("critical roots corrupted, unable to check the filesystem");
11443                 ret = -EIO;
11444                 goto close_out;
11445         }
11446
11447         if (init_extent_tree || init_csum_tree) {
11448                 struct btrfs_trans_handle *trans;
11449
11450                 trans = btrfs_start_transaction(info->extent_root, 0);
11451                 if (IS_ERR(trans)) {
11452                         error("error starting transaction");
11453                         ret = PTR_ERR(trans);
11454                         goto close_out;
11455                 }
11456
11457                 if (init_extent_tree) {
11458                         printf("Creating a new extent tree\n");
11459                         ret = reinit_extent_tree(trans, info);
11460                         if (ret)
11461                                 goto close_out;
11462                 }
11463
11464                 if (init_csum_tree) {
11465                         printf("Reinitialize checksum tree\n");
11466                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11467                         if (ret) {
11468                                 error("checksum tree initialization failed: %d",
11469                                                 ret);
11470                                 ret = -EIO;
11471                                 goto close_out;
11472                         }
11473
11474                         ret = fill_csum_tree(trans, info->csum_root,
11475                                              init_extent_tree);
11476                         if (ret) {
11477                                 error("checksum tree refilling failed: %d", ret);
11478                                 return -EIO;
11479                         }
11480                 }
11481                 /*
11482                  * Ok now we commit and run the normal fsck, which will add
11483                  * extent entries for all of the items it finds.
11484                  */
11485                 ret = btrfs_commit_transaction(trans, info->extent_root);
11486                 if (ret)
11487                         goto close_out;
11488         }
11489         if (!extent_buffer_uptodate(info->extent_root->node)) {
11490                 error("critical: extent_root, unable to check the filesystem");
11491                 ret = -EIO;
11492                 goto close_out;
11493         }
11494         if (!extent_buffer_uptodate(info->csum_root->node)) {
11495                 error("critical: csum_root, unable to check the filesystem");
11496                 ret = -EIO;
11497                 goto close_out;
11498         }
11499
11500         if (!ctx.progress_enabled)
11501                 printf("checking extents");
11502         if (check_mode == CHECK_MODE_LOWMEM)
11503                 ret = check_chunks_and_extents_v2(root);
11504         else
11505                 ret = check_chunks_and_extents(root);
11506         if (ret)
11507                 printf("Errors found in extent allocation tree or chunk allocation");
11508
11509         ret = repair_root_items(info);
11510         if (ret < 0)
11511                 goto close_out;
11512         if (repair) {
11513                 fprintf(stderr, "Fixed %d roots.\n", ret);
11514                 ret = 0;
11515         } else if (ret > 0) {
11516                 fprintf(stderr,
11517                        "Found %d roots with an outdated root item.\n",
11518                        ret);
11519                 fprintf(stderr,
11520                         "Please run a filesystem check with the option --repair to fix them.\n");
11521                 ret = 1;
11522                 goto close_out;
11523         }
11524
11525         if (!ctx.progress_enabled) {
11526                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11527                         fprintf(stderr, "checking free space tree\n");
11528                 else
11529                         fprintf(stderr, "checking free space cache\n");
11530         }
11531         ret = check_space_cache(root);
11532         if (ret)
11533                 goto out;
11534
11535         /*
11536          * We used to have to have these hole extents in between our real
11537          * extents so if we don't have this flag set we need to make sure there
11538          * are no gaps in the file extents for inodes, otherwise we can just
11539          * ignore it when this happens.
11540          */
11541         no_holes = btrfs_fs_incompat(root->fs_info,
11542                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11543         if (!ctx.progress_enabled)
11544                 fprintf(stderr, "checking fs roots\n");
11545         ret = check_fs_roots(root, &root_cache);
11546         if (ret)
11547                 goto out;
11548
11549         fprintf(stderr, "checking csums\n");
11550         ret = check_csums(root);
11551         if (ret)
11552                 goto out;
11553
11554         fprintf(stderr, "checking root refs\n");
11555         ret = check_root_refs(root, &root_cache);
11556         if (ret)
11557                 goto out;
11558
11559         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11560                 struct extent_buffer *eb;
11561
11562                 eb = list_first_entry(&root->fs_info->recow_ebs,
11563                                       struct extent_buffer, recow);
11564                 list_del_init(&eb->recow);
11565                 ret = recow_extent_buffer(root, eb);
11566                 if (ret)
11567                         break;
11568         }
11569
11570         while (!list_empty(&delete_items)) {
11571                 struct bad_item *bad;
11572
11573                 bad = list_first_entry(&delete_items, struct bad_item, list);
11574                 list_del_init(&bad->list);
11575                 if (repair)
11576                         ret = delete_bad_item(root, bad);
11577                 free(bad);
11578         }
11579
11580         if (info->quota_enabled) {
11581                 int err;
11582                 fprintf(stderr, "checking quota groups\n");
11583                 err = qgroup_verify_all(info);
11584                 if (err)
11585                         goto out;
11586                 report_qgroups(0);
11587                 err = repair_qgroups(info, &qgroups_repaired);
11588                 if (err)
11589                         goto out;
11590         }
11591
11592         if (!list_empty(&root->fs_info->recow_ebs)) {
11593                 error("transid errors in file system");
11594                 ret = 1;
11595         }
11596 out:
11597         /* Don't override original ret */
11598         if (!ret && qgroups_repaired)
11599                 ret = qgroups_repaired;
11600
11601         if (found_old_backref) { /*
11602                  * there was a disk format change when mixed
11603                  * backref was in testing tree. The old format
11604                  * existed about one week.
11605                  */
11606                 printf("\n * Found old mixed backref format. "
11607                        "The old format is not supported! *"
11608                        "\n * Please mount the FS in readonly mode, "
11609                        "backup data and re-format the FS. *\n\n");
11610                 ret = 1;
11611         }
11612         printf("found %llu bytes used err is %d\n",
11613                (unsigned long long)bytes_used, ret);
11614         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11615         printf("total tree bytes: %llu\n",
11616                (unsigned long long)total_btree_bytes);
11617         printf("total fs tree bytes: %llu\n",
11618                (unsigned long long)total_fs_tree_bytes);
11619         printf("total extent tree bytes: %llu\n",
11620                (unsigned long long)total_extent_tree_bytes);
11621         printf("btree space waste bytes: %llu\n",
11622                (unsigned long long)btree_space_waste);
11623         printf("file data blocks allocated: %llu\n referenced %llu\n",
11624                 (unsigned long long)data_bytes_allocated,
11625                 (unsigned long long)data_bytes_referenced);
11626
11627         free_qgroup_counts();
11628         free_root_recs_tree(&root_cache);
11629 close_out:
11630         close_ctree(root);
11631 err_out:
11632         if (ctx.progress_enabled)
11633                 task_deinit(ctx.info);
11634
11635         return ret;
11636 }