btrfs-progs: fsck: Check drop level before walking through fs tree
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases shown by the progress indicator.  print_status_check() uses the
 * value as an index into its message table and returns early when it is
 * TASK_NOTHING, which has no message.
 */
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
/*
 * Context handed to print_status_check() as its opaque argument.
 * NOTE(review): progress_enabled is not read in the visible part of the
 * file; presumably it gates the indicator - confirm against callers.
 */
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;          /* selects the status message */
55
56         struct task_info *info;         /* passed to task_period_start()/task_period_wait() */
57 };
58
/*
 * File-scope state of the checker.  Every counter and flag starts at zero,
 * both lists start empty and both pointers start NULL/unset.
 */
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Check implementation selector.  parse_check_mode() maps "lowmem" to
 * CHECK_MODE_LOWMEM, "orig"/"original" to CHECK_MODE_ORIGINAL and anything
 * else to CHECK_MODE_UNKNOWN.  CHECK_MODE_DEFAULT aliases the original mode.
 */
77 enum btrfs_check_mode {
78         CHECK_MODE_ORIGINAL,
79         CHECK_MODE_LOWMEM,
80         CHECK_MODE_UNKNOWN,
81         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
82 };
83
/* Currently selected mode; starts out as the default (original) mode. */
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
/*
 * Common header of a backref.  It is embedded as the member "node" of both
 * struct data_backref and struct tree_backref; is_data tells which one the
 * header belongs to (compare_extent_backref() dispatches on it).
 */
86 struct extent_backref {
87         struct rb_node node;            /* rb-tree linkage, see rb_node_to_extent_backref() */
88         unsigned int is_data:1;
89         unsigned int found_extent_tree:1;
90         unsigned int full_backref:1;
91         unsigned int found_ref:1;
92         unsigned int broken:1;
93 };
94
95 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
96 {
97         return rb_entry(node, struct extent_backref, node);
98 }
99
/*
 * Backref for a data extent.
 *
 * compare_data_backref() orders entries by parent/root, owner, offset and
 * bytes, and additionally by disk_bytenr and found_ref when both entries
 * have a non-zero found_ref.
 */
100 struct data_backref {
101         struct extent_backref node;     /* common header */
102         union {
103                 u64 parent;
104                 u64 root;
105         };
106         u64 owner;
107         u64 offset;
108         u64 disk_bytenr;
109         u64 bytes;
110         u64 ram_bytes;
111         u32 num_refs;
112         u32 found_ref;                  /* a counter, distinct from the node.found_ref bit */
113 };
114
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
116 {
117         return container_of(back, struct data_backref, node);
118 }
119
120 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
121 {
122         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
123         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
124         struct data_backref *back1 = to_data_backref(ext1);
125         struct data_backref *back2 = to_data_backref(ext2);
126
127         WARN_ON(!ext1->is_data);
128         WARN_ON(!ext2->is_data);
129
130         /* parent and root are a union, so this covers both */
131         if (back1->parent > back2->parent)
132                 return 1;
133         if (back1->parent < back2->parent)
134                 return -1;
135
136         /* This is a full backref and the parents match. */
137         if (back1->node.full_backref)
138                 return 0;
139
140         if (back1->owner > back2->owner)
141                 return 1;
142         if (back1->owner < back2->owner)
143                 return -1;
144
145         if (back1->offset > back2->offset)
146                 return 1;
147         if (back1->offset < back2->offset)
148                 return -1;
149
150         if (back1->bytes > back2->bytes)
151                 return 1;
152         if (back1->bytes < back2->bytes)
153                 return -1;
154
155         if (back1->found_ref && back2->found_ref) {
156                 if (back1->disk_bytenr > back2->disk_bytenr)
157                         return 1;
158                 if (back1->disk_bytenr < back2->disk_bytenr)
159                         return -1;
160
161                 if (back1->found_ref > back2->found_ref)
162                         return 1;
163                 if (back1->found_ref < back2->found_ref)
164                         return -1;
165         }
166
167         return 0;
168 }
169
170 /*
171  * Much like data_backref, but with the undetermined members removed and
172  * linked through a list_head instead of an rb-tree node.
173  * During extent scan, it is stored in root->orphan_data_extent.
174  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
175  */
176 struct orphan_data_extent {
177         struct list_head list;
178         u64 root;
179         u64 objectid;           /* printed as "inode" by print_orphan_data_extents() */
180         u64 offset;
181         u64 disk_bytenr;
182         u64 disk_len;
183 };
184
/*
 * Backref for a tree block.  compare_tree_backref() orders entries by the
 * parent/root value only.
 */
185 struct tree_backref {
186         struct extent_backref node;     /* common header */
187         union {
188                 u64 parent;
189                 u64 root;
190         };
191 };
192
193 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
194 {
195         return container_of(back, struct tree_backref, node);
196 }
197
198 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
199 {
200         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
201         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
202         struct tree_backref *back1 = to_tree_backref(ext1);
203         struct tree_backref *back2 = to_tree_backref(ext2);
204
205         WARN_ON(ext1->is_data);
206         WARN_ON(ext2->is_data);
207
208         /* parent and root are a union, so this covers both */
209         if (back1->parent > back2->parent)
210                 return 1;
211         if (back1->parent < back2->parent)
212                 return -1;
213
214         return 0;
215 }
216
217 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
218 {
219         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
220         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
221
222         if (ext1->is_data > ext2->is_data)
223                 return 1;
224
225         if (ext1->is_data < ext2->is_data)
226                 return -1;
227
228         if (ext1->full_backref > ext2->full_backref)
229                 return 1;
230         if (ext1->full_backref < ext2->full_backref)
231                 return -1;
232
233         if (ext1->is_data)
234                 return compare_data_backref(node1, node2);
235         else
236                 return compare_tree_backref(node1, node2);
237 }
238
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That bitfield is two bits wide, so it can hold this value in addition
 * to 0 and 1.
 */
240 enum { FLAG_UNSET = 2 };
241
/*
 * Bookkeeping record for one extent.
 * NOTE(review): the code that fills in these fields is outside the visible
 * part of the file; the per-field notes below only state what the
 * declarations themselves show.
 */
242 struct extent_record {
243         struct list_head backrefs;
244         struct list_head dups;
245         struct rb_root backref_tree;
246         struct list_head list;          /* linkage resolved by to_extent_record() */
247         struct cache_extent cache;
248         struct btrfs_disk_key parent_key;
249         u64 start;
250         u64 max_size;
251         u64 nr;
252         u64 refs;
253         u64 extent_item_refs;
254         u64 generation;
255         u64 parent_generation;
256         u64 info_objectid;
257         u32 num_duplicates;
258         u8 info_level;
259         unsigned int flag_block_full_backref:2; /* two bits so it can hold FLAG_UNSET */
260         unsigned int found_rec:1;
261         unsigned int content_checked:1;
262         unsigned int owner_ref_checked:1;
263         unsigned int is_root:1;
264         unsigned int metadata:1;
265         unsigned int bad_full_backref:1;
266         unsigned int crossing_stripes:1;
267         unsigned int wrong_chunk_type:1;
268 };
269
270 static inline struct extent_record* to_extent_record(struct list_head *entry)
271 {
272         return container_of(entry, struct extent_record, list);
273 }
274
/*
 * One name referencing an inode; entries hang off inode_record::backrefs.
 * The name is stored inline after the fixed part: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
275 struct inode_backref {
276         struct list_head list;
277         unsigned int found_dir_item:1;
278         unsigned int found_dir_index:1;
279         unsigned int found_inode_ref:1;
280         unsigned int filetype:8;
281         int errors;             /* presumably REF_ERR_* bits - confirm against users */
282         unsigned int ref_type;
283         u64 dir;
284         u64 index;
285         u16 namelen;
286         char name[0];           /* trailing variable-length name */
287 };
288
289 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
290 {
291         return list_entry(entry, struct inode_backref, list);
292 }
293
/*
 * Snapshot of selected root item values, linkable into a list.
 * NOTE(review): producers and consumers are outside the visible part of
 * the file; field meanings are inferred from their names only.
 */
294 struct root_item_record {
295         struct list_head list;
296         u64 objectid;
297         u64 bytenr;
298         u64 last_snapshot;
299         u8 level;
300         u8 drop_level;
301         int level_size;
302         struct btrfs_key drop_key;
303 };
304
/*
 * Error bits for name/root references.  print_ref_error() turns each set
 * bit into a human readable message.
 */
305 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
306 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
307 #define REF_ERR_NO_INODE_REF            (1 << 2)
308 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
309 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
310 #define REF_ERR_DUP_INODE_REF           (1 << 5)
311 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
312 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
313 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
314 #define REF_ERR_NO_ROOT_REF             (1 << 9)
315 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
316 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
317 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
318
/*
 * One hole [start, start + len) in a file, kept in an rb-tree ordered by
 * compare_hole().  add_file_extent_hole() merges touching or overlapping
 * entries and del_file_extent_hole() splits them.
 */
319 struct file_extent_hole {
320         struct rb_node node;
321         u64 start;
322         u64 len;
323 };
324
/*
 * Everything collected about one inode while walking a tree.
 *
 * Records can be shared: get_inode_rec() clones a record through
 * clone_inode_rec() before modifying it when refs > 1.
 */
325 struct inode_record {
326         struct list_head backrefs;      /* list of struct inode_backref */
327         unsigned int checked:1;
328         unsigned int merging:1;
329         unsigned int found_inode_item:1;
330         unsigned int found_dir_item:1;
331         unsigned int found_file_extent:1;
332         unsigned int found_csum_item:1;
333         unsigned int some_csum_missing:1;
334         unsigned int nodatasum:1;
335         int errors;                     /* I_ERR_* bits, decoded by print_inode_error() */
336
337         u64 ino;
338         u32 nlink;
339         u32 imode;
340         u64 isize;
341         u64 nbytes;
342
343         u32 found_link;
344         u64 found_size;
345         u64 extent_start;               /* get_inode_rec() initializes this to (u64)-1 */
346         u64 extent_end;
347         struct rb_root holes;           /* rb-tree of struct file_extent_hole */
348         struct list_head orphan_extents; /* list of struct orphan_data_extent */
349
350         u32 refs;                       /* share count, 1 for a private record */
351 };
352
/*
 * Error bits stored in inode_record::errors.  print_inode_error() turns
 * each set bit into a human readable message.
 */
353 #define I_ERR_NO_INODE_ITEM             (1 << 0)
354 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
355 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
356 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
357 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
358 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
359 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
360 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
361 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
362 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
363 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
364 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
365 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
366 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
367 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
368
/*
 * One reference to a root; entries hang off a list (see to_root_backref()).
 * Like struct inode_backref, the name is a trailing variable-length array.
 */
369 struct root_backref {
370         struct list_head list;
371         unsigned int found_dir_item:1;
372         unsigned int found_dir_index:1;
373         unsigned int found_back_ref:1;
374         unsigned int found_forward_ref:1;
375         unsigned int reachable:1;
376         int errors;             /* presumably REF_ERR_* bits - confirm against users */
377         u64 ref_root;
378         u64 dir;
379         u64 index;
380         u16 namelen;
381         char name[0];           /* trailing variable-length name */
382 };
383
384 static inline struct root_backref* to_root_backref(struct list_head *entry)
385 {
386         return list_entry(entry, struct root_backref, list);
387 }
388
/*
 * Per-root record that can be stored in a cache tree through its embedded
 * cache_extent.
 */
389 struct root_record {
390         struct list_head backrefs;      /* presumably a list of struct root_backref - confirm */
391         struct cache_extent cache;
392         unsigned int found_root_item:1;
393         u64 objectid;
394         u32 found_ref;
395 };
396
/*
 * Cache tree entry carrying an opaque pointer.  get_inode_rec() looks such
 * entries up by inode number and keeps a struct inode_record in data.
 */
397 struct ptr_node {
398         struct cache_extent cache;
399         void *data;
400 };
401
/*
 * State attached to one tree node, stored in a cache tree through its
 * embedded cache_extent (walk_control keeps pointers to these).
 * NOTE(review): users are outside the visible part of the file.
 */
402 struct shared_node {
403         struct cache_extent cache;
404         struct cache_tree root_cache;
405         struct cache_tree inode_cache;
406         struct inode_record *current;
407         u32 refs;
408 };
409
/* A block described by a 64-bit start and a 32-bit size. */
410 struct block_info {
411         u64 start;
412         u32 size;
413 };
414
/*
 * State carried along a tree walk: a cache tree of shared nodes plus one
 * shared_node slot per possible tree level.
 * NOTE(review): users are outside the visible part of the file.
 */
415 struct walk_control {
416         struct cache_tree shared;
417         struct shared_node *nodes[BTRFS_MAX_LEVEL];
418         int active_node;
419         int root_level;
420 };
421
/* List entry naming an item by its key and the id of the root holding it. */
422 struct bad_item {
423         struct btrfs_key key;
424         u64 root_id;
425         struct list_head list;
426 };
427
/*
 * List entry describing an extent candidate by bytenr/bytes.
 * NOTE(review): users are outside the visible part of the file; count and
 * broken are presumably tallies of matching and broken references - confirm.
 */
428 struct extent_entry {
429         u64 bytenr;
430         u64 bytes;
431         int count;
432         int broken;
433         struct list_head list;
434 };
435
/*
 * Information gathered about one root, stored in a cache tree through its
 * embedded cache_extent (presumably roots_info_cache - confirm).
 */
436 struct root_item_info {
437         /* level of the root */
438         u8 level;
439         /* number of nodes at this level, must be 1 for a root */
440         int node_count;
441         u64 bytenr;
442         u64 gen;
443         struct cache_extent cache_extent;
444 };
445
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about the exact values yet; they are only used
 * internally for error classification.  Every flag must own a distinct bit,
 * otherwise two different errors become indistinguishable once OR-ed into
 * the same result.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
/* Used to share bit 4 with REFERENCER_MISMATCH; moved to its own bit. */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
462
463 static void *print_status_check(void *p)
464 {
465         struct task_ctx *priv = p;
466         const char work_indicator[] = { '.', 'o', 'O', 'o' };
467         uint32_t count = 0;
468         static char *task_position_string[] = {
469                 "checking extents",
470                 "checking free space cache",
471                 "checking fs roots",
472         };
473
474         task_period_start(priv->info, 1000 /* 1s */);
475
476         if (priv->tp == TASK_NOTHING)
477                 return NULL;
478
479         while (1) {
480                 printf("%s [%c]\r", task_position_string[priv->tp],
481                                 work_indicator[count % 4]);
482                 count++;
483                 fflush(stdout);
484                 task_period_wait(priv->info);
485         }
486         return NULL;
487 }
488
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
496
497 static enum btrfs_check_mode parse_check_mode(const char *str)
498 {
499         if (strcmp(str, "lowmem") == 0)
500                 return CHECK_MODE_LOWMEM;
501         if (strcmp(str, "orig") == 0)
502                 return CHECK_MODE_ORIGINAL;
503         if (strcmp(str, "original") == 0)
504                 return CHECK_MODE_ORIGINAL;
505
506         return CHECK_MODE_UNKNOWN;
507 }
508
509 /* Compatible function to allow reuse of old codes */
510 static u64 first_extent_gap(struct rb_root *holes)
511 {
512         struct file_extent_hole *hole;
513
514         if (RB_EMPTY_ROOT(holes))
515                 return (u64)-1;
516
517         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
518         return hole->start;
519 }
520
521 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
522 {
523         struct file_extent_hole *hole1;
524         struct file_extent_hole *hole2;
525
526         hole1 = rb_entry(node1, struct file_extent_hole, node);
527         hole2 = rb_entry(node2, struct file_extent_hole, node);
528
529         if (hole1->start > hole2->start)
530                 return -1;
531         if (hole1->start < hole2->start)
532                 return 1;
533         /* Now hole1->start == hole2->start */
534         if (hole1->len >= hole2->len)
535                 /*
536                  * Hole 1 will be merge center
537                  * Same hole will be merged later
538                  */
539                 return -1;
540         /* Hole 2 will be merge center */
541         return 1;
542 }
543
544 /*
545  * Add a hole to the record
546  *
547  * This will do hole merge for copy_file_extent_holes(),
548  * which will ensure there won't be continuous holes.
549  */
550 static int add_file_extent_hole(struct rb_root *holes,
551                                 u64 start, u64 len)
552 {
553         struct file_extent_hole *hole;
554         struct file_extent_hole *prev = NULL;
555         struct file_extent_hole *next = NULL;
556
557         hole = malloc(sizeof(*hole));
558         if (!hole)
559                 return -ENOMEM;
560         hole->start = start;
561         hole->len = len;
562         /* Since compare will not return 0, no -EEXIST will happen */
563         rb_insert(holes, &hole->node, compare_hole);
564
565         /* simple merge with previous hole */
566         if (rb_prev(&hole->node))
567                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
568                                 node);
569         if (prev && prev->start + prev->len >= hole->start) {
570                 hole->len = hole->start + hole->len - prev->start;
571                 hole->start = prev->start;
572                 rb_erase(&prev->node, holes);
573                 free(prev);
574                 prev = NULL;
575         }
576
577         /* iterate merge with next holes */
578         while (1) {
579                 if (!rb_next(&hole->node))
580                         break;
581                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
582                                         node);
583                 if (hole->start + hole->len >= next->start) {
584                         if (hole->start + hole->len <= next->start + next->len)
585                                 hole->len = next->start + next->len -
586                                             hole->start;
587                         rb_erase(&next->node, holes);
588                         free(next);
589                         next = NULL;
590                 } else
591                         break;
592         }
593         return 0;
594 }
595
596 static int compare_hole_range(struct rb_node *node, void *data)
597 {
598         struct file_extent_hole *hole;
599         u64 start;
600
601         hole = (struct file_extent_hole *)data;
602         start = hole->start;
603
604         hole = rb_entry(node, struct file_extent_hole, node);
605         if (start < hole->start)
606                 return -1;
607         if (start >= hole->start && start < hole->start + hole->len)
608                 return 0;
609         return 1;
610 }
611
612 /*
613  * Delete a hole in the record
614  *
615  * This will do the hole split and is much restrict than add.
616  */
617 static int del_file_extent_hole(struct rb_root *holes,
618                                 u64 start, u64 len)
619 {
620         struct file_extent_hole *hole;
621         struct file_extent_hole tmp;
622         u64 prev_start = 0;
623         u64 prev_len = 0;
624         u64 next_start = 0;
625         u64 next_len = 0;
626         struct rb_node *node;
627         int have_prev = 0;
628         int have_next = 0;
629         int ret = 0;
630
631         tmp.start = start;
632         tmp.len = len;
633         node = rb_search(holes, &tmp, compare_hole_range, NULL);
634         if (!node)
635                 return -EEXIST;
636         hole = rb_entry(node, struct file_extent_hole, node);
637         if (start + len > hole->start + hole->len)
638                 return -EEXIST;
639
640         /*
641          * Now there will be no overlap, delete the hole and re-add the
642          * split(s) if they exists.
643          */
644         if (start > hole->start) {
645                 prev_start = hole->start;
646                 prev_len = start - hole->start;
647                 have_prev = 1;
648         }
649         if (hole->start + hole->len > start + len) {
650                 next_start = start + len;
651                 next_len = hole->start + hole->len - start - len;
652                 have_next = 1;
653         }
654         rb_erase(node, holes);
655         free(hole);
656         if (have_prev) {
657                 ret = add_file_extent_hole(holes, prev_start, prev_len);
658                 if (ret < 0)
659                         return ret;
660         }
661         if (have_next) {
662                 ret = add_file_extent_hole(holes, next_start, next_len);
663                 if (ret < 0)
664                         return ret;
665         }
666         return 0;
667 }
668
669 static int copy_file_extent_holes(struct rb_root *dst,
670                                   struct rb_root *src)
671 {
672         struct file_extent_hole *hole;
673         struct rb_node *node;
674         int ret = 0;
675
676         node = rb_first(src);
677         while (node) {
678                 hole = rb_entry(node, struct file_extent_hole, node);
679                 ret = add_file_extent_hole(dst, hole->start, hole->len);
680                 if (ret)
681                         break;
682                 node = rb_next(node);
683         }
684         return ret;
685 }
686
687 static void free_file_extent_holes(struct rb_root *holes)
688 {
689         struct rb_node *node;
690         struct file_extent_hole *hole;
691
692         node = rb_first(holes);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 rb_erase(node, holes);
696                 free(hole);
697                 node = rb_first(holes);
698         }
699 }
700
701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
702
703 static void record_root_in_trans(struct btrfs_trans_handle *trans,
704                                  struct btrfs_root *root)
705 {
706         if (root->last_trans != trans->transid) {
707                 root->track_dirty = 1;
708                 root->last_trans = trans->transid;
709                 root->commit_root = root->node;
710                 extent_buffer_get(root->node);
711         }
712 }
713
714 static u8 imode_to_type(u32 imode)
715 {
716 #define S_SHIFT 12
717         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
718                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
719                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
720                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
721                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
722                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
723                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
724                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
725         };
726
727         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
728 #undef S_SHIFT
729 }
730
731 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
732 {
733         struct device_record *rec1;
734         struct device_record *rec2;
735
736         rec1 = rb_entry(node1, struct device_record, node);
737         rec2 = rb_entry(node2, struct device_record, node);
738         if (rec1->devid > rec2->devid)
739                 return -1;
740         else if (rec1->devid < rec2->devid)
741                 return 1;
742         else
743                 return 0;
744 }
745
746 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
747 {
748         struct inode_record *rec;
749         struct inode_backref *backref;
750         struct inode_backref *orig;
751         struct inode_backref *tmp;
752         struct orphan_data_extent *src_orphan;
753         struct orphan_data_extent *dst_orphan;
754         size_t size;
755         int ret;
756
757         rec = malloc(sizeof(*rec));
758         if (!rec)
759                 return ERR_PTR(-ENOMEM);
760         memcpy(rec, orig_rec, sizeof(*rec));
761         rec->refs = 1;
762         INIT_LIST_HEAD(&rec->backrefs);
763         INIT_LIST_HEAD(&rec->orphan_extents);
764         rec->holes = RB_ROOT;
765
766         list_for_each_entry(orig, &orig_rec->backrefs, list) {
767                 size = sizeof(*orig) + orig->namelen + 1;
768                 backref = malloc(size);
769                 if (!backref) {
770                         ret = -ENOMEM;
771                         goto cleanup;
772                 }
773                 memcpy(backref, orig, size);
774                 list_add_tail(&backref->list, &rec->backrefs);
775         }
776         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
777                 dst_orphan = malloc(sizeof(*dst_orphan));
778                 if (!dst_orphan) {
779                         ret = -ENOMEM;
780                         goto cleanup;
781                 }
782                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
783                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
784         }
785         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
786         BUG_ON(ret < 0);
787
788         return rec;
789
790 cleanup:
791         if (!list_empty(&rec->backrefs))
792                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
793                         list_del(&orig->list);
794                         free(orig);
795                 }
796
797         if (!list_empty(&rec->orphan_extents))
798                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
799                         list_del(&orig->list);
800                         free(orig);
801                 }
802
803         free(rec);
804
805         return ERR_PTR(ret);
806 }
807
808 static void print_orphan_data_extents(struct list_head *orphan_extents,
809                                       u64 objectid)
810 {
811         struct orphan_data_extent *orphan;
812
813         if (list_empty(orphan_extents))
814                 return;
815         printf("The following data extent is lost in tree %llu:\n",
816                objectid);
817         list_for_each_entry(orphan, orphan_extents, list) {
818                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
819                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
820                        orphan->disk_len);
821         }
822 }
823
824 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
825 {
826         u64 root_objectid = root->root_key.objectid;
827         int errors = rec->errors;
828
829         if (!errors)
830                 return;
831         /* reloc root errors, we print its corresponding fs root objectid*/
832         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
833                 root_objectid = root->root_key.offset;
834                 fprintf(stderr, "reloc");
835         }
836         fprintf(stderr, "root %llu inode %llu errors %x",
837                 (unsigned long long) root_objectid,
838                 (unsigned long long) rec->ino, rec->errors);
839
840         if (errors & I_ERR_NO_INODE_ITEM)
841                 fprintf(stderr, ", no inode item");
842         if (errors & I_ERR_NO_ORPHAN_ITEM)
843                 fprintf(stderr, ", no orphan item");
844         if (errors & I_ERR_DUP_INODE_ITEM)
845                 fprintf(stderr, ", dup inode item");
846         if (errors & I_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & I_ERR_ODD_DIR_ITEM)
849                 fprintf(stderr, ", odd dir item");
850         if (errors & I_ERR_ODD_FILE_EXTENT)
851                 fprintf(stderr, ", odd file extent");
852         if (errors & I_ERR_BAD_FILE_EXTENT)
853                 fprintf(stderr, ", bad file extent");
854         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
855                 fprintf(stderr, ", file extent overlap");
856         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
857                 fprintf(stderr, ", file extent discount");
858         if (errors & I_ERR_DIR_ISIZE_WRONG)
859                 fprintf(stderr, ", dir isize wrong");
860         if (errors & I_ERR_FILE_NBYTES_WRONG)
861                 fprintf(stderr, ", nbytes wrong");
862         if (errors & I_ERR_ODD_CSUM_ITEM)
863                 fprintf(stderr, ", odd csum item");
864         if (errors & I_ERR_SOME_CSUM_MISSING)
865                 fprintf(stderr, ", some csum missing");
866         if (errors & I_ERR_LINK_COUNT_WRONG)
867                 fprintf(stderr, ", link count wrong");
868         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
869                 fprintf(stderr, ", orphan file extent");
870         fprintf(stderr, "\n");
871         /* Print the orphan extents if needed */
872         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
873                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
874
875         /* Print the holes if needed */
876         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
877                 struct file_extent_hole *hole;
878                 struct rb_node *node;
879                 int found = 0;
880
881                 node = rb_first(&rec->holes);
882                 fprintf(stderr, "Found file extent holes:\n");
883                 while (node) {
884                         found = 1;
885                         hole = rb_entry(node, struct file_extent_hole, node);
886                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
887                                 hole->start, hole->len);
888                         node = rb_next(node);
889                 }
890                 if (!found)
891                         fprintf(stderr, "\tstart: 0, len: %llu\n",
892                                 round_up(rec->isize, root->sectorsize));
893         }
894 }
895
896 static void print_ref_error(int errors)
897 {
898         if (errors & REF_ERR_NO_DIR_ITEM)
899                 fprintf(stderr, ", no dir item");
900         if (errors & REF_ERR_NO_DIR_INDEX)
901                 fprintf(stderr, ", no dir index");
902         if (errors & REF_ERR_NO_INODE_REF)
903                 fprintf(stderr, ", no inode ref");
904         if (errors & REF_ERR_DUP_DIR_ITEM)
905                 fprintf(stderr, ", dup dir item");
906         if (errors & REF_ERR_DUP_DIR_INDEX)
907                 fprintf(stderr, ", dup dir index");
908         if (errors & REF_ERR_DUP_INODE_REF)
909                 fprintf(stderr, ", dup inode ref");
910         if (errors & REF_ERR_INDEX_UNMATCH)
911                 fprintf(stderr, ", index mismatch");
912         if (errors & REF_ERR_FILETYPE_UNMATCH)
913                 fprintf(stderr, ", filetype mismatch");
914         if (errors & REF_ERR_NAME_TOO_LONG)
915                 fprintf(stderr, ", name too long");
916         if (errors & REF_ERR_NO_ROOT_REF)
917                 fprintf(stderr, ", no root ref");
918         if (errors & REF_ERR_NO_ROOT_BACKREF)
919                 fprintf(stderr, ", no root backref");
920         if (errors & REF_ERR_DUP_ROOT_REF)
921                 fprintf(stderr, ", dup root ref");
922         if (errors & REF_ERR_DUP_ROOT_BACKREF)
923                 fprintf(stderr, ", dup root backref");
924         fprintf(stderr, "\n");
925 }
926
927 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
928                                           u64 ino, int mod)
929 {
930         struct ptr_node *node;
931         struct cache_extent *cache;
932         struct inode_record *rec = NULL;
933         int ret;
934
935         cache = lookup_cache_extent(inode_cache, ino, 1);
936         if (cache) {
937                 node = container_of(cache, struct ptr_node, cache);
938                 rec = node->data;
939                 if (mod && rec->refs > 1) {
940                         node->data = clone_inode_rec(rec);
941                         if (IS_ERR(node->data))
942                                 return node->data;
943                         rec->refs--;
944                         rec = node->data;
945                 }
946         } else if (mod) {
947                 rec = calloc(1, sizeof(*rec));
948                 if (!rec)
949                         return ERR_PTR(-ENOMEM);
950                 rec->ino = ino;
951                 rec->extent_start = (u64)-1;
952                 rec->refs = 1;
953                 INIT_LIST_HEAD(&rec->backrefs);
954                 INIT_LIST_HEAD(&rec->orphan_extents);
955                 rec->holes = RB_ROOT;
956
957                 node = malloc(sizeof(*node));
958                 if (!node) {
959                         free(rec);
960                         return ERR_PTR(-ENOMEM);
961                 }
962                 node->cache.start = ino;
963                 node->cache.size = 1;
964                 node->data = rec;
965
966                 if (ino == BTRFS_FREE_INO_OBJECTID)
967                         rec->found_link = 1;
968
969                 ret = insert_cache_extent(inode_cache, &node->cache);
970                 if (ret)
971                         return ERR_PTR(-EEXIST);
972         }
973         return rec;
974 }
975
976 static void free_orphan_data_extents(struct list_head *orphan_extents)
977 {
978         struct orphan_data_extent *orphan;
979
980         while (!list_empty(orphan_extents)) {
981                 orphan = list_entry(orphan_extents->next,
982                                     struct orphan_data_extent, list);
983                 list_del(&orphan->list);
984                 free(orphan);
985         }
986 }
987
988 static void free_inode_rec(struct inode_record *rec)
989 {
990         struct inode_backref *backref;
991
992         if (--rec->refs > 0)
993                 return;
994
995         while (!list_empty(&rec->backrefs)) {
996                 backref = to_inode_backref(rec->backrefs.next);
997                 list_del(&backref->list);
998                 free(backref);
999         }
1000         free_orphan_data_extents(&rec->orphan_extents);
1001         free_file_extent_holes(&rec->holes);
1002         free(rec);
1003 }
1004
1005 static int can_free_inode_rec(struct inode_record *rec)
1006 {
1007         if (!rec->errors && rec->checked && rec->found_inode_item &&
1008             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1009                 return 1;
1010         return 0;
1011 }
1012
1013 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1014                                  struct inode_record *rec)
1015 {
1016         struct cache_extent *cache;
1017         struct inode_backref *tmp, *backref;
1018         struct ptr_node *node;
1019         unsigned char filetype;
1020
1021         if (!rec->found_inode_item)
1022                 return;
1023
1024         filetype = imode_to_type(rec->imode);
1025         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1026                 if (backref->found_dir_item && backref->found_dir_index) {
1027                         if (backref->filetype != filetype)
1028                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1029                         if (!backref->errors && backref->found_inode_ref &&
1030                             rec->nlink == rec->found_link) {
1031                                 list_del(&backref->list);
1032                                 free(backref);
1033                         }
1034                 }
1035         }
1036
1037         if (!rec->checked || rec->merging)
1038                 return;
1039
1040         if (S_ISDIR(rec->imode)) {
1041                 if (rec->found_size != rec->isize)
1042                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1043                 if (rec->found_file_extent)
1044                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1045         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1046                 if (rec->found_dir_item)
1047                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1048                 if (rec->found_size != rec->nbytes)
1049                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1050                 if (rec->nlink > 0 && !no_holes &&
1051                     (rec->extent_end < rec->isize ||
1052                      first_extent_gap(&rec->holes) < rec->isize))
1053                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1054         }
1055
1056         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1057                 if (rec->found_csum_item && rec->nodatasum)
1058                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1059                 if (rec->some_csum_missing && !rec->nodatasum)
1060                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1061         }
1062
1063         BUG_ON(rec->refs != 1);
1064         if (can_free_inode_rec(rec)) {
1065                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1066                 node = container_of(cache, struct ptr_node, cache);
1067                 BUG_ON(node->data != rec);
1068                 remove_cache_extent(inode_cache, &node->cache);
1069                 free(node);
1070                 free_inode_rec(rec);
1071         }
1072 }
1073
1074 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1075 {
1076         struct btrfs_path path;
1077         struct btrfs_key key;
1078         int ret;
1079
1080         key.objectid = BTRFS_ORPHAN_OBJECTID;
1081         key.type = BTRFS_ORPHAN_ITEM_KEY;
1082         key.offset = ino;
1083
1084         btrfs_init_path(&path);
1085         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1086         btrfs_release_path(&path);
1087         if (ret > 0)
1088                 ret = -ENOENT;
1089         return ret;
1090 }
1091
1092 static int process_inode_item(struct extent_buffer *eb,
1093                               int slot, struct btrfs_key *key,
1094                               struct shared_node *active_node)
1095 {
1096         struct inode_record *rec;
1097         struct btrfs_inode_item *item;
1098
1099         rec = active_node->current;
1100         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1101         if (rec->found_inode_item) {
1102                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1103                 return 1;
1104         }
1105         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1106         rec->nlink = btrfs_inode_nlink(eb, item);
1107         rec->isize = btrfs_inode_size(eb, item);
1108         rec->nbytes = btrfs_inode_nbytes(eb, item);
1109         rec->imode = btrfs_inode_mode(eb, item);
1110         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1111                 rec->nodatasum = 1;
1112         rec->found_inode_item = 1;
1113         if (rec->nlink == 0)
1114                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1115         maybe_free_inode_rec(&active_node->inode_cache, rec);
1116         return 0;
1117 }
1118
1119 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1120                                                 const char *name,
1121                                                 int namelen, u64 dir)
1122 {
1123         struct inode_backref *backref;
1124
1125         list_for_each_entry(backref, &rec->backrefs, list) {
1126                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1127                         break;
1128                 if (backref->dir != dir || backref->namelen != namelen)
1129                         continue;
1130                 if (memcmp(name, backref->name, namelen))
1131                         continue;
1132                 return backref;
1133         }
1134
1135         backref = malloc(sizeof(*backref) + namelen + 1);
1136         if (!backref)
1137                 return NULL;
1138         memset(backref, 0, sizeof(*backref));
1139         backref->dir = dir;
1140         backref->namelen = namelen;
1141         memcpy(backref->name, name, namelen);
1142         backref->name[namelen] = '\0';
1143         list_add_tail(&backref->list, &rec->backrefs);
1144         return backref;
1145 }
1146
1147 static int add_inode_backref(struct cache_tree *inode_cache,
1148                              u64 ino, u64 dir, u64 index,
1149                              const char *name, int namelen,
1150                              int filetype, int itemtype, int errors)
1151 {
1152         struct inode_record *rec;
1153         struct inode_backref *backref;
1154
1155         rec = get_inode_rec(inode_cache, ino, 1);
1156         BUG_ON(IS_ERR(rec));
1157         backref = get_inode_backref(rec, name, namelen, dir);
1158         BUG_ON(!backref);
1159         if (errors)
1160                 backref->errors |= errors;
1161         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1162                 if (backref->found_dir_index)
1163                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1164                 if (backref->found_inode_ref && backref->index != index)
1165                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1166                 if (backref->found_dir_item && backref->filetype != filetype)
1167                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1168
1169                 backref->index = index;
1170                 backref->filetype = filetype;
1171                 backref->found_dir_index = 1;
1172         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1173                 rec->found_link++;
1174                 if (backref->found_dir_item)
1175                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1176                 if (backref->found_dir_index && backref->filetype != filetype)
1177                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1178
1179                 backref->filetype = filetype;
1180                 backref->found_dir_item = 1;
1181         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1182                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1183                 if (backref->found_inode_ref)
1184                         backref->errors |= REF_ERR_DUP_INODE_REF;
1185                 if (backref->found_dir_index && backref->index != index)
1186                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1187                 else
1188                         backref->index = index;
1189
1190                 backref->ref_type = itemtype;
1191                 backref->found_inode_ref = 1;
1192         } else {
1193                 BUG_ON(1);
1194         }
1195
1196         maybe_free_inode_rec(inode_cache, rec);
1197         return 0;
1198 }
1199
1200 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1201                             struct cache_tree *dst_cache)
1202 {
1203         struct inode_backref *backref;
1204         u32 dir_count = 0;
1205         int ret = 0;
1206
1207         dst->merging = 1;
1208         list_for_each_entry(backref, &src->backrefs, list) {
1209                 if (backref->found_dir_index) {
1210                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1211                                         backref->index, backref->name,
1212                                         backref->namelen, backref->filetype,
1213                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1214                 }
1215                 if (backref->found_dir_item) {
1216                         dir_count++;
1217                         add_inode_backref(dst_cache, dst->ino,
1218                                         backref->dir, 0, backref->name,
1219                                         backref->namelen, backref->filetype,
1220                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1221                 }
1222                 if (backref->found_inode_ref) {
1223                         add_inode_backref(dst_cache, dst->ino,
1224                                         backref->dir, backref->index,
1225                                         backref->name, backref->namelen, 0,
1226                                         backref->ref_type, backref->errors);
1227                 }
1228         }
1229
1230         if (src->found_dir_item)
1231                 dst->found_dir_item = 1;
1232         if (src->found_file_extent)
1233                 dst->found_file_extent = 1;
1234         if (src->found_csum_item)
1235                 dst->found_csum_item = 1;
1236         if (src->some_csum_missing)
1237                 dst->some_csum_missing = 1;
1238         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1239                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1240                 if (ret < 0)
1241                         return ret;
1242         }
1243
1244         BUG_ON(src->found_link < dir_count);
1245         dst->found_link += src->found_link - dir_count;
1246         dst->found_size += src->found_size;
1247         if (src->extent_start != (u64)-1) {
1248                 if (dst->extent_start == (u64)-1) {
1249                         dst->extent_start = src->extent_start;
1250                         dst->extent_end = src->extent_end;
1251                 } else {
1252                         if (dst->extent_end > src->extent_start)
1253                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1254                         else if (dst->extent_end < src->extent_start) {
1255                                 ret = add_file_extent_hole(&dst->holes,
1256                                         dst->extent_end,
1257                                         src->extent_start - dst->extent_end);
1258                         }
1259                         if (dst->extent_end < src->extent_end)
1260                                 dst->extent_end = src->extent_end;
1261                 }
1262         }
1263
1264         dst->errors |= src->errors;
1265         if (src->found_inode_item) {
1266                 if (!dst->found_inode_item) {
1267                         dst->nlink = src->nlink;
1268                         dst->isize = src->isize;
1269                         dst->nbytes = src->nbytes;
1270                         dst->imode = src->imode;
1271                         dst->nodatasum = src->nodatasum;
1272                         dst->found_inode_item = 1;
1273                 } else {
1274                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1275                 }
1276         }
1277         dst->merging = 0;
1278
1279         return 0;
1280 }
1281
1282 static int splice_shared_node(struct shared_node *src_node,
1283                               struct shared_node *dst_node)
1284 {
1285         struct cache_extent *cache;
1286         struct ptr_node *node, *ins;
1287         struct cache_tree *src, *dst;
1288         struct inode_record *rec, *conflict;
1289         u64 current_ino = 0;
1290         int splice = 0;
1291         int ret;
1292
1293         if (--src_node->refs == 0)
1294                 splice = 1;
1295         if (src_node->current)
1296                 current_ino = src_node->current->ino;
1297
1298         src = &src_node->root_cache;
1299         dst = &dst_node->root_cache;
1300 again:
1301         cache = search_cache_extent(src, 0);
1302         while (cache) {
1303                 node = container_of(cache, struct ptr_node, cache);
1304                 rec = node->data;
1305                 cache = next_cache_extent(cache);
1306
1307                 if (splice) {
1308                         remove_cache_extent(src, &node->cache);
1309                         ins = node;
1310                 } else {
1311                         ins = malloc(sizeof(*ins));
1312                         BUG_ON(!ins);
1313                         ins->cache.start = node->cache.start;
1314                         ins->cache.size = node->cache.size;
1315                         ins->data = rec;
1316                         rec->refs++;
1317                 }
1318                 ret = insert_cache_extent(dst, &ins->cache);
1319                 if (ret == -EEXIST) {
1320                         conflict = get_inode_rec(dst, rec->ino, 1);
1321                         BUG_ON(IS_ERR(conflict));
1322                         merge_inode_recs(rec, conflict, dst);
1323                         if (rec->checked) {
1324                                 conflict->checked = 1;
1325                                 if (dst_node->current == conflict)
1326                                         dst_node->current = NULL;
1327                         }
1328                         maybe_free_inode_rec(dst, conflict);
1329                         free_inode_rec(rec);
1330                         free(ins);
1331                 } else {
1332                         BUG_ON(ret);
1333                 }
1334         }
1335
1336         if (src == &src_node->root_cache) {
1337                 src = &src_node->inode_cache;
1338                 dst = &dst_node->inode_cache;
1339                 goto again;
1340         }
1341
1342         if (current_ino > 0 && (!dst_node->current ||
1343             current_ino > dst_node->current->ino)) {
1344                 if (dst_node->current) {
1345                         dst_node->current->checked = 1;
1346                         maybe_free_inode_rec(dst, dst_node->current);
1347                 }
1348                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1349                 BUG_ON(IS_ERR(dst_node->current));
1350         }
1351         return 0;
1352 }
1353
1354 static void free_inode_ptr(struct cache_extent *cache)
1355 {
1356         struct ptr_node *node;
1357         struct inode_record *rec;
1358
1359         node = container_of(cache, struct ptr_node, cache);
1360         rec = node->data;
1361         free_inode_rec(rec);
1362         free(node);
1363 }
1364
1365 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1366
1367 static struct shared_node *find_shared_node(struct cache_tree *shared,
1368                                             u64 bytenr)
1369 {
1370         struct cache_extent *cache;
1371         struct shared_node *node;
1372
1373         cache = lookup_cache_extent(shared, bytenr, 1);
1374         if (cache) {
1375                 node = container_of(cache, struct shared_node, cache);
1376                 return node;
1377         }
1378         return NULL;
1379 }
1380
1381 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1382 {
1383         int ret;
1384         struct shared_node *node;
1385
1386         node = calloc(1, sizeof(*node));
1387         if (!node)
1388                 return -ENOMEM;
1389         node->cache.start = bytenr;
1390         node->cache.size = 1;
1391         cache_tree_init(&node->root_cache);
1392         cache_tree_init(&node->inode_cache);
1393         node->refs = refs;
1394
1395         ret = insert_cache_extent(shared, &node->cache);
1396
1397         return ret;
1398 }
1399
1400 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1401                              struct walk_control *wc, int level)
1402 {
1403         struct shared_node *node;
1404         struct shared_node *dest;
1405         int ret;
1406
1407         if (level == wc->active_node)
1408                 return 0;
1409
1410         BUG_ON(wc->active_node <= level);
1411         node = find_shared_node(&wc->shared, bytenr);
1412         if (!node) {
1413                 ret = add_shared_node(&wc->shared, bytenr, refs);
1414                 BUG_ON(ret);
1415                 node = find_shared_node(&wc->shared, bytenr);
1416                 wc->nodes[level] = node;
1417                 wc->active_node = level;
1418                 return 0;
1419         }
1420
1421         if (wc->root_level == wc->active_node &&
1422             btrfs_root_refs(&root->root_item) == 0) {
1423                 if (--node->refs == 0) {
1424                         free_inode_recs_tree(&node->root_cache);
1425                         free_inode_recs_tree(&node->inode_cache);
1426                         remove_cache_extent(&wc->shared, &node->cache);
1427                         free(node);
1428                 }
1429                 return 1;
1430         }
1431
1432         dest = wc->nodes[wc->active_node];
1433         splice_shared_node(node, dest);
1434         if (node->refs == 0) {
1435                 remove_cache_extent(&wc->shared, &node->cache);
1436                 free(node);
1437         }
1438         return 1;
1439 }
1440
1441 static int leave_shared_node(struct btrfs_root *root,
1442                              struct walk_control *wc, int level)
1443 {
1444         struct shared_node *node;
1445         struct shared_node *dest;
1446         int i;
1447
1448         if (level == wc->root_level)
1449                 return 0;
1450
1451         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1452                 if (wc->nodes[i])
1453                         break;
1454         }
1455         BUG_ON(i >= BTRFS_MAX_LEVEL);
1456
1457         node = wc->nodes[wc->active_node];
1458         wc->nodes[wc->active_node] = NULL;
1459         wc->active_node = i;
1460
1461         dest = wc->nodes[wc->active_node];
1462         if (wc->active_node < wc->root_level ||
1463             btrfs_root_refs(&root->root_item) > 0) {
1464                 BUG_ON(node->refs <= 1);
1465                 splice_shared_node(node, dest);
1466         } else {
1467                 BUG_ON(node->refs < 2);
1468                 node->refs--;
1469         }
1470         return 0;
1471 }
1472
1473 /*
1474  * Returns:
1475  * < 0 - on error
1476  * 1   - if the root with id child_root_id is a child of root parent_root_id
1477  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1478  *       has other root(s) as parent(s)
1479  * 2   - if the root child_root_id doesn't have any parent roots
1480  */
1481 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1482                          u64 child_root_id)
1483 {
1484         struct btrfs_path path;
1485         struct btrfs_key key;
1486         struct extent_buffer *leaf;
1487         int has_parent = 0;
1488         int ret;
1489
1490         btrfs_init_path(&path);
1491
1492         key.objectid = parent_root_id;
1493         key.type = BTRFS_ROOT_REF_KEY;
1494         key.offset = child_root_id;
1495         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1496                                 0, 0);
1497         if (ret < 0)
1498                 return ret;
1499         btrfs_release_path(&path);
1500         if (!ret)
1501                 return 1;
1502
1503         key.objectid = child_root_id;
1504         key.type = BTRFS_ROOT_BACKREF_KEY;
1505         key.offset = 0;
1506         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1507                                 0, 0);
1508         if (ret < 0)
1509                 goto out;
1510
1511         while (1) {
1512                 leaf = path.nodes[0];
1513                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1514                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1515                         if (ret)
1516                                 break;
1517                         leaf = path.nodes[0];
1518                 }
1519
1520                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1521                 if (key.objectid != child_root_id ||
1522                     key.type != BTRFS_ROOT_BACKREF_KEY)
1523                         break;
1524
1525                 has_parent = 1;
1526
1527                 if (key.offset == parent_root_id) {
1528                         btrfs_release_path(&path);
1529                         return 1;
1530                 }
1531
1532                 path.slots[0]++;
1533         }
1534 out:
1535         btrfs_release_path(&path);
1536         if (ret < 0)
1537                 return ret;
1538         return has_parent ? 0 : 2;
1539 }
1540
1541 static int process_dir_item(struct btrfs_root *root,
1542                             struct extent_buffer *eb,
1543                             int slot, struct btrfs_key *key,
1544                             struct shared_node *active_node)
1545 {
1546         u32 total;
1547         u32 cur = 0;
1548         u32 len;
1549         u32 name_len;
1550         u32 data_len;
1551         int error;
1552         int nritems = 0;
1553         int filetype;
1554         struct btrfs_dir_item *di;
1555         struct inode_record *rec;
1556         struct cache_tree *root_cache;
1557         struct cache_tree *inode_cache;
1558         struct btrfs_key location;
1559         char namebuf[BTRFS_NAME_LEN];
1560
1561         root_cache = &active_node->root_cache;
1562         inode_cache = &active_node->inode_cache;
1563         rec = active_node->current;
1564         rec->found_dir_item = 1;
1565
1566         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1567         total = btrfs_item_size_nr(eb, slot);
1568         while (cur < total) {
1569                 nritems++;
1570                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1571                 name_len = btrfs_dir_name_len(eb, di);
1572                 data_len = btrfs_dir_data_len(eb, di);
1573                 filetype = btrfs_dir_type(eb, di);
1574
1575                 rec->found_size += name_len;
1576                 if (name_len <= BTRFS_NAME_LEN) {
1577                         len = name_len;
1578                         error = 0;
1579                 } else {
1580                         len = BTRFS_NAME_LEN;
1581                         error = REF_ERR_NAME_TOO_LONG;
1582                 }
1583                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1584
1585                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1586                         add_inode_backref(inode_cache, location.objectid,
1587                                           key->objectid, key->offset, namebuf,
1588                                           len, filetype, key->type, error);
1589                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1590                         add_inode_backref(root_cache, location.objectid,
1591                                           key->objectid, key->offset,
1592                                           namebuf, len, filetype,
1593                                           key->type, error);
1594                 } else {
1595                         fprintf(stderr, "invalid location in dir item %u\n",
1596                                 location.type);
1597                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1598                                           key->objectid, key->offset, namebuf,
1599                                           len, filetype, key->type, error);
1600                 }
1601
1602                 len = sizeof(*di) + name_len + data_len;
1603                 di = (struct btrfs_dir_item *)((char *)di + len);
1604                 cur += len;
1605         }
1606         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1607                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1608
1609         return 0;
1610 }
1611
1612 static int process_inode_ref(struct extent_buffer *eb,
1613                              int slot, struct btrfs_key *key,
1614                              struct shared_node *active_node)
1615 {
1616         u32 total;
1617         u32 cur = 0;
1618         u32 len;
1619         u32 name_len;
1620         u64 index;
1621         int error;
1622         struct cache_tree *inode_cache;
1623         struct btrfs_inode_ref *ref;
1624         char namebuf[BTRFS_NAME_LEN];
1625
1626         inode_cache = &active_node->inode_cache;
1627
1628         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1629         total = btrfs_item_size_nr(eb, slot);
1630         while (cur < total) {
1631                 name_len = btrfs_inode_ref_name_len(eb, ref);
1632                 index = btrfs_inode_ref_index(eb, ref);
1633                 if (name_len <= BTRFS_NAME_LEN) {
1634                         len = name_len;
1635                         error = 0;
1636                 } else {
1637                         len = BTRFS_NAME_LEN;
1638                         error = REF_ERR_NAME_TOO_LONG;
1639                 }
1640                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1641                 add_inode_backref(inode_cache, key->objectid, key->offset,
1642                                   index, namebuf, len, 0, key->type, error);
1643
1644                 len = sizeof(*ref) + name_len;
1645                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1646                 cur += len;
1647         }
1648         return 0;
1649 }
1650
1651 static int process_inode_extref(struct extent_buffer *eb,
1652                                 int slot, struct btrfs_key *key,
1653                                 struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         u64 parent;
1661         int error;
1662         struct cache_tree *inode_cache;
1663         struct btrfs_inode_extref *extref;
1664         char namebuf[BTRFS_NAME_LEN];
1665
1666         inode_cache = &active_node->inode_cache;
1667
1668         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1669         total = btrfs_item_size_nr(eb, slot);
1670         while (cur < total) {
1671                 name_len = btrfs_inode_extref_name_len(eb, extref);
1672                 index = btrfs_inode_extref_index(eb, extref);
1673                 parent = btrfs_inode_extref_parent(eb, extref);
1674                 if (name_len <= BTRFS_NAME_LEN) {
1675                         len = name_len;
1676                         error = 0;
1677                 } else {
1678                         len = BTRFS_NAME_LEN;
1679                         error = REF_ERR_NAME_TOO_LONG;
1680                 }
1681                 read_extent_buffer(eb, namebuf,
1682                                    (unsigned long)(extref + 1), len);
1683                 add_inode_backref(inode_cache, key->objectid, parent,
1684                                   index, namebuf, len, 0, key->type, error);
1685
1686                 len = sizeof(*extref) + name_len;
1687                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1688                 cur += len;
1689         }
1690         return 0;
1691
1692 }
1693
1694 static int count_csum_range(struct btrfs_root *root, u64 start,
1695                             u64 len, u64 *found)
1696 {
1697         struct btrfs_key key;
1698         struct btrfs_path path;
1699         struct extent_buffer *leaf;
1700         int ret;
1701         size_t size;
1702         *found = 0;
1703         u64 csum_end;
1704         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1705
1706         btrfs_init_path(&path);
1707
1708         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1709         key.offset = start;
1710         key.type = BTRFS_EXTENT_CSUM_KEY;
1711
1712         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1713                                 &key, &path, 0, 0);
1714         if (ret < 0)
1715                 goto out;
1716         if (ret > 0 && path.slots[0] > 0) {
1717                 leaf = path.nodes[0];
1718                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1719                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1720                     key.type == BTRFS_EXTENT_CSUM_KEY)
1721                         path.slots[0]--;
1722         }
1723
1724         while (len > 0) {
1725                 leaf = path.nodes[0];
1726                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1727                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1728                         if (ret > 0)
1729                                 break;
1730                         else if (ret < 0)
1731                                 goto out;
1732                         leaf = path.nodes[0];
1733                 }
1734
1735                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1736                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1737                     key.type != BTRFS_EXTENT_CSUM_KEY)
1738                         break;
1739
1740                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1741                 if (key.offset >= start + len)
1742                         break;
1743
1744                 if (key.offset > start)
1745                         start = key.offset;
1746
1747                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1748                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1749                 if (csum_end > start) {
1750                         size = min(csum_end - start, len);
1751                         len -= size;
1752                         start += size;
1753                         *found += size;
1754                 }
1755
1756                 path.slots[0]++;
1757         }
1758 out:
1759         btrfs_release_path(&path);
1760         if (ret < 0)
1761                 return ret;
1762         return 0;
1763 }
1764
1765 static int process_file_extent(struct btrfs_root *root,
1766                                 struct extent_buffer *eb,
1767                                 int slot, struct btrfs_key *key,
1768                                 struct shared_node *active_node)
1769 {
1770         struct inode_record *rec;
1771         struct btrfs_file_extent_item *fi;
1772         u64 num_bytes = 0;
1773         u64 disk_bytenr = 0;
1774         u64 extent_offset = 0;
1775         u64 mask = root->sectorsize - 1;
1776         int extent_type;
1777         int ret;
1778
1779         rec = active_node->current;
1780         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1781         rec->found_file_extent = 1;
1782
1783         if (rec->extent_start == (u64)-1) {
1784                 rec->extent_start = key->offset;
1785                 rec->extent_end = key->offset;
1786         }
1787
1788         if (rec->extent_end > key->offset)
1789                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1790         else if (rec->extent_end < key->offset) {
1791                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1792                                            key->offset - rec->extent_end);
1793                 if (ret < 0)
1794                         return ret;
1795         }
1796
1797         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1798         extent_type = btrfs_file_extent_type(eb, fi);
1799
1800         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1801                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1802                 if (num_bytes == 0)
1803                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804                 rec->found_size += num_bytes;
1805                 num_bytes = (num_bytes + mask) & ~mask;
1806         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1807                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1809                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1810                 extent_offset = btrfs_file_extent_offset(eb, fi);
1811                 if (num_bytes == 0 || (num_bytes & mask))
1812                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1813                 if (num_bytes + extent_offset >
1814                     btrfs_file_extent_ram_bytes(eb, fi))
1815                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1816                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1817                     (btrfs_file_extent_compression(eb, fi) ||
1818                      btrfs_file_extent_encryption(eb, fi) ||
1819                      btrfs_file_extent_other_encoding(eb, fi)))
1820                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1821                 if (disk_bytenr > 0)
1822                         rec->found_size += num_bytes;
1823         } else {
1824                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1825         }
1826         rec->extent_end = key->offset + num_bytes;
1827
1828         /*
1829          * The data reloc tree will copy full extents into its inode and then
1830          * copy the corresponding csums.  Because the extent it copied could be
1831          * a preallocated extent that hasn't been written to yet there may be no
1832          * csums to copy, ergo we won't have csums for our file extent.  This is
1833          * ok so just don't bother checking csums if the inode belongs to the
1834          * data reloc tree.
1835          */
1836         if (disk_bytenr > 0 &&
1837             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1838                 u64 found;
1839                 if (btrfs_file_extent_compression(eb, fi))
1840                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1841                 else
1842                         disk_bytenr += extent_offset;
1843
1844                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1845                 if (ret < 0)
1846                         return ret;
1847                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1848                         if (found > 0)
1849                                 rec->found_csum_item = 1;
1850                         if (found < num_bytes)
1851                                 rec->some_csum_missing = 1;
1852                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1853                         if (found > 0)
1854                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1855                 }
1856         }
1857         return 0;
1858 }
1859
1860 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1861                             struct walk_control *wc)
1862 {
1863         struct btrfs_key key;
1864         u32 nritems;
1865         int i;
1866         int ret = 0;
1867         struct cache_tree *inode_cache;
1868         struct shared_node *active_node;
1869
1870         if (wc->root_level == wc->active_node &&
1871             btrfs_root_refs(&root->root_item) == 0)
1872                 return 0;
1873
1874         active_node = wc->nodes[wc->active_node];
1875         inode_cache = &active_node->inode_cache;
1876         nritems = btrfs_header_nritems(eb);
1877         for (i = 0; i < nritems; i++) {
1878                 btrfs_item_key_to_cpu(eb, &key, i);
1879
1880                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1881                         continue;
1882                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1883                         continue;
1884
1885                 if (active_node->current == NULL ||
1886                     active_node->current->ino < key.objectid) {
1887                         if (active_node->current) {
1888                                 active_node->current->checked = 1;
1889                                 maybe_free_inode_rec(inode_cache,
1890                                                      active_node->current);
1891                         }
1892                         active_node->current = get_inode_rec(inode_cache,
1893                                                              key.objectid, 1);
1894                         BUG_ON(IS_ERR(active_node->current));
1895                 }
1896                 switch (key.type) {
1897                 case BTRFS_DIR_ITEM_KEY:
1898                 case BTRFS_DIR_INDEX_KEY:
1899                         ret = process_dir_item(root, eb, i, &key, active_node);
1900                         break;
1901                 case BTRFS_INODE_REF_KEY:
1902                         ret = process_inode_ref(eb, i, &key, active_node);
1903                         break;
1904                 case BTRFS_INODE_EXTREF_KEY:
1905                         ret = process_inode_extref(eb, i, &key, active_node);
1906                         break;
1907                 case BTRFS_INODE_ITEM_KEY:
1908                         ret = process_inode_item(eb, i, &key, active_node);
1909                         break;
1910                 case BTRFS_EXTENT_DATA_KEY:
1911                         ret = process_file_extent(root, eb, i, &key,
1912                                                   active_node);
1913                         break;
1914                 default:
1915                         break;
1916                 };
1917         }
1918         return ret;
1919 }
1920
1921 static void reada_walk_down(struct btrfs_root *root,
1922                             struct extent_buffer *node, int slot)
1923 {
1924         u64 bytenr;
1925         u64 ptr_gen;
1926         u32 nritems;
1927         u32 blocksize;
1928         int i;
1929         int level;
1930
1931         level = btrfs_header_level(node);
1932         if (level != 1)
1933                 return;
1934
1935         nritems = btrfs_header_nritems(node);
1936         blocksize = root->nodesize;
1937         for (i = slot; i < nritems; i++) {
1938                 bytenr = btrfs_node_blockptr(node, i);
1939                 ptr_gen = btrfs_node_ptr_generation(node, i);
1940                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1941         }
1942 }
1943
1944 /*
1945  * Check the child node/leaf by the following condition:
1946  * 1. the first item key of the node/leaf should be the same with the one
1947  *    in parent.
1948  * 2. block in parent node should match the child node/leaf.
1949  * 3. generation of parent node and child's header should be consistent.
1950  *
1951  * Or the child node/leaf pointed by the key in parent is not valid.
1952  *
1953  * We hope to check leaf owner too, but since subvol may share leaves,
1954  * which makes leaf owner check not so strong, key check should be
1955  * sufficient enough for that case.
1956  */
1957 static int check_child_node(struct btrfs_root *root,
1958                             struct extent_buffer *parent, int slot,
1959                             struct extent_buffer *child)
1960 {
1961         struct btrfs_key parent_key;
1962         struct btrfs_key child_key;
1963         int ret = 0;
1964
1965         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1966         if (btrfs_header_level(child) == 0)
1967                 btrfs_item_key_to_cpu(child, &child_key, 0);
1968         else
1969                 btrfs_node_key_to_cpu(child, &child_key, 0);
1970
1971         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1972                 ret = -EINVAL;
1973                 fprintf(stderr,
1974                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1975                         parent_key.objectid, parent_key.type, parent_key.offset,
1976                         child_key.objectid, child_key.type, child_key.offset);
1977         }
1978         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1979                 ret = -EINVAL;
1980                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1981                         btrfs_node_blockptr(parent, slot),
1982                         btrfs_header_bytenr(child));
1983         }
1984         if (btrfs_node_ptr_generation(parent, slot) !=
1985             btrfs_header_generation(child)) {
1986                 ret = -EINVAL;
1987                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1988                         btrfs_header_generation(child),
1989                         btrfs_node_ptr_generation(parent, slot));
1990         }
1991         return ret;
1992 }
1993
1994 struct node_refs {
1995         u64 bytenr[BTRFS_MAX_LEVEL];
1996         u64 refs[BTRFS_MAX_LEVEL];
1997 };
1998
1999 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2000                           struct walk_control *wc, int *level,
2001                           struct node_refs *nrefs)
2002 {
2003         enum btrfs_tree_block_status status;
2004         u64 bytenr;
2005         u64 ptr_gen;
2006         struct extent_buffer *next;
2007         struct extent_buffer *cur;
2008         u32 blocksize;
2009         int ret, err = 0;
2010         u64 refs;
2011
2012         WARN_ON(*level < 0);
2013         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014
2015         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2016                 refs = nrefs->refs[*level];
2017                 ret = 0;
2018         } else {
2019                 ret = btrfs_lookup_extent_info(NULL, root,
2020                                        path->nodes[*level]->start,
2021                                        *level, 1, &refs, NULL);
2022                 if (ret < 0) {
2023                         err = ret;
2024                         goto out;
2025                 }
2026                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2027                 nrefs->refs[*level] = refs;
2028         }
2029
2030         if (refs > 1) {
2031                 ret = enter_shared_node(root, path->nodes[*level]->start,
2032                                         refs, wc, *level);
2033                 if (ret > 0) {
2034                         err = ret;
2035                         goto out;
2036                 }
2037         }
2038
2039         while (*level >= 0) {
2040                 WARN_ON(*level < 0);
2041                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2042                 cur = path->nodes[*level];
2043
2044                 if (btrfs_header_level(cur) != *level)
2045                         WARN_ON(1);
2046
2047                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2048                         break;
2049                 if (*level == 0) {
2050                         ret = process_one_leaf(root, cur, wc);
2051                         if (ret < 0)
2052                                 err = ret;
2053                         break;
2054                 }
2055                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2056                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2057                 blocksize = root->nodesize;
2058
2059                 if (bytenr == nrefs->bytenr[*level - 1]) {
2060                         refs = nrefs->refs[*level - 1];
2061                 } else {
2062                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2063                                         *level - 1, 1, &refs, NULL);
2064                         if (ret < 0) {
2065                                 refs = 0;
2066                         } else {
2067                                 nrefs->bytenr[*level - 1] = bytenr;
2068                                 nrefs->refs[*level - 1] = refs;
2069                         }
2070                 }
2071
2072                 if (refs > 1) {
2073                         ret = enter_shared_node(root, bytenr, refs,
2074                                                 wc, *level - 1);
2075                         if (ret > 0) {
2076                                 path->slots[*level]++;
2077                                 continue;
2078                         }
2079                 }
2080
2081                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2082                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2083                         free_extent_buffer(next);
2084                         reada_walk_down(root, cur, path->slots[*level]);
2085                         next = read_tree_block(root, bytenr, blocksize,
2086                                                ptr_gen);
2087                         if (!extent_buffer_uptodate(next)) {
2088                                 struct btrfs_key node_key;
2089
2090                                 btrfs_node_key_to_cpu(path->nodes[*level],
2091                                                       &node_key,
2092                                                       path->slots[*level]);
2093                                 btrfs_add_corrupt_extent_record(root->fs_info,
2094                                                 &node_key,
2095                                                 path->nodes[*level]->start,
2096                                                 root->nodesize, *level);
2097                                 err = -EIO;
2098                                 goto out;
2099                         }
2100                 }
2101
2102                 ret = check_child_node(root, cur, path->slots[*level], next);
2103                 if (ret) {
2104                         err = ret;
2105                         goto out;
2106                 }
2107
2108                 if (btrfs_is_leaf(next))
2109                         status = btrfs_check_leaf(root, NULL, next);
2110                 else
2111                         status = btrfs_check_node(root, NULL, next);
2112                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2113                         free_extent_buffer(next);
2114                         err = -EIO;
2115                         goto out;
2116                 }
2117
2118                 *level = *level - 1;
2119                 free_extent_buffer(path->nodes[*level]);
2120                 path->nodes[*level] = next;
2121                 path->slots[*level] = 0;
2122         }
2123 out:
2124         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2125         return err;
2126 }
2127
2128 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2129                         struct walk_control *wc, int *level)
2130 {
2131         int i;
2132         struct extent_buffer *leaf;
2133
2134         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2135                 leaf = path->nodes[i];
2136                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2137                         path->slots[i]++;
2138                         *level = i;
2139                         return 0;
2140                 } else {
2141                         free_extent_buffer(path->nodes[*level]);
2142                         path->nodes[*level] = NULL;
2143                         BUG_ON(*level > wc->active_node);
2144                         if (*level == wc->active_node)
2145                                 leave_shared_node(root, wc, *level);
2146                         *level = i + 1;
2147                 }
2148         }
2149         return 1;
2150 }
2151
2152 static int check_root_dir(struct inode_record *rec)
2153 {
2154         struct inode_backref *backref;
2155         int ret = -1;
2156
2157         if (!rec->found_inode_item || rec->errors)
2158                 goto out;
2159         if (rec->nlink != 1 || rec->found_link != 0)
2160                 goto out;
2161         if (list_empty(&rec->backrefs))
2162                 goto out;
2163         backref = to_inode_backref(rec->backrefs.next);
2164         if (!backref->found_inode_ref)
2165                 goto out;
2166         if (backref->index != 0 || backref->namelen != 2 ||
2167             memcmp(backref->name, "..", 2))
2168                 goto out;
2169         if (backref->found_dir_index || backref->found_dir_item)
2170                 goto out;
2171         ret = 0;
2172 out:
2173         return ret;
2174 }
2175
2176 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2177                               struct btrfs_root *root, struct btrfs_path *path,
2178                               struct inode_record *rec)
2179 {
2180         struct btrfs_inode_item *ei;
2181         struct btrfs_key key;
2182         int ret;
2183
2184         key.objectid = rec->ino;
2185         key.type = BTRFS_INODE_ITEM_KEY;
2186         key.offset = (u64)-1;
2187
2188         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2189         if (ret < 0)
2190                 goto out;
2191         if (ret) {
2192                 if (!path->slots[0]) {
2193                         ret = -ENOENT;
2194                         goto out;
2195                 }
2196                 path->slots[0]--;
2197                 ret = 0;
2198         }
2199         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2200         if (key.objectid != rec->ino) {
2201                 ret = -ENOENT;
2202                 goto out;
2203         }
2204
2205         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2206                             struct btrfs_inode_item);
2207         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2208         btrfs_mark_buffer_dirty(path->nodes[0]);
2209         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2210         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2211                root->root_key.objectid);
2212 out:
2213         btrfs_release_path(path);
2214         return ret;
2215 }
2216
2217 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2218                                     struct btrfs_root *root,
2219                                     struct btrfs_path *path,
2220                                     struct inode_record *rec)
2221 {
2222         int ret;
2223
2224         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2225         btrfs_release_path(path);
2226         if (!ret)
2227                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2228         return ret;
2229 }
2230
2231 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2232                                struct btrfs_root *root,
2233                                struct btrfs_path *path,
2234                                struct inode_record *rec)
2235 {
2236         struct btrfs_inode_item *ei;
2237         struct btrfs_key key;
2238         int ret = 0;
2239
2240         key.objectid = rec->ino;
2241         key.type = BTRFS_INODE_ITEM_KEY;
2242         key.offset = 0;
2243
2244         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2245         if (ret) {
2246                 if (ret > 0)
2247                         ret = -ENOENT;
2248                 goto out;
2249         }
2250
2251         /* Since ret == 0, no need to check anything */
2252         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2253                             struct btrfs_inode_item);
2254         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2255         btrfs_mark_buffer_dirty(path->nodes[0]);
2256         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2257         printf("reset nbytes for ino %llu root %llu\n",
2258                rec->ino, root->root_key.objectid);
2259 out:
2260         btrfs_release_path(path);
2261         return ret;
2262 }
2263
2264 static int add_missing_dir_index(struct btrfs_root *root,
2265                                  struct cache_tree *inode_cache,
2266                                  struct inode_record *rec,
2267                                  struct inode_backref *backref)
2268 {
2269         struct btrfs_path *path;
2270         struct btrfs_trans_handle *trans;
2271         struct btrfs_dir_item *dir_item;
2272         struct extent_buffer *leaf;
2273         struct btrfs_key key;
2274         struct btrfs_disk_key disk_key;
2275         struct inode_record *dir_rec;
2276         unsigned long name_ptr;
2277         u32 data_size = sizeof(*dir_item) + backref->namelen;
2278         int ret;
2279
2280         path = btrfs_alloc_path();
2281         if (!path)
2282                 return -ENOMEM;
2283
2284         trans = btrfs_start_transaction(root, 1);
2285         if (IS_ERR(trans)) {
2286                 btrfs_free_path(path);
2287                 return PTR_ERR(trans);
2288         }
2289
2290         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2291                 (unsigned long long)rec->ino);
2292         key.objectid = backref->dir;
2293         key.type = BTRFS_DIR_INDEX_KEY;
2294         key.offset = backref->index;
2295
2296         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2297         BUG_ON(ret);
2298
2299         leaf = path->nodes[0];
2300         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2301
2302         disk_key.objectid = cpu_to_le64(rec->ino);
2303         disk_key.type = BTRFS_INODE_ITEM_KEY;
2304         disk_key.offset = 0;
2305
2306         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2307         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2308         btrfs_set_dir_data_len(leaf, dir_item, 0);
2309         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2310         name_ptr = (unsigned long)(dir_item + 1);
2311         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2312         btrfs_mark_buffer_dirty(leaf);
2313         btrfs_free_path(path);
2314         btrfs_commit_transaction(trans, root);
2315
2316         backref->found_dir_index = 1;
2317         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2318         BUG_ON(IS_ERR(dir_rec));
2319         if (!dir_rec)
2320                 return 0;
2321         dir_rec->found_size += backref->namelen;
2322         if (dir_rec->found_size == dir_rec->isize &&
2323             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2324                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2325         if (dir_rec->found_size != dir_rec->isize)
2326                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2327
2328         return 0;
2329 }
2330
2331 static int delete_dir_index(struct btrfs_root *root,
2332                             struct cache_tree *inode_cache,
2333                             struct inode_record *rec,
2334                             struct inode_backref *backref)
2335 {
2336         struct btrfs_trans_handle *trans;
2337         struct btrfs_dir_item *di;
2338         struct btrfs_path *path;
2339         int ret = 0;
2340
2341         path = btrfs_alloc_path();
2342         if (!path)
2343                 return -ENOMEM;
2344
2345         trans = btrfs_start_transaction(root, 1);
2346         if (IS_ERR(trans)) {
2347                 btrfs_free_path(path);
2348                 return PTR_ERR(trans);
2349         }
2350
2351
2352         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2353                 (unsigned long long)backref->dir,
2354                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2355                 (unsigned long long)root->objectid);
2356
2357         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2358                                     backref->name, backref->namelen,
2359                                     backref->index, -1);
2360         if (IS_ERR(di)) {
2361                 ret = PTR_ERR(di);
2362                 btrfs_free_path(path);
2363                 btrfs_commit_transaction(trans, root);
2364                 if (ret == -ENOENT)
2365                         return 0;
2366                 return ret;
2367         }
2368
2369         if (!di)
2370                 ret = btrfs_del_item(trans, root, path);
2371         else
2372                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2373         BUG_ON(ret);
2374         btrfs_free_path(path);
2375         btrfs_commit_transaction(trans, root);
2376         return ret;
2377 }
2378
2379 static int create_inode_item(struct btrfs_root *root,
2380                              struct inode_record *rec,
2381                              struct inode_backref *backref, int root_dir)
2382 {
2383         struct btrfs_trans_handle *trans;
2384         struct btrfs_inode_item inode_item;
2385         time_t now = time(NULL);
2386         int ret;
2387
2388         trans = btrfs_start_transaction(root, 1);
2389         if (IS_ERR(trans)) {
2390                 ret = PTR_ERR(trans);
2391                 return ret;
2392         }
2393
2394         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2395                 "be incomplete, please check permissions and content after "
2396                 "the fsck completes.\n", (unsigned long long)root->objectid,
2397                 (unsigned long long)rec->ino);
2398
2399         memset(&inode_item, 0, sizeof(inode_item));
2400         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2401         if (root_dir)
2402                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2403         else
2404                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2405         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2406         if (rec->found_dir_item) {
2407                 if (rec->found_file_extent)
2408                         fprintf(stderr, "root %llu inode %llu has both a dir "
2409                                 "item and extents, unsure if it is a dir or a "
2410                                 "regular file so setting it as a directory\n",
2411                                 (unsigned long long)root->objectid,
2412                                 (unsigned long long)rec->ino);
2413                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2414                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2415         } else if (!rec->found_dir_item) {
2416                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2417                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2418         }
2419         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2420         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2421         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2422         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2423         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2424         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2425         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2426         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2427
2428         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2429         BUG_ON(ret);
2430         btrfs_commit_transaction(trans, root);
2431         return 0;
2432 }
2433
2434 static int repair_inode_backrefs(struct btrfs_root *root,
2435                                  struct inode_record *rec,
2436                                  struct cache_tree *inode_cache,
2437                                  int delete)
2438 {
2439         struct inode_backref *tmp, *backref;
2440         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2441         int ret = 0;
2442         int repaired = 0;
2443
2444         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2445                 if (!delete && rec->ino == root_dirid) {
2446                         if (!rec->found_inode_item) {
2447                                 ret = create_inode_item(root, rec, backref, 1);
2448                                 if (ret)
2449                                         break;
2450                                 repaired++;
2451                         }
2452                 }
2453
2454                 /* Index 0 for root dir's are special, don't mess with it */
2455                 if (rec->ino == root_dirid && backref->index == 0)
2456                         continue;
2457
2458                 if (delete &&
2459                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2460                      (backref->found_dir_index && backref->found_inode_ref &&
2461                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2462                         ret = delete_dir_index(root, inode_cache, rec, backref);
2463                         if (ret)
2464                                 break;
2465                         repaired++;
2466                         list_del(&backref->list);
2467                         free(backref);
2468                 }
2469
2470                 if (!delete && !backref->found_dir_index &&
2471                     backref->found_dir_item && backref->found_inode_ref) {
2472                         ret = add_missing_dir_index(root, inode_cache, rec,
2473                                                     backref);
2474                         if (ret)
2475                                 break;
2476                         repaired++;
2477                         if (backref->found_dir_item &&
2478                             backref->found_dir_index &&
2479                             backref->found_dir_index) {
2480                                 if (!backref->errors &&
2481                                     backref->found_inode_ref) {
2482                                         list_del(&backref->list);
2483                                         free(backref);
2484                                 }
2485                         }
2486                 }
2487
2488                 if (!delete && (!backref->found_dir_index &&
2489                                 !backref->found_dir_item &&
2490                                 backref->found_inode_ref)) {
2491                         struct btrfs_trans_handle *trans;
2492                         struct btrfs_key location;
2493
2494                         ret = check_dir_conflict(root, backref->name,
2495                                                  backref->namelen,
2496                                                  backref->dir,
2497                                                  backref->index);
2498                         if (ret) {
2499                                 /*
2500                                  * let nlink fixing routine to handle it,
2501                                  * which can do it better.
2502                                  */
2503                                 ret = 0;
2504                                 break;
2505                         }
2506                         location.objectid = rec->ino;
2507                         location.type = BTRFS_INODE_ITEM_KEY;
2508                         location.offset = 0;
2509
2510                         trans = btrfs_start_transaction(root, 1);
2511                         if (IS_ERR(trans)) {
2512                                 ret = PTR_ERR(trans);
2513                                 break;
2514                         }
2515                         fprintf(stderr, "adding missing dir index/item pair "
2516                                 "for inode %llu\n",
2517                                 (unsigned long long)rec->ino);
2518                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2519                                                     backref->namelen,
2520                                                     backref->dir, &location,
2521                                                     imode_to_type(rec->imode),
2522                                                     backref->index);
2523                         BUG_ON(ret);
2524                         btrfs_commit_transaction(trans, root);
2525                         repaired++;
2526                 }
2527
2528                 if (!delete && (backref->found_inode_ref &&
2529                                 backref->found_dir_index &&
2530                                 backref->found_dir_item &&
2531                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2532                                 !rec->found_inode_item)) {
2533                         ret = create_inode_item(root, rec, backref, 0);
2534                         if (ret)
2535                                 break;
2536                         repaired++;
2537                 }
2538
2539         }
2540         return ret ? ret : repaired;
2541 }
2542
2543 /*
2544  * To determine the file type for nlink/inode_item repair
2545  *
2546  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2547  * Return -ENOENT if file type is not found.
2548  */
2549 static int find_file_type(struct inode_record *rec, u8 *type)
2550 {
2551         struct inode_backref *backref;
2552
2553         /* For inode item recovered case */
2554         if (rec->found_inode_item) {
2555                 *type = imode_to_type(rec->imode);
2556                 return 0;
2557         }
2558
2559         list_for_each_entry(backref, &rec->backrefs, list) {
2560                 if (backref->found_dir_index || backref->found_dir_item) {
2561                         *type = backref->filetype;
2562                         return 0;
2563                 }
2564         }
2565         return -ENOENT;
2566 }
2567
2568 /*
2569  * To determine the file name for nlink repair
2570  *
2571  * Return 0 if file name is found, set name and namelen.
2572  * Return -ENOENT if file name is not found.
2573  */
2574 static int find_file_name(struct inode_record *rec,
2575                           char *name, int *namelen)
2576 {
2577         struct inode_backref *backref;
2578
2579         list_for_each_entry(backref, &rec->backrefs, list) {
2580                 if (backref->found_dir_index || backref->found_dir_item ||
2581                     backref->found_inode_ref) {
2582                         memcpy(name, backref->name, backref->namelen);
2583                         *namelen = backref->namelen;
2584                         return 0;
2585                 }
2586         }
2587         return -ENOENT;
2588 }
2589
2590 /* Reset the nlink of the inode to the correct one */
2591 static int reset_nlink(struct btrfs_trans_handle *trans,
2592                        struct btrfs_root *root,
2593                        struct btrfs_path *path,
2594                        struct inode_record *rec)
2595 {
2596         struct inode_backref *backref;
2597         struct inode_backref *tmp;
2598         struct btrfs_key key;
2599         struct btrfs_inode_item *inode_item;
2600         int ret = 0;
2601
2602         /* We don't believe this either, reset it and iterate backref */
2603         rec->found_link = 0;
2604
2605         /* Remove all backref including the valid ones */
2606         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2607                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2608                                    backref->index, backref->name,
2609                                    backref->namelen, 0);
2610                 if (ret < 0)
2611                         goto out;
2612
2613                 /* remove invalid backref, so it won't be added back */
2614                 if (!(backref->found_dir_index &&
2615                       backref->found_dir_item &&
2616                       backref->found_inode_ref)) {
2617                         list_del(&backref->list);
2618                         free(backref);
2619                 } else {
2620                         rec->found_link++;
2621                 }
2622         }
2623
2624         /* Set nlink to 0 */
2625         key.objectid = rec->ino;
2626         key.type = BTRFS_INODE_ITEM_KEY;
2627         key.offset = 0;
2628         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2629         if (ret < 0)
2630                 goto out;
2631         if (ret > 0) {
2632                 ret = -ENOENT;
2633                 goto out;
2634         }
2635         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2636                                     struct btrfs_inode_item);
2637         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2638         btrfs_mark_buffer_dirty(path->nodes[0]);
2639         btrfs_release_path(path);
2640
2641         /*
2642          * Add back valid inode_ref/dir_item/dir_index,
2643          * add_link() will handle the nlink inc, so new nlink must be correct
2644          */
2645         list_for_each_entry(backref, &rec->backrefs, list) {
2646                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2647                                      backref->name, backref->namelen,
2648                                      backref->filetype, &backref->index, 1);
2649                 if (ret < 0)
2650                         goto out;
2651         }
2652 out:
2653         btrfs_release_path(path);
2654         return ret;
2655 }
2656
2657 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2658                                struct btrfs_root *root,
2659                                struct btrfs_path *path,
2660                                struct inode_record *rec)
2661 {
2662         char *dir_name = "lost+found";
2663         char namebuf[BTRFS_NAME_LEN] = {0};
2664         u64 lost_found_ino;
2665         u32 mode = 0700;
2666         u8 type = 0;
2667         int namelen = 0;
2668         int name_recovered = 0;
2669         int type_recovered = 0;
2670         int ret = 0;
2671
2672         /*
2673          * Get file name and type first before these invalid inode ref
2674          * are deleted by remove_all_invalid_backref()
2675          */
2676         name_recovered = !find_file_name(rec, namebuf, &namelen);
2677         type_recovered = !find_file_type(rec, &type);
2678
2679         if (!name_recovered) {
2680                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2681                        rec->ino, rec->ino);
2682                 namelen = count_digits(rec->ino);
2683                 sprintf(namebuf, "%llu", rec->ino);
2684                 name_recovered = 1;
2685         }
2686         if (!type_recovered) {
2687                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2688                        rec->ino);
2689                 type = BTRFS_FT_REG_FILE;
2690                 type_recovered = 1;
2691         }
2692
2693         ret = reset_nlink(trans, root, path, rec);
2694         if (ret < 0) {
2695                 fprintf(stderr,
2696                         "Failed to reset nlink for inode %llu: %s\n",
2697                         rec->ino, strerror(-ret));
2698                 goto out;
2699         }
2700
2701         if (rec->found_link == 0) {
2702                 lost_found_ino = root->highest_inode;
2703                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2704                         ret = -EOVERFLOW;
2705                         goto out;
2706                 }
2707                 lost_found_ino++;
2708                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2709                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2710                                   mode);
2711                 if (ret < 0) {
2712                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2713                                 dir_name, strerror(-ret));
2714                         goto out;
2715                 }
2716                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2717                                      namebuf, namelen, type, NULL, 1);
2718                 /*
2719                  * Add ".INO" suffix several times to handle case where
2720                  * "FILENAME.INO" is already taken by another file.
2721                  */
2722                 while (ret == -EEXIST) {
2723                         /*
2724                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2725                          */
2726                         if (namelen + count_digits(rec->ino) + 1 >
2727                             BTRFS_NAME_LEN) {
2728                                 ret = -EFBIG;
2729                                 goto out;
2730                         }
2731                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2732                                  ".%llu", rec->ino);
2733                         namelen += count_digits(rec->ino) + 1;
2734                         ret = btrfs_add_link(trans, root, rec->ino,
2735                                              lost_found_ino, namebuf,
2736                                              namelen, type, NULL, 1);
2737                 }
2738                 if (ret < 0) {
2739                         fprintf(stderr,
2740                                 "Failed to link the inode %llu to %s dir: %s\n",
2741                                 rec->ino, dir_name, strerror(-ret));
2742                         goto out;
2743                 }
2744                 /*
2745                  * Just increase the found_link, don't actually add the
2746                  * backref. This will make things easier and this inode
2747                  * record will be freed after the repair is done.
2748                  * So fsck will not report problem about this inode.
2749                  */
2750                 rec->found_link++;
2751                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2752                        namelen, namebuf, dir_name);
2753         }
2754         printf("Fixed the nlink of inode %llu\n", rec->ino);
2755 out:
2756         /*
2757          * Clear the flag anyway, or we will loop forever for the same inode
2758          * as it will not be removed from the bad inode list and the dead loop
2759          * happens.
2760          */
2761         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2762         btrfs_release_path(path);
2763         return ret;
2764 }
2765
2766 /*
2767  * Check if there is any normal(reg or prealloc) file extent for given
2768  * ino.
2769  * This is used to determine the file type when neither its dir_index/item or
2770  * inode_item exists.
2771  *
2772  * This will *NOT* report error, if any error happens, just consider it does
2773  * not have any normal file extent.
2774  */
2775 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2776 {
2777         struct btrfs_path *path;
2778         struct btrfs_key key;
2779         struct btrfs_key found_key;
2780         struct btrfs_file_extent_item *fi;
2781         u8 type;
2782         int ret = 0;
2783
2784         path = btrfs_alloc_path();
2785         if (!path)
2786                 goto out;
2787         key.objectid = ino;
2788         key.type = BTRFS_EXTENT_DATA_KEY;
2789         key.offset = 0;
2790
2791         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2792         if (ret < 0) {
2793                 ret = 0;
2794                 goto out;
2795         }
2796         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2797                 ret = btrfs_next_leaf(root, path);
2798                 if (ret) {
2799                         ret = 0;
2800                         goto out;
2801                 }
2802         }
2803         while (1) {
2804                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2805                                       path->slots[0]);
2806                 if (found_key.objectid != ino ||
2807                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2808                         break;
2809                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2810                                     struct btrfs_file_extent_item);
2811                 type = btrfs_file_extent_type(path->nodes[0], fi);
2812                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2813                         ret = 1;
2814                         goto out;
2815                 }
2816         }
2817 out:
2818         btrfs_free_path(path);
2819         return ret;
2820 }
2821
2822 static u32 btrfs_type_to_imode(u8 type)
2823 {
2824         static u32 imode_by_btrfs_type[] = {
2825                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2826                 [BTRFS_FT_DIR]          = S_IFDIR,
2827                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2828                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2829                 [BTRFS_FT_FIFO]         = S_IFIFO,
2830                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2831                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2832         };
2833
2834         return imode_by_btrfs_type[(type)];
2835 }
2836
2837 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2838                                 struct btrfs_root *root,
2839                                 struct btrfs_path *path,
2840                                 struct inode_record *rec)
2841 {
2842         u8 filetype;
2843         u32 mode = 0700;
2844         int type_recovered = 0;
2845         int ret = 0;
2846
2847         printf("Trying to rebuild inode:%llu\n", rec->ino);
2848
2849         type_recovered = !find_file_type(rec, &filetype);
2850
2851         /*
2852          * Try to determine inode type if type not found.
2853          *
2854          * For found regular file extent, it must be FILE.
2855          * For found dir_item/index, it must be DIR.
2856          *
2857          * For undetermined one, use FILE as fallback.
2858          *
2859          * TODO:
2860          * 1. If found backref(inode_index/item is already handled) to it,
2861          *    it must be DIR.
2862          *    Need new inode-inode ref structure to allow search for that.
2863          */
2864         if (!type_recovered) {
2865                 if (rec->found_file_extent &&
2866                     find_normal_file_extent(root, rec->ino)) {
2867                         type_recovered = 1;
2868                         filetype = BTRFS_FT_REG_FILE;
2869                 } else if (rec->found_dir_item) {
2870                         type_recovered = 1;
2871                         filetype = BTRFS_FT_DIR;
2872                 } else if (!list_empty(&rec->orphan_extents)) {
2873                         type_recovered = 1;
2874                         filetype = BTRFS_FT_REG_FILE;
2875                 } else{
2876                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2877                                rec->ino);
2878                         type_recovered = 1;
2879                         filetype = BTRFS_FT_REG_FILE;
2880                 }
2881         }
2882
2883         ret = btrfs_new_inode(trans, root, rec->ino,
2884                               mode | btrfs_type_to_imode(filetype));
2885         if (ret < 0)
2886                 goto out;
2887
2888         /*
2889          * Here inode rebuild is done, we only rebuild the inode item,
2890          * don't repair the nlink(like move to lost+found).
2891          * That is the job of nlink repair.
2892          *
2893          * We just fill the record and return
2894          */
2895         rec->found_dir_item = 1;
2896         rec->imode = mode | btrfs_type_to_imode(filetype);
2897         rec->nlink = 0;
2898         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2899         /* Ensure the inode_nlinks repair function will be called */
2900         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2901 out:
2902         return ret;
2903 }
2904
2905 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2906                                       struct btrfs_root *root,
2907                                       struct btrfs_path *path,
2908                                       struct inode_record *rec)
2909 {
2910         struct orphan_data_extent *orphan;
2911         struct orphan_data_extent *tmp;
2912         int ret = 0;
2913
2914         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2915                 /*
2916                  * Check for conflicting file extents
2917                  *
2918                  * Here we don't know whether the extents is compressed or not,
2919                  * so we can only assume it not compressed nor data offset,
2920                  * and use its disk_len as extent length.
2921                  */
2922                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2923                                        orphan->offset, orphan->disk_len, 0);
2924                 btrfs_release_path(path);
2925                 if (ret < 0)
2926                         goto out;
2927                 if (!ret) {
2928                         fprintf(stderr,
2929                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2930                                 orphan->disk_bytenr, orphan->disk_len);
2931                         ret = btrfs_free_extent(trans,
2932                                         root->fs_info->extent_root,
2933                                         orphan->disk_bytenr, orphan->disk_len,
2934                                         0, root->objectid, orphan->objectid,
2935                                         orphan->offset);
2936                         if (ret < 0)
2937                                 goto out;
2938                 }
2939                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2940                                 orphan->offset, orphan->disk_bytenr,
2941                                 orphan->disk_len, orphan->disk_len);
2942                 if (ret < 0)
2943                         goto out;
2944
2945                 /* Update file size info */
2946                 rec->found_size += orphan->disk_len;
2947                 if (rec->found_size == rec->nbytes)
2948                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2949
2950                 /* Update the file extent hole info too */
2951                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2952                                            orphan->disk_len);
2953                 if (ret < 0)
2954                         goto out;
2955                 if (RB_EMPTY_ROOT(&rec->holes))
2956                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2957
2958                 list_del(&orphan->list);
2959                 free(orphan);
2960         }
2961         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2962 out:
2963         return ret;
2964 }
2965
2966 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2967                                         struct btrfs_root *root,
2968                                         struct btrfs_path *path,
2969                                         struct inode_record *rec)
2970 {
2971         struct rb_node *node;
2972         struct file_extent_hole *hole;
2973         int found = 0;
2974         int ret = 0;
2975
2976         node = rb_first(&rec->holes);
2977
2978         while (node) {
2979                 found = 1;
2980                 hole = rb_entry(node, struct file_extent_hole, node);
2981                 ret = btrfs_punch_hole(trans, root, rec->ino,
2982                                        hole->start, hole->len);
2983                 if (ret < 0)
2984                         goto out;
2985                 ret = del_file_extent_hole(&rec->holes, hole->start,
2986                                            hole->len);
2987                 if (ret < 0)
2988                         goto out;
2989                 if (RB_EMPTY_ROOT(&rec->holes))
2990                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2991                 node = rb_first(&rec->holes);
2992         }
2993         /* special case for a file losing all its file extent */
2994         if (!found) {
2995                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2996                                        round_up(rec->isize, root->sectorsize));
2997                 if (ret < 0)
2998                         goto out;
2999         }
3000         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3001                rec->ino, root->objectid);
3002 out:
3003         return ret;
3004 }
3005
3006 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3007 {
3008         struct btrfs_trans_handle *trans;
3009         struct btrfs_path *path;
3010         int ret = 0;
3011
3012         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3013                              I_ERR_NO_ORPHAN_ITEM |
3014                              I_ERR_LINK_COUNT_WRONG |
3015                              I_ERR_NO_INODE_ITEM |
3016                              I_ERR_FILE_EXTENT_ORPHAN |
3017                              I_ERR_FILE_EXTENT_DISCOUNT|
3018                              I_ERR_FILE_NBYTES_WRONG)))
3019                 return rec->errors;
3020
3021         path = btrfs_alloc_path();
3022         if (!path)
3023                 return -ENOMEM;
3024
3025         /*
3026          * For nlink repair, it may create a dir and add link, so
3027          * 2 for parent(256)'s dir_index and dir_item
3028          * 2 for lost+found dir's inode_item and inode_ref
3029          * 1 for the new inode_ref of the file
3030          * 2 for lost+found dir's dir_index and dir_item for the file
3031          */
3032         trans = btrfs_start_transaction(root, 7);
3033         if (IS_ERR(trans)) {
3034                 btrfs_free_path(path);
3035                 return PTR_ERR(trans);
3036         }
3037
3038         if (rec->errors & I_ERR_NO_INODE_ITEM)
3039                 ret = repair_inode_no_item(trans, root, path, rec);
3040         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3041                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3042         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3043                 ret = repair_inode_discount_extent(trans, root, path, rec);
3044         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3045                 ret = repair_inode_isize(trans, root, path, rec);
3046         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3047                 ret = repair_inode_orphan_item(trans, root, path, rec);
3048         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3049                 ret = repair_inode_nlinks(trans, root, path, rec);
3050         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3051                 ret = repair_inode_nbytes(trans, root, path, rec);
3052         btrfs_commit_transaction(trans, root);
3053         btrfs_free_path(path);
3054         return ret;
3055 }
3056
3057 static int check_inode_recs(struct btrfs_root *root,
3058                             struct cache_tree *inode_cache)
3059 {
3060         struct cache_extent *cache;
3061         struct ptr_node *node;
3062         struct inode_record *rec;
3063         struct inode_backref *backref;
3064         int stage = 0;
3065         int ret = 0;
3066         int err = 0;
3067         u64 error = 0;
3068         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3069
3070         if (btrfs_root_refs(&root->root_item) == 0) {
3071                 if (!cache_tree_empty(inode_cache))
3072                         fprintf(stderr, "warning line %d\n", __LINE__);
3073                 return 0;
3074         }
3075
3076         /*
3077          * We need to record the highest inode number for later 'lost+found'
3078          * dir creation.
3079          * We must select an ino not used/referred by any existing inode, or
3080          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3081          * this may cause 'lost+found' dir has wrong nlinks.
3082          */
3083         cache = last_cache_extent(inode_cache);
3084         if (cache) {
3085                 node = container_of(cache, struct ptr_node, cache);
3086                 rec = node->data;
3087                 if (rec->ino > root->highest_inode)
3088                         root->highest_inode = rec->ino;
3089         }
3090
3091         /*
3092          * We need to repair backrefs first because we could change some of the
3093          * errors in the inode recs.
3094          *
3095          * We also need to go through and delete invalid backrefs first and then
3096          * add the correct ones second.  We do this because we may get EEXIST
3097          * when adding back the correct index because we hadn't yet deleted the
3098          * invalid index.
3099          *
3100          * For example, if we were missing a dir index then the directories
3101          * isize would be wrong, so if we fixed the isize to what we thought it
3102          * would be and then fixed the backref we'd still have a invalid fs, so
3103          * we need to add back the dir index and then check to see if the isize
3104          * is still wrong.
3105          */
3106         while (stage < 3) {
3107                 stage++;
3108                 if (stage == 3 && !err)
3109                         break;
3110
3111                 cache = search_cache_extent(inode_cache, 0);
3112                 while (repair && cache) {
3113                         node = container_of(cache, struct ptr_node, cache);
3114                         rec = node->data;
3115                         cache = next_cache_extent(cache);
3116
3117                         /* Need to free everything up and rescan */
3118                         if (stage == 3) {
3119                                 remove_cache_extent(inode_cache, &node->cache);
3120                                 free(node);
3121                                 free_inode_rec(rec);
3122                                 continue;
3123                         }
3124
3125                         if (list_empty(&rec->backrefs))
3126                                 continue;
3127
3128                         ret = repair_inode_backrefs(root, rec, inode_cache,
3129                                                     stage == 1);
3130                         if (ret < 0) {
3131                                 err = ret;
3132                                 stage = 2;
3133                                 break;
3134                         } if (ret > 0) {
3135                                 err = -EAGAIN;
3136                         }
3137                 }
3138         }
3139         if (err)
3140                 return err;
3141
3142         rec = get_inode_rec(inode_cache, root_dirid, 0);
3143         BUG_ON(IS_ERR(rec));
3144         if (rec) {
3145                 ret = check_root_dir(rec);
3146                 if (ret) {
3147                         fprintf(stderr, "root %llu root dir %llu error\n",
3148                                 (unsigned long long)root->root_key.objectid,
3149                                 (unsigned long long)root_dirid);
3150                         print_inode_error(root, rec);
3151                         error++;
3152                 }
3153         } else {
3154                 if (repair) {
3155                         struct btrfs_trans_handle *trans;
3156
3157                         trans = btrfs_start_transaction(root, 1);
3158                         if (IS_ERR(trans)) {
3159                                 err = PTR_ERR(trans);
3160                                 return err;
3161                         }
3162
3163                         fprintf(stderr,
3164                                 "root %llu missing its root dir, recreating\n",
3165                                 (unsigned long long)root->objectid);
3166
3167                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3168                         BUG_ON(ret);
3169
3170                         btrfs_commit_transaction(trans, root);
3171                         return -EAGAIN;
3172                 }
3173
3174                 fprintf(stderr, "root %llu root dir %llu not found\n",
3175                         (unsigned long long)root->root_key.objectid,
3176                         (unsigned long long)root_dirid);
3177         }
3178
3179         while (1) {
3180                 cache = search_cache_extent(inode_cache, 0);
3181                 if (!cache)
3182                         break;
3183                 node = container_of(cache, struct ptr_node, cache);
3184                 rec = node->data;
3185                 remove_cache_extent(inode_cache, &node->cache);
3186                 free(node);
3187                 if (rec->ino == root_dirid ||
3188                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3189                         free_inode_rec(rec);
3190                         continue;
3191                 }
3192
3193                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3194                         ret = check_orphan_item(root, rec->ino);
3195                         if (ret == 0)
3196                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3197                         if (can_free_inode_rec(rec)) {
3198                                 free_inode_rec(rec);
3199                                 continue;
3200                         }
3201                 }
3202
3203                 if (!rec->found_inode_item)
3204                         rec->errors |= I_ERR_NO_INODE_ITEM;
3205                 if (rec->found_link != rec->nlink)
3206                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3207                 if (repair) {
3208                         ret = try_repair_inode(root, rec);
3209                         if (ret == 0 && can_free_inode_rec(rec)) {
3210                                 free_inode_rec(rec);
3211                                 continue;
3212                         }
3213                         ret = 0;
3214                 }
3215
3216                 if (!(repair && ret == 0))
3217                         error++;
3218                 print_inode_error(root, rec);
3219                 list_for_each_entry(backref, &rec->backrefs, list) {
3220                         if (!backref->found_dir_item)
3221                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3222                         if (!backref->found_dir_index)
3223                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3224                         if (!backref->found_inode_ref)
3225                                 backref->errors |= REF_ERR_NO_INODE_REF;
3226                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3227                                 " namelen %u name %s filetype %d errors %x",
3228                                 (unsigned long long)backref->dir,
3229                                 (unsigned long long)backref->index,
3230                                 backref->namelen, backref->name,
3231                                 backref->filetype, backref->errors);
3232                         print_ref_error(backref->errors);
3233                 }
3234                 free_inode_rec(rec);
3235         }
3236         return (error > 0) ? -1 : 0;
3237 }
3238
3239 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3240                                         u64 objectid)
3241 {
3242         struct cache_extent *cache;
3243         struct root_record *rec = NULL;
3244         int ret;
3245
3246         cache = lookup_cache_extent(root_cache, objectid, 1);
3247         if (cache) {
3248                 rec = container_of(cache, struct root_record, cache);
3249         } else {
3250                 rec = calloc(1, sizeof(*rec));
3251                 if (!rec)
3252                         return ERR_PTR(-ENOMEM);
3253                 rec->objectid = objectid;
3254                 INIT_LIST_HEAD(&rec->backrefs);
3255                 rec->cache.start = objectid;
3256                 rec->cache.size = 1;
3257
3258                 ret = insert_cache_extent(root_cache, &rec->cache);
3259                 if (ret)
3260                         return ERR_PTR(-EEXIST);
3261         }
3262         return rec;
3263 }
3264
3265 static struct root_backref *get_root_backref(struct root_record *rec,
3266                                              u64 ref_root, u64 dir, u64 index,
3267                                              const char *name, int namelen)
3268 {
3269         struct root_backref *backref;
3270
3271         list_for_each_entry(backref, &rec->backrefs, list) {
3272                 if (backref->ref_root != ref_root || backref->dir != dir ||
3273                     backref->namelen != namelen)
3274                         continue;
3275                 if (memcmp(name, backref->name, namelen))
3276                         continue;
3277                 return backref;
3278         }
3279
3280         backref = calloc(1, sizeof(*backref) + namelen + 1);
3281         if (!backref)
3282                 return NULL;
3283         backref->ref_root = ref_root;
3284         backref->dir = dir;
3285         backref->index = index;
3286         backref->namelen = namelen;
3287         memcpy(backref->name, name, namelen);
3288         backref->name[namelen] = '\0';
3289         list_add_tail(&backref->list, &rec->backrefs);
3290         return backref;
3291 }
3292
3293 static void free_root_record(struct cache_extent *cache)
3294 {
3295         struct root_record *rec;
3296         struct root_backref *backref;
3297
3298         rec = container_of(cache, struct root_record, cache);
3299         while (!list_empty(&rec->backrefs)) {
3300                 backref = to_root_backref(rec->backrefs.next);
3301                 list_del(&backref->list);
3302                 free(backref);
3303         }
3304
3305         kfree(rec);
3306 }
3307
3308 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3309
3310 static int add_root_backref(struct cache_tree *root_cache,
3311                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3312                             const char *name, int namelen,
3313                             int item_type, int errors)
3314 {
3315         struct root_record *rec;
3316         struct root_backref *backref;
3317
3318         rec = get_root_rec(root_cache, root_id);
3319         BUG_ON(IS_ERR(rec));
3320         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3321         BUG_ON(!backref);
3322
3323         backref->errors |= errors;
3324
3325         if (item_type != BTRFS_DIR_ITEM_KEY) {
3326                 if (backref->found_dir_index || backref->found_back_ref ||
3327                     backref->found_forward_ref) {
3328                         if (backref->index != index)
3329                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3330                 } else {
3331                         backref->index = index;
3332                 }
3333         }
3334
3335         if (item_type == BTRFS_DIR_ITEM_KEY) {
3336                 if (backref->found_forward_ref)
3337                         rec->found_ref++;
3338                 backref->found_dir_item = 1;
3339         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3340                 backref->found_dir_index = 1;
3341         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3342                 if (backref->found_forward_ref)
3343                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3344                 else if (backref->found_dir_item)
3345                         rec->found_ref++;
3346                 backref->found_forward_ref = 1;
3347         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3348                 if (backref->found_back_ref)
3349                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3350                 backref->found_back_ref = 1;
3351         } else {
3352                 BUG_ON(1);
3353         }
3354
3355         if (backref->found_forward_ref && backref->found_dir_item)
3356                 backref->reachable = 1;
3357         return 0;
3358 }
3359
3360 static int merge_root_recs(struct btrfs_root *root,
3361                            struct cache_tree *src_cache,
3362                            struct cache_tree *dst_cache)
3363 {
3364         struct cache_extent *cache;
3365         struct ptr_node *node;
3366         struct inode_record *rec;
3367         struct inode_backref *backref;
3368         int ret = 0;
3369
3370         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3371                 free_inode_recs_tree(src_cache);
3372                 return 0;
3373         }
3374
3375         while (1) {
3376                 cache = search_cache_extent(src_cache, 0);
3377                 if (!cache)
3378                         break;
3379                 node = container_of(cache, struct ptr_node, cache);
3380                 rec = node->data;
3381                 remove_cache_extent(src_cache, &node->cache);
3382                 free(node);
3383
3384                 ret = is_child_root(root, root->objectid, rec->ino);
3385                 if (ret < 0)
3386                         break;
3387                 else if (ret == 0)
3388                         goto skip;
3389
3390                 list_for_each_entry(backref, &rec->backrefs, list) {
3391                         BUG_ON(backref->found_inode_ref);
3392                         if (backref->found_dir_item)
3393                                 add_root_backref(dst_cache, rec->ino,
3394                                         root->root_key.objectid, backref->dir,
3395                                         backref->index, backref->name,
3396                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3397                                         backref->errors);
3398                         if (backref->found_dir_index)
3399                                 add_root_backref(dst_cache, rec->ino,
3400                                         root->root_key.objectid, backref->dir,
3401                                         backref->index, backref->name,
3402                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3403                                         backref->errors);
3404                 }
3405 skip:
3406                 free_inode_rec(rec);
3407         }
3408         if (ret < 0)
3409                 return ret;
3410         return 0;
3411 }
3412
3413 static int check_root_refs(struct btrfs_root *root,
3414                            struct cache_tree *root_cache)
3415 {
3416         struct root_record *rec;
3417         struct root_record *ref_root;
3418         struct root_backref *backref;
3419         struct cache_extent *cache;
3420         int loop = 1;
3421         int ret;
3422         int error;
3423         int errors = 0;
3424
3425         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3426         BUG_ON(IS_ERR(rec));
3427         rec->found_ref = 1;
3428
3429         /* fixme: this can not detect circular references */
3430         while (loop) {
3431                 loop = 0;
3432                 cache = search_cache_extent(root_cache, 0);
3433                 while (1) {
3434                         if (!cache)
3435                                 break;
3436                         rec = container_of(cache, struct root_record, cache);
3437                         cache = next_cache_extent(cache);
3438
3439                         if (rec->found_ref == 0)
3440                                 continue;
3441
3442                         list_for_each_entry(backref, &rec->backrefs, list) {
3443                                 if (!backref->reachable)
3444                                         continue;
3445
3446                                 ref_root = get_root_rec(root_cache,
3447                                                         backref->ref_root);
3448                                 BUG_ON(IS_ERR(ref_root));
3449                                 if (ref_root->found_ref > 0)
3450                                         continue;
3451
3452                                 backref->reachable = 0;
3453                                 rec->found_ref--;
3454                                 if (rec->found_ref == 0)
3455                                         loop = 1;
3456                         }
3457                 }
3458         }
3459
3460         cache = search_cache_extent(root_cache, 0);
3461         while (1) {
3462                 if (!cache)
3463                         break;
3464                 rec = container_of(cache, struct root_record, cache);
3465                 cache = next_cache_extent(cache);
3466
3467                 if (rec->found_ref == 0 &&
3468                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3469                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3470                         ret = check_orphan_item(root->fs_info->tree_root,
3471                                                 rec->objectid);
3472                         if (ret == 0)
3473                                 continue;
3474
3475                         /*
3476                          * If we don't have a root item then we likely just have
3477                          * a dir item in a snapshot for this root but no actual
3478                          * ref key or anything so it's meaningless.
3479                          */
3480                         if (!rec->found_root_item)
3481                                 continue;
3482                         errors++;
3483                         fprintf(stderr, "fs tree %llu not referenced\n",
3484                                 (unsigned long long)rec->objectid);
3485                 }
3486
3487                 error = 0;
3488                 if (rec->found_ref > 0 && !rec->found_root_item)
3489                         error = 1;
3490                 list_for_each_entry(backref, &rec->backrefs, list) {
3491                         if (!backref->found_dir_item)
3492                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3493                         if (!backref->found_dir_index)
3494                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3495                         if (!backref->found_back_ref)
3496                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3497                         if (!backref->found_forward_ref)
3498                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3499                         if (backref->reachable && backref->errors)
3500                                 error = 1;
3501                 }
3502                 if (!error)
3503                         continue;
3504
3505                 errors++;
3506                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3507                         (unsigned long long)rec->objectid, rec->found_ref,
3508                          rec->found_root_item ? "" : "not found");
3509
3510                 list_for_each_entry(backref, &rec->backrefs, list) {
3511                         if (!backref->reachable)
3512                                 continue;
3513                         if (!backref->errors && rec->found_root_item)
3514                                 continue;
3515                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3516                                 " index %llu namelen %u name %s errors %x\n",
3517                                 (unsigned long long)backref->ref_root,
3518                                 (unsigned long long)backref->dir,
3519                                 (unsigned long long)backref->index,
3520                                 backref->namelen, backref->name,
3521                                 backref->errors);
3522                         print_ref_error(backref->errors);
3523                 }
3524         }
3525         return errors > 0 ? 1 : 0;
3526 }
3527
3528 static int process_root_ref(struct extent_buffer *eb, int slot,
3529                             struct btrfs_key *key,
3530                             struct cache_tree *root_cache)
3531 {
3532         u64 dirid;
3533         u64 index;
3534         u32 len;
3535         u32 name_len;
3536         struct btrfs_root_ref *ref;
3537         char namebuf[BTRFS_NAME_LEN];
3538         int error;
3539
3540         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3541
3542         dirid = btrfs_root_ref_dirid(eb, ref);
3543         index = btrfs_root_ref_sequence(eb, ref);
3544         name_len = btrfs_root_ref_name_len(eb, ref);
3545
3546         if (name_len <= BTRFS_NAME_LEN) {
3547                 len = name_len;
3548                 error = 0;
3549         } else {
3550                 len = BTRFS_NAME_LEN;
3551                 error = REF_ERR_NAME_TOO_LONG;
3552         }
3553         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3554
3555         if (key->type == BTRFS_ROOT_REF_KEY) {
3556                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3557                                  index, namebuf, len, key->type, error);
3558         } else {
3559                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3560                                  index, namebuf, len, key->type, error);
3561         }
3562         return 0;
3563 }
3564
3565 static void free_corrupt_block(struct cache_extent *cache)
3566 {
3567         struct btrfs_corrupt_block *corrupt;
3568
3569         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3570         free(corrupt);
3571 }
3572
3573 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3574
3575 /*
3576  * Repair the btree of the given root.
3577  *
3578  * The fix is to remove the node key in corrupt_blocks cache_tree.
3579  * and rebalance the tree.
3580  * After the fix, the btree should be writeable.
3581  */
3582 static int repair_btree(struct btrfs_root *root,
3583                         struct cache_tree *corrupt_blocks)
3584 {
3585         struct btrfs_trans_handle *trans;
3586         struct btrfs_path *path;
3587         struct btrfs_corrupt_block *corrupt;
3588         struct cache_extent *cache;
3589         struct btrfs_key key;
3590         u64 offset;
3591         int level;
3592         int ret = 0;
3593
3594         if (cache_tree_empty(corrupt_blocks))
3595                 return 0;
3596
3597         path = btrfs_alloc_path();
3598         if (!path)
3599                 return -ENOMEM;
3600
3601         trans = btrfs_start_transaction(root, 1);
3602         if (IS_ERR(trans)) {
3603                 ret = PTR_ERR(trans);
3604                 fprintf(stderr, "Error starting transaction: %s\n",
3605                         strerror(-ret));
3606                 goto out_free_path;
3607         }
3608         cache = first_cache_extent(corrupt_blocks);
3609         while (cache) {
3610                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3611                                        cache);
3612                 level = corrupt->level;
3613                 path->lowest_level = level;
3614                 key.objectid = corrupt->key.objectid;
3615                 key.type = corrupt->key.type;
3616                 key.offset = corrupt->key.offset;
3617
3618                 /*
3619                  * Here we don't want to do any tree balance, since it may
3620                  * cause a balance with corrupted brother leaf/node,
3621                  * so ins_len set to 0 here.
3622                  * Balance will be done after all corrupt node/leaf is deleted.
3623                  */
3624                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3625                 if (ret < 0)
3626                         goto out;
3627                 offset = btrfs_node_blockptr(path->nodes[level],
3628                                              path->slots[level]);
3629
3630                 /* Remove the ptr */
3631                 ret = btrfs_del_ptr(trans, root, path, level,
3632                                     path->slots[level]);
3633                 if (ret < 0)
3634                         goto out;
3635                 /*
3636                  * Remove the corresponding extent
3637                  * return value is not concerned.
3638                  */
3639                 btrfs_release_path(path);
3640                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3641                                         0, root->root_key.objectid,
3642                                         level - 1, 0);
3643                 cache = next_cache_extent(cache);
3644         }
3645
3646         /* Balance the btree using btrfs_search_slot() */
3647         cache = first_cache_extent(corrupt_blocks);
3648         while (cache) {
3649                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3650                                        cache);
3651                 memcpy(&key, &corrupt->key, sizeof(key));
3652                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3653                 if (ret < 0)
3654                         goto out;
3655                 /* return will always >0 since it won't find the item */
3656                 ret = 0;
3657                 btrfs_release_path(path);
3658                 cache = next_cache_extent(cache);
3659         }
3660 out:
3661         btrfs_commit_transaction(trans, root);
3662 out_free_path:
3663         btrfs_free_path(path);
3664         return ret;
3665 }
3666
3667 static int check_fs_root(struct btrfs_root *root,
3668                          struct cache_tree *root_cache,
3669                          struct walk_control *wc)
3670 {
3671         int ret = 0;
3672         int err = 0;
3673         int wret;
3674         int level;
3675         struct btrfs_path path;
3676         struct shared_node root_node;
3677         struct root_record *rec;
3678         struct btrfs_root_item *root_item = &root->root_item;
3679         struct cache_tree corrupt_blocks;
3680         struct orphan_data_extent *orphan;
3681         struct orphan_data_extent *tmp;
3682         enum btrfs_tree_block_status status;
3683         struct node_refs nrefs;
3684
3685         /*
3686          * Reuse the corrupt_block cache tree to record corrupted tree block
3687          *
3688          * Unlike the usage in extent tree check, here we do it in a per
3689          * fs/subvol tree base.
3690          */
3691         cache_tree_init(&corrupt_blocks);
3692         root->fs_info->corrupt_blocks = &corrupt_blocks;
3693
3694         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3695                 rec = get_root_rec(root_cache, root->root_key.objectid);
3696                 BUG_ON(IS_ERR(rec));
3697                 if (btrfs_root_refs(root_item) > 0)
3698                         rec->found_root_item = 1;
3699         }
3700
3701         btrfs_init_path(&path);
3702         memset(&root_node, 0, sizeof(root_node));
3703         cache_tree_init(&root_node.root_cache);
3704         cache_tree_init(&root_node.inode_cache);
3705         memset(&nrefs, 0, sizeof(nrefs));
3706
3707         /* Move the orphan extent record to corresponding inode_record */
3708         list_for_each_entry_safe(orphan, tmp,
3709                                  &root->orphan_data_extents, list) {
3710                 struct inode_record *inode;
3711
3712                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3713                                       1);
3714                 BUG_ON(IS_ERR(inode));
3715                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3716                 list_move(&orphan->list, &inode->orphan_extents);
3717         }
3718
3719         level = btrfs_header_level(root->node);
3720         memset(wc->nodes, 0, sizeof(wc->nodes));
3721         wc->nodes[level] = &root_node;
3722         wc->active_node = level;
3723         wc->root_level = level;
3724
3725         /* We may not have checked the root block, lets do that now */
3726         if (btrfs_is_leaf(root->node))
3727                 status = btrfs_check_leaf(root, NULL, root->node);
3728         else
3729                 status = btrfs_check_node(root, NULL, root->node);
3730         if (status != BTRFS_TREE_BLOCK_CLEAN)
3731                 return -EIO;
3732
3733         if (btrfs_root_refs(root_item) > 0 ||
3734             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3735                 path.nodes[level] = root->node;
3736                 extent_buffer_get(root->node);
3737                 path.slots[level] = 0;
3738         } else {
3739                 struct btrfs_key key;
3740                 struct btrfs_disk_key found_key;
3741
3742                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3743                 level = root_item->drop_level;
3744                 path.lowest_level = level;
3745                 if (level > btrfs_header_level(root->node) ||
3746                     level >= BTRFS_MAX_LEVEL) {
3747                         error("ignoring invalid drop level: %u", level);
3748                         goto skip_walking;
3749                 }
3750                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3751                 if (wret < 0)
3752                         goto skip_walking;
3753                 btrfs_node_key(path.nodes[level], &found_key,
3754                                 path.slots[level]);
3755                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3756                                         sizeof(found_key)));
3757         }
3758
3759         while (1) {
3760                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3761                 if (wret < 0)
3762                         ret = wret;
3763                 if (wret != 0)
3764                         break;
3765
3766                 wret = walk_up_tree(root, &path, wc, &level);
3767                 if (wret < 0)
3768                         ret = wret;
3769                 if (wret != 0)
3770                         break;
3771         }
3772 skip_walking:
3773         btrfs_release_path(&path);
3774
3775         if (!cache_tree_empty(&corrupt_blocks)) {
3776                 struct cache_extent *cache;
3777                 struct btrfs_corrupt_block *corrupt;
3778
3779                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3780                        root->root_key.objectid);
3781                 cache = first_cache_extent(&corrupt_blocks);
3782                 while (cache) {
3783                         corrupt = container_of(cache,
3784                                                struct btrfs_corrupt_block,
3785                                                cache);
3786                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3787                                cache->start, corrupt->level,
3788                                corrupt->key.objectid, corrupt->key.type,
3789                                corrupt->key.offset);
3790                         cache = next_cache_extent(cache);
3791                 }
3792                 if (repair) {
3793                         printf("Try to repair the btree for root %llu\n",
3794                                root->root_key.objectid);
3795                         ret = repair_btree(root, &corrupt_blocks);
3796                         if (ret < 0)
3797                                 fprintf(stderr, "Failed to repair btree: %s\n",
3798                                         strerror(-ret));
3799                         if (!ret)
3800                                 printf("Btree for root %llu is fixed\n",
3801                                        root->root_key.objectid);
3802                 }
3803         }
3804
3805         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3806         if (err < 0)
3807                 ret = err;
3808
3809         if (root_node.current) {
3810                 root_node.current->checked = 1;
3811                 maybe_free_inode_rec(&root_node.inode_cache,
3812                                 root_node.current);
3813         }
3814
3815         err = check_inode_recs(root, &root_node.inode_cache);
3816         if (!ret)
3817                 ret = err;
3818
3819         free_corrupt_blocks_tree(&corrupt_blocks);
3820         root->fs_info->corrupt_blocks = NULL;
3821         free_orphan_data_extents(&root->orphan_data_extents);
3822         return ret;
3823 }
3824
3825 static int fs_root_objectid(u64 objectid)
3826 {
3827         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3828             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3829                 return 1;
3830         return is_fstree(objectid);
3831 }
3832
3833 static int check_fs_roots(struct btrfs_root *root,
3834                           struct cache_tree *root_cache)
3835 {
3836         struct btrfs_path path;
3837         struct btrfs_key key;
3838         struct walk_control wc;
3839         struct extent_buffer *leaf, *tree_node;
3840         struct btrfs_root *tmp_root;
3841         struct btrfs_root *tree_root = root->fs_info->tree_root;
3842         int ret;
3843         int err = 0;
3844
3845         if (ctx.progress_enabled) {
3846                 ctx.tp = TASK_FS_ROOTS;
3847                 task_start(ctx.info);
3848         }
3849
3850         /*
3851          * Just in case we made any changes to the extent tree that weren't
3852          * reflected into the free space cache yet.
3853          */
3854         if (repair)
3855                 reset_cached_block_groups(root->fs_info);
3856         memset(&wc, 0, sizeof(wc));
3857         cache_tree_init(&wc.shared);
3858         btrfs_init_path(&path);
3859
3860 again:
3861         key.offset = 0;
3862         key.objectid = 0;
3863         key.type = BTRFS_ROOT_ITEM_KEY;
3864         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3865         if (ret < 0) {
3866                 err = 1;
3867                 goto out;
3868         }
3869         tree_node = tree_root->node;
3870         while (1) {
3871                 if (tree_node != tree_root->node) {
3872                         free_root_recs_tree(root_cache);
3873                         btrfs_release_path(&path);
3874                         goto again;
3875                 }
3876                 leaf = path.nodes[0];
3877                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3878                         ret = btrfs_next_leaf(tree_root, &path);
3879                         if (ret) {
3880                                 if (ret < 0)
3881                                         err = 1;
3882                                 break;
3883                         }
3884                         leaf = path.nodes[0];
3885                 }
3886                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3887                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3888                     fs_root_objectid(key.objectid)) {
3889                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3890                                 tmp_root = btrfs_read_fs_root_no_cache(
3891                                                 root->fs_info, &key);
3892                         } else {
3893                                 key.offset = (u64)-1;
3894                                 tmp_root = btrfs_read_fs_root(
3895                                                 root->fs_info, &key);
3896                         }
3897                         if (IS_ERR(tmp_root)) {
3898                                 err = 1;
3899                                 goto next;
3900                         }
3901                         ret = check_fs_root(tmp_root, root_cache, &wc);
3902                         if (ret == -EAGAIN) {
3903                                 free_root_recs_tree(root_cache);
3904                                 btrfs_release_path(&path);
3905                                 goto again;
3906                         }
3907                         if (ret)
3908                                 err = 1;
3909                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3910                                 btrfs_free_fs_root(tmp_root);
3911                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3912                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3913                         process_root_ref(leaf, path.slots[0], &key,
3914                                          root_cache);
3915                 }
3916 next:
3917                 path.slots[0]++;
3918         }
3919 out:
3920         btrfs_release_path(&path);
3921         if (err)
3922                 free_extent_cache_tree(&wc.shared);
3923         if (!cache_tree_empty(&wc.shared))
3924                 fprintf(stderr, "warning line %d\n", __LINE__);
3925
3926         task_stop(ctx.info);
3927
3928         return err;
3929 }
3930
3931 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3932 {
3933         struct rb_node *n;
3934         struct extent_backref *back;
3935         struct tree_backref *tback;
3936         struct data_backref *dback;
3937         u64 found = 0;
3938         int err = 0;
3939
3940         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3941                 back = rb_node_to_extent_backref(n);
3942                 if (!back->found_extent_tree) {
3943                         err = 1;
3944                         if (!print_errs)
3945                                 goto out;
3946                         if (back->is_data) {
3947                                 dback = to_data_backref(back);
3948                                 fprintf(stderr, "Backref %llu %s %llu"
3949                                         " owner %llu offset %llu num_refs %lu"
3950                                         " not found in extent tree\n",
3951                                         (unsigned long long)rec->start,
3952                                         back->full_backref ?
3953                                         "parent" : "root",
3954                                         back->full_backref ?
3955                                         (unsigned long long)dback->parent:
3956                                         (unsigned long long)dback->root,
3957                                         (unsigned long long)dback->owner,
3958                                         (unsigned long long)dback->offset,
3959                                         (unsigned long)dback->num_refs);
3960                         } else {
3961                                 tback = to_tree_backref(back);
3962                                 fprintf(stderr, "Backref %llu parent %llu"
3963                                         " root %llu not found in extent tree\n",
3964                                         (unsigned long long)rec->start,
3965                                         (unsigned long long)tback->parent,
3966                                         (unsigned long long)tback->root);
3967                         }
3968                 }
3969                 if (!back->is_data && !back->found_ref) {
3970                         err = 1;
3971                         if (!print_errs)
3972                                 goto out;
3973                         tback = to_tree_backref(back);
3974                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3975                                 (unsigned long long)rec->start,
3976                                 back->full_backref ? "parent" : "root",
3977                                 back->full_backref ?
3978                                 (unsigned long long)tback->parent :
3979                                 (unsigned long long)tback->root, back);
3980                 }
3981                 if (back->is_data) {
3982                         dback = to_data_backref(back);
3983                         if (dback->found_ref != dback->num_refs) {
3984                                 err = 1;
3985                                 if (!print_errs)
3986                                         goto out;
3987                                 fprintf(stderr, "Incorrect local backref count"
3988                                         " on %llu %s %llu owner %llu"
3989                                         " offset %llu found %u wanted %u back %p\n",
3990                                         (unsigned long long)rec->start,
3991                                         back->full_backref ?
3992                                         "parent" : "root",
3993                                         back->full_backref ?
3994                                         (unsigned long long)dback->parent:
3995                                         (unsigned long long)dback->root,
3996                                         (unsigned long long)dback->owner,
3997                                         (unsigned long long)dback->offset,
3998                                         dback->found_ref, dback->num_refs, back);
3999                         }
4000                         if (dback->disk_bytenr != rec->start) {
4001                                 err = 1;
4002                                 if (!print_errs)
4003                                         goto out;
4004                                 fprintf(stderr, "Backref disk bytenr does not"
4005                                         " match extent record, bytenr=%llu, "
4006                                         "ref bytenr=%llu\n",
4007                                         (unsigned long long)rec->start,
4008                                         (unsigned long long)dback->disk_bytenr);
4009                         }
4010
4011                         if (dback->bytes != rec->nr) {
4012                                 err = 1;
4013                                 if (!print_errs)
4014                                         goto out;
4015                                 fprintf(stderr, "Backref bytes do not match "
4016                                         "extent backref, bytenr=%llu, ref "
4017                                         "bytes=%llu, backref bytes=%llu\n",
4018                                         (unsigned long long)rec->start,
4019                                         (unsigned long long)rec->nr,
4020                                         (unsigned long long)dback->bytes);
4021                         }
4022                 }
4023                 if (!back->is_data) {
4024                         found += 1;
4025                 } else {
4026                         dback = to_data_backref(back);
4027                         found += dback->found_ref;
4028                 }
4029         }
4030         if (found != rec->refs) {
4031                 err = 1;
4032                 if (!print_errs)
4033                         goto out;
4034                 fprintf(stderr, "Incorrect global backref count "
4035                         "on %llu found %llu wanted %llu\n",
4036                         (unsigned long long)rec->start,
4037                         (unsigned long long)found,
4038                         (unsigned long long)rec->refs);
4039         }
4040 out:
4041         return err;
4042 }
4043
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
4050
4051 static void free_all_extent_backrefs(struct extent_record *rec)
4052 {
4053         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4054 }
4055
4056 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4057                                      struct cache_tree *extent_cache)
4058 {
4059         struct cache_extent *cache;
4060         struct extent_record *rec;
4061
4062         while (1) {
4063                 cache = first_cache_extent(extent_cache);
4064                 if (!cache)
4065                         break;
4066                 rec = container_of(cache, struct extent_record, cache);
4067                 remove_cache_extent(extent_cache, cache);
4068                 free_all_extent_backrefs(rec);
4069                 free(rec);
4070         }
4071 }
4072
4073 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4074                                  struct extent_record *rec)
4075 {
4076         if (rec->content_checked && rec->owner_ref_checked &&
4077             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4078             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4079             !rec->bad_full_backref && !rec->crossing_stripes &&
4080             !rec->wrong_chunk_type) {
4081                 remove_cache_extent(extent_cache, &rec->cache);
4082                 free_all_extent_backrefs(rec);
4083                 list_del_init(&rec->list);
4084                 free(rec);
4085         }
4086         return 0;
4087 }
4088
4089 static int check_owner_ref(struct btrfs_root *root,
4090                             struct extent_record *rec,
4091                             struct extent_buffer *buf)
4092 {
4093         struct extent_backref *node, *tmp;
4094         struct tree_backref *back;
4095         struct btrfs_root *ref_root;
4096         struct btrfs_key key;
4097         struct btrfs_path path;
4098         struct extent_buffer *parent;
4099         int level;
4100         int found = 0;
4101         int ret;
4102
4103         rbtree_postorder_for_each_entry_safe(node, tmp,
4104                                              &rec->backref_tree, node) {
4105                 if (node->is_data)
4106                         continue;
4107                 if (!node->found_ref)
4108                         continue;
4109                 if (node->full_backref)
4110                         continue;
4111                 back = to_tree_backref(node);
4112                 if (btrfs_header_owner(buf) == back->root)
4113                         return 0;
4114         }
4115         BUG_ON(rec->is_root);
4116
4117         /* try to find the block by search corresponding fs tree */
4118         key.objectid = btrfs_header_owner(buf);
4119         key.type = BTRFS_ROOT_ITEM_KEY;
4120         key.offset = (u64)-1;
4121
4122         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4123         if (IS_ERR(ref_root))
4124                 return 1;
4125
4126         level = btrfs_header_level(buf);
4127         if (level == 0)
4128                 btrfs_item_key_to_cpu(buf, &key, 0);
4129         else
4130                 btrfs_node_key_to_cpu(buf, &key, 0);
4131
4132         btrfs_init_path(&path);
4133         path.lowest_level = level + 1;
4134         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4135         if (ret < 0)
4136                 return 0;
4137
4138         parent = path.nodes[level + 1];
4139         if (parent && buf->start == btrfs_node_blockptr(parent,
4140                                                         path.slots[level + 1]))
4141                 found = 1;
4142
4143         btrfs_release_path(&path);
4144         return found ? 0 : 1;
4145 }
4146
4147 static int is_extent_tree_record(struct extent_record *rec)
4148 {
4149         struct extent_backref *ref, *tmp;
4150         struct tree_backref *back;
4151         int is_extent = 0;
4152
4153         rbtree_postorder_for_each_entry_safe(ref, tmp,
4154                                              &rec->backref_tree, node) {
4155                 if (ref->is_data)
4156                         return 0;
4157                 back = to_tree_backref(ref);
4158                 if (ref->full_backref)
4159                         return 0;
4160                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4161                         is_extent = 1;
4162         }
4163         return is_extent;
4164 }
4165
4166
4167 static int record_bad_block_io(struct btrfs_fs_info *info,
4168                                struct cache_tree *extent_cache,
4169                                u64 start, u64 len)
4170 {
4171         struct extent_record *rec;
4172         struct cache_extent *cache;
4173         struct btrfs_key key;
4174
4175         cache = lookup_cache_extent(extent_cache, start, len);
4176         if (!cache)
4177                 return 0;
4178
4179         rec = container_of(cache, struct extent_record, cache);
4180         if (!is_extent_tree_record(rec))
4181                 return 0;
4182
4183         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4184         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4185 }
4186
4187 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4188                        struct extent_buffer *buf, int slot)
4189 {
4190         if (btrfs_header_level(buf)) {
4191                 struct btrfs_key_ptr ptr1, ptr2;
4192
4193                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4194                                    sizeof(struct btrfs_key_ptr));
4195                 read_extent_buffer(buf, &ptr2,
4196                                    btrfs_node_key_ptr_offset(slot + 1),
4197                                    sizeof(struct btrfs_key_ptr));
4198                 write_extent_buffer(buf, &ptr1,
4199                                     btrfs_node_key_ptr_offset(slot + 1),
4200                                     sizeof(struct btrfs_key_ptr));
4201                 write_extent_buffer(buf, &ptr2,
4202                                     btrfs_node_key_ptr_offset(slot),
4203                                     sizeof(struct btrfs_key_ptr));
4204                 if (slot == 0) {
4205                         struct btrfs_disk_key key;
4206                         btrfs_node_key(buf, &key, 0);
4207                         btrfs_fixup_low_keys(root, path, &key,
4208                                              btrfs_header_level(buf) + 1);
4209                 }
4210         } else {
4211                 struct btrfs_item *item1, *item2;
4212                 struct btrfs_key k1, k2;
4213                 char *item1_data, *item2_data;
4214                 u32 item1_offset, item2_offset, item1_size, item2_size;
4215
4216                 item1 = btrfs_item_nr(slot);
4217                 item2 = btrfs_item_nr(slot + 1);
4218                 btrfs_item_key_to_cpu(buf, &k1, slot);
4219                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4220                 item1_offset = btrfs_item_offset(buf, item1);
4221                 item2_offset = btrfs_item_offset(buf, item2);
4222                 item1_size = btrfs_item_size(buf, item1);
4223                 item2_size = btrfs_item_size(buf, item2);
4224
4225                 item1_data = malloc(item1_size);
4226                 if (!item1_data)
4227                         return -ENOMEM;
4228                 item2_data = malloc(item2_size);
4229                 if (!item2_data) {
4230                         free(item1_data);
4231                         return -ENOMEM;
4232                 }
4233
4234                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4235                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4236
4237                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4238                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4239                 free(item1_data);
4240                 free(item2_data);
4241
4242                 btrfs_set_item_offset(buf, item1, item2_offset);
4243                 btrfs_set_item_offset(buf, item2, item1_offset);
4244                 btrfs_set_item_size(buf, item1, item2_size);
4245                 btrfs_set_item_size(buf, item2, item1_size);
4246
4247                 path->slots[0] = slot;
4248                 btrfs_set_item_key_unsafe(root, path, &k2);
4249                 path->slots[0] = slot + 1;
4250                 btrfs_set_item_key_unsafe(root, path, &k1);
4251         }
4252         return 0;
4253 }
4254
4255 static int fix_key_order(struct btrfs_trans_handle *trans,
4256                          struct btrfs_root *root,
4257                          struct btrfs_path *path)
4258 {
4259         struct extent_buffer *buf;
4260         struct btrfs_key k1, k2;
4261         int i;
4262         int level = path->lowest_level;
4263         int ret = -EIO;
4264
4265         buf = path->nodes[level];
4266         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4267                 if (level) {
4268                         btrfs_node_key_to_cpu(buf, &k1, i);
4269                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4270                 } else {
4271                         btrfs_item_key_to_cpu(buf, &k1, i);
4272                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4273                 }
4274                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4275                         continue;
4276                 ret = swap_values(root, path, buf, i);
4277                 if (ret)
4278                         break;
4279                 btrfs_mark_buffer_dirty(buf);
4280                 i = 0;
4281         }
4282         return ret;
4283 }
4284
4285 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4286                              struct btrfs_root *root,
4287                              struct btrfs_path *path,
4288                              struct extent_buffer *buf, int slot)
4289 {
4290         struct btrfs_key key;
4291         int nritems = btrfs_header_nritems(buf);
4292
4293         btrfs_item_key_to_cpu(buf, &key, slot);
4294
4295         /* These are all the keys we can deal with missing. */
4296         if (key.type != BTRFS_DIR_INDEX_KEY &&
4297             key.type != BTRFS_EXTENT_ITEM_KEY &&
4298             key.type != BTRFS_METADATA_ITEM_KEY &&
4299             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4300             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4301                 return -1;
4302
4303         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4304                (unsigned long long)key.objectid, key.type,
4305                (unsigned long long)key.offset, slot, buf->start);
4306         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4307                               btrfs_item_nr_offset(slot + 1),
4308                               sizeof(struct btrfs_item) *
4309                               (nritems - slot - 1));
4310         btrfs_set_header_nritems(buf, nritems - 1);
4311         if (slot == 0) {
4312                 struct btrfs_disk_key disk_key;
4313
4314                 btrfs_item_key(buf, &disk_key, 0);
4315                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4316         }
4317         btrfs_mark_buffer_dirty(buf);
4318         return 0;
4319 }
4320
4321 static int fix_item_offset(struct btrfs_trans_handle *trans,
4322                            struct btrfs_root *root,
4323                            struct btrfs_path *path)
4324 {
4325         struct extent_buffer *buf;
4326         int i;
4327         int ret = 0;
4328
4329         /* We should only get this for leaves */
4330         BUG_ON(path->lowest_level);
4331         buf = path->nodes[0];
4332 again:
4333         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4334                 unsigned int shift = 0, offset;
4335
4336                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4337                     BTRFS_LEAF_DATA_SIZE(root)) {
4338                         if (btrfs_item_end_nr(buf, i) >
4339                             BTRFS_LEAF_DATA_SIZE(root)) {
4340                                 ret = delete_bogus_item(trans, root, path,
4341                                                         buf, i);
4342                                 if (!ret)
4343                                         goto again;
4344                                 fprintf(stderr, "item is off the end of the "
4345                                         "leaf, can't fix\n");
4346                                 ret = -EIO;
4347                                 break;
4348                         }
4349                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4350                                 btrfs_item_end_nr(buf, i);
4351                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4352                            btrfs_item_offset_nr(buf, i - 1)) {
4353                         if (btrfs_item_end_nr(buf, i) >
4354                             btrfs_item_offset_nr(buf, i - 1)) {
4355                                 ret = delete_bogus_item(trans, root, path,
4356                                                         buf, i);
4357                                 if (!ret)
4358                                         goto again;
4359                                 fprintf(stderr, "items overlap, can't fix\n");
4360                                 ret = -EIO;
4361                                 break;
4362                         }
4363                         shift = btrfs_item_offset_nr(buf, i - 1) -
4364                                 btrfs_item_end_nr(buf, i);
4365                 }
4366                 if (!shift)
4367                         continue;
4368
4369                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4370                        i, shift, (unsigned long long)buf->start);
4371                 offset = btrfs_item_offset_nr(buf, i);
4372                 memmove_extent_buffer(buf,
4373                                       btrfs_leaf_data(buf) + offset + shift,
4374                                       btrfs_leaf_data(buf) + offset,
4375                                       btrfs_item_size_nr(buf, i));
4376                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4377                                       offset + shift);
4378                 btrfs_mark_buffer_dirty(buf);
4379         }
4380
4381         /*
4382          * We may have moved things, in which case we want to exit so we don't
4383          * write those changes out.  Once we have proper abort functionality in
4384          * progs this can be changed to something nicer.
4385          */
4386         BUG_ON(ret);
4387         return ret;
4388 }
4389
4390 /*
4391  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4392  * then just return -EIO.
4393  */
4394 static int try_to_fix_bad_block(struct btrfs_root *root,
4395                                 struct extent_buffer *buf,
4396                                 enum btrfs_tree_block_status status)
4397 {
4398         struct btrfs_trans_handle *trans;
4399         struct ulist *roots;
4400         struct ulist_node *node;
4401         struct btrfs_root *search_root;
4402         struct btrfs_path *path;
4403         struct ulist_iterator iter;
4404         struct btrfs_key root_key, key;
4405         int ret;
4406
4407         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4408             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4409                 return -EIO;
4410
4411         path = btrfs_alloc_path();
4412         if (!path)
4413                 return -EIO;
4414
4415         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4416                                    0, &roots);
4417         if (ret) {
4418                 btrfs_free_path(path);
4419                 return -EIO;
4420         }
4421
4422         ULIST_ITER_INIT(&iter);
4423         while ((node = ulist_next(roots, &iter))) {
4424                 root_key.objectid = node->val;
4425                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4426                 root_key.offset = (u64)-1;
4427
4428                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4429                 if (IS_ERR(root)) {
4430                         ret = -EIO;
4431                         break;
4432                 }
4433
4434
4435                 trans = btrfs_start_transaction(search_root, 0);
4436                 if (IS_ERR(trans)) {
4437                         ret = PTR_ERR(trans);
4438                         break;
4439                 }
4440
4441                 path->lowest_level = btrfs_header_level(buf);
4442                 path->skip_check_block = 1;
4443                 if (path->lowest_level)
4444                         btrfs_node_key_to_cpu(buf, &key, 0);
4445                 else
4446                         btrfs_item_key_to_cpu(buf, &key, 0);
4447                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4448                 if (ret) {
4449                         ret = -EIO;
4450                         btrfs_commit_transaction(trans, search_root);
4451                         break;
4452                 }
4453                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4454                         ret = fix_key_order(trans, search_root, path);
4455                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4456                         ret = fix_item_offset(trans, search_root, path);
4457                 if (ret) {
4458                         btrfs_commit_transaction(trans, search_root);
4459                         break;
4460                 }
4461                 btrfs_release_path(path);
4462                 btrfs_commit_transaction(trans, search_root);
4463         }
4464         ulist_free(roots);
4465         btrfs_free_path(path);
4466         return ret;
4467 }
4468
4469 static int check_block(struct btrfs_root *root,
4470                        struct cache_tree *extent_cache,
4471                        struct extent_buffer *buf, u64 flags)
4472 {
4473         struct extent_record *rec;
4474         struct cache_extent *cache;
4475         struct btrfs_key key;
4476         enum btrfs_tree_block_status status;
4477         int ret = 0;
4478         int level;
4479
4480         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4481         if (!cache)
4482                 return 1;
4483         rec = container_of(cache, struct extent_record, cache);
4484         rec->generation = btrfs_header_generation(buf);
4485
4486         level = btrfs_header_level(buf);
4487         if (btrfs_header_nritems(buf) > 0) {
4488
4489                 if (level == 0)
4490                         btrfs_item_key_to_cpu(buf, &key, 0);
4491                 else
4492                         btrfs_node_key_to_cpu(buf, &key, 0);
4493
4494                 rec->info_objectid = key.objectid;
4495         }
4496         rec->info_level = level;
4497
4498         if (btrfs_is_leaf(buf))
4499                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4500         else
4501                 status = btrfs_check_node(root, &rec->parent_key, buf);
4502
4503         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4504                 if (repair)
4505                         status = try_to_fix_bad_block(root, buf, status);
4506                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4507                         ret = -EIO;
4508                         fprintf(stderr, "bad block %llu\n",
4509                                 (unsigned long long)buf->start);
4510                 } else {
4511                         /*
4512                          * Signal to callers we need to start the scan over
4513                          * again since we'll have cowed blocks.
4514                          */
4515                         ret = -EAGAIN;
4516                 }
4517         } else {
4518                 rec->content_checked = 1;
4519                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4520                         rec->owner_ref_checked = 1;
4521                 else {
4522                         ret = check_owner_ref(root, rec, buf);
4523                         if (!ret)
4524                                 rec->owner_ref_checked = 1;
4525                 }
4526         }
4527         if (!ret)
4528                 maybe_free_extent_rec(extent_cache, rec);
4529         return ret;
4530 }
4531
4532
4533 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4534                                                 u64 parent, u64 root)
4535 {
4536         struct rb_node *node;
4537         struct tree_backref *back = NULL;
4538         struct tree_backref match = {
4539                 .node = {
4540                         .is_data = 0,
4541                 },
4542         };
4543
4544         if (parent) {
4545                 match.parent = parent;
4546                 match.node.full_backref = 1;
4547         } else {
4548                 match.root = root;
4549         }
4550
4551         node = rb_search(&rec->backref_tree, &match.node.node,
4552                          (rb_compare_keys)compare_extent_backref, NULL);
4553         if (node)
4554                 back = to_tree_backref(rb_node_to_extent_backref(node));
4555
4556         return back;
4557 }
4558
4559 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4560                                                 u64 parent, u64 root)
4561 {
4562         struct tree_backref *ref = malloc(sizeof(*ref));
4563
4564         if (!ref)
4565                 return NULL;
4566         memset(&ref->node, 0, sizeof(ref->node));
4567         if (parent > 0) {
4568                 ref->parent = parent;
4569                 ref->node.full_backref = 1;
4570         } else {
4571                 ref->root = root;
4572                 ref->node.full_backref = 0;
4573         }
4574         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4575
4576         return ref;
4577 }
4578
4579 static struct data_backref *find_data_backref(struct extent_record *rec,
4580                                                 u64 parent, u64 root,
4581                                                 u64 owner, u64 offset,
4582                                                 int found_ref,
4583                                                 u64 disk_bytenr, u64 bytes)
4584 {
4585         struct rb_node *node;
4586         struct data_backref *back = NULL;
4587         struct data_backref match = {
4588                 .node = {
4589                         .is_data = 1,
4590                 },
4591                 .owner = owner,
4592                 .offset = offset,
4593                 .bytes = bytes,
4594                 .found_ref = found_ref,
4595                 .disk_bytenr = disk_bytenr,
4596         };
4597
4598         if (parent) {
4599                 match.parent = parent;
4600                 match.node.full_backref = 1;
4601         } else {
4602                 match.root = root;
4603         }
4604
4605         node = rb_search(&rec->backref_tree, &match.node.node,
4606                          (rb_compare_keys)compare_extent_backref, NULL);
4607         if (node)
4608                 back = to_data_backref(rb_node_to_extent_backref(node));
4609
4610         return back;
4611 }
4612
4613 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4614                                                 u64 parent, u64 root,
4615                                                 u64 owner, u64 offset,
4616                                                 u64 max_size)
4617 {
4618         struct data_backref *ref = malloc(sizeof(*ref));
4619
4620         if (!ref)
4621                 return NULL;
4622         memset(&ref->node, 0, sizeof(ref->node));
4623         ref->node.is_data = 1;
4624
4625         if (parent > 0) {
4626                 ref->parent = parent;
4627                 ref->owner = 0;
4628                 ref->offset = 0;
4629                 ref->node.full_backref = 1;
4630         } else {
4631                 ref->root = root;
4632                 ref->owner = owner;
4633                 ref->offset = offset;
4634                 ref->node.full_backref = 0;
4635         }
4636         ref->bytes = max_size;
4637         ref->found_ref = 0;
4638         ref->num_refs = 0;
4639         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4640         if (max_size > rec->max_size)
4641                 rec->max_size = max_size;
4642         return ref;
4643 }
4644
4645 /* Check if the type of extent matches with its chunk */
4646 static void check_extent_type(struct extent_record *rec)
4647 {
4648         struct btrfs_block_group_cache *bg_cache;
4649
4650         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4651         if (!bg_cache)
4652                 return;
4653
4654         /* data extent, check chunk directly*/
4655         if (!rec->metadata) {
4656                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4657                         rec->wrong_chunk_type = 1;
4658                 return;
4659         }
4660
4661         /* metadata extent, check the obvious case first */
4662         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4663                                  BTRFS_BLOCK_GROUP_METADATA))) {
4664                 rec->wrong_chunk_type = 1;
4665                 return;
4666         }
4667
4668         /*
4669          * Check SYSTEM extent, as it's also marked as metadata, we can only
4670          * make sure it's a SYSTEM extent by its backref
4671          */
4672         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4673                 struct extent_backref *node;
4674                 struct tree_backref *tback;
4675                 u64 bg_type;
4676
4677                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4678                 if (node->is_data) {
4679                         /* tree block shouldn't have data backref */
4680                         rec->wrong_chunk_type = 1;
4681                         return;
4682                 }
4683                 tback = container_of(node, struct tree_backref, node);
4684
4685                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4686                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4687                 else
4688                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4689                 if (!(bg_cache->flags & bg_type))
4690                         rec->wrong_chunk_type = 1;
4691         }
4692 }
4693
4694 /*
4695  * Allocate a new extent record, fill default values from @tmpl and insert int
4696  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4697  * the cache, otherwise it fails.
4698  */
4699 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4700                 struct extent_record *tmpl)
4701 {
4702         struct extent_record *rec;
4703         int ret = 0;
4704
4705         rec = malloc(sizeof(*rec));
4706         if (!rec)
4707                 return -ENOMEM;
4708         rec->start = tmpl->start;
4709         rec->max_size = tmpl->max_size;
4710         rec->nr = max(tmpl->nr, tmpl->max_size);
4711         rec->found_rec = tmpl->found_rec;
4712         rec->content_checked = tmpl->content_checked;
4713         rec->owner_ref_checked = tmpl->owner_ref_checked;
4714         rec->num_duplicates = 0;
4715         rec->metadata = tmpl->metadata;
4716         rec->flag_block_full_backref = FLAG_UNSET;
4717         rec->bad_full_backref = 0;
4718         rec->crossing_stripes = 0;
4719         rec->wrong_chunk_type = 0;
4720         rec->is_root = tmpl->is_root;
4721         rec->refs = tmpl->refs;
4722         rec->extent_item_refs = tmpl->extent_item_refs;
4723         rec->parent_generation = tmpl->parent_generation;
4724         INIT_LIST_HEAD(&rec->backrefs);
4725         INIT_LIST_HEAD(&rec->dups);
4726         INIT_LIST_HEAD(&rec->list);
4727         rec->backref_tree = RB_ROOT;
4728         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4729         rec->cache.start = tmpl->start;
4730         rec->cache.size = tmpl->nr;
4731         ret = insert_cache_extent(extent_cache, &rec->cache);
4732         BUG_ON(ret);
4733         bytes_used += rec->nr;
4734
4735         if (tmpl->metadata)
4736                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4737                                 global_info->tree_root->nodesize);
4738         check_extent_type(rec);
4739         return ret;
4740 }
4741
4742 /*
4743  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4744  * some are hints:
4745  * - refs              - if found, increase refs
4746  * - is_root           - if found, set
4747  * - content_checked   - if found, set
4748  * - owner_ref_checked - if found, set
4749  *
4750  * If not found, create a new one, initialize and insert.
4751  */
4752 static int add_extent_rec(struct cache_tree *extent_cache,
4753                 struct extent_record *tmpl)
4754 {
4755         struct extent_record *rec;
4756         struct cache_extent *cache;
4757         int ret = 0;
4758         int dup = 0;
4759
4760         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4761         if (cache) {
4762                 rec = container_of(cache, struct extent_record, cache);
4763                 if (tmpl->refs)
4764                         rec->refs++;
4765                 if (rec->nr == 1)
4766                         rec->nr = max(tmpl->nr, tmpl->max_size);
4767
4768                 /*
4769                  * We need to make sure to reset nr to whatever the extent
4770                  * record says was the real size, this way we can compare it to
4771                  * the backrefs.
4772                  */
4773                 if (tmpl->found_rec) {
4774                         if (tmpl->start != rec->start || rec->found_rec) {
4775                                 struct extent_record *tmp;
4776
4777                                 dup = 1;
4778                                 if (list_empty(&rec->list))
4779                                         list_add_tail(&rec->list,
4780                                                       &duplicate_extents);
4781
4782                                 /*
4783                                  * We have to do this song and dance in case we
4784                                  * find an extent record that falls inside of
4785                                  * our current extent record but does not have
4786                                  * the same objectid.
4787                                  */
4788                                 tmp = malloc(sizeof(*tmp));
4789                                 if (!tmp)
4790                                         return -ENOMEM;
4791                                 tmp->start = tmpl->start;
4792                                 tmp->max_size = tmpl->max_size;
4793                                 tmp->nr = tmpl->nr;
4794                                 tmp->found_rec = 1;
4795                                 tmp->metadata = tmpl->metadata;
4796                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4797                                 INIT_LIST_HEAD(&tmp->list);
4798                                 list_add_tail(&tmp->list, &rec->dups);
4799                                 rec->num_duplicates++;
4800                         } else {
4801                                 rec->nr = tmpl->nr;
4802                                 rec->found_rec = 1;
4803                         }
4804                 }
4805
4806                 if (tmpl->extent_item_refs && !dup) {
4807                         if (rec->extent_item_refs) {
4808                                 fprintf(stderr, "block %llu rec "
4809                                         "extent_item_refs %llu, passed %llu\n",
4810                                         (unsigned long long)tmpl->start,
4811                                         (unsigned long long)
4812                                                         rec->extent_item_refs,
4813                                         (unsigned long long)tmpl->extent_item_refs);
4814                         }
4815                         rec->extent_item_refs = tmpl->extent_item_refs;
4816                 }
4817                 if (tmpl->is_root)
4818                         rec->is_root = 1;
4819                 if (tmpl->content_checked)
4820                         rec->content_checked = 1;
4821                 if (tmpl->owner_ref_checked)
4822                         rec->owner_ref_checked = 1;
4823                 memcpy(&rec->parent_key, &tmpl->parent_key,
4824                                 sizeof(tmpl->parent_key));
4825                 if (tmpl->parent_generation)
4826                         rec->parent_generation = tmpl->parent_generation;
4827                 if (rec->max_size < tmpl->max_size)
4828                         rec->max_size = tmpl->max_size;
4829
4830                 /*
4831                  * A metadata extent can't cross stripe_len boundary, otherwise
4832                  * kernel scrub won't be able to handle it.
4833                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4834                  * it.
4835                  */
4836                 if (tmpl->metadata)
4837                         rec->crossing_stripes = check_crossing_stripes(
4838                                 rec->start, global_info->tree_root->nodesize);
4839                 check_extent_type(rec);
4840                 maybe_free_extent_rec(extent_cache, rec);
4841                 return ret;
4842         }
4843
4844         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4845
4846         return ret;
4847 }
4848
4849 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4850                             u64 parent, u64 root, int found_ref)
4851 {
4852         struct extent_record *rec;
4853         struct tree_backref *back;
4854         struct cache_extent *cache;
4855
4856         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4857         if (!cache) {
4858                 struct extent_record tmpl;
4859
4860                 memset(&tmpl, 0, sizeof(tmpl));
4861                 tmpl.start = bytenr;
4862                 tmpl.nr = 1;
4863                 tmpl.metadata = 1;
4864
4865                 add_extent_rec_nolookup(extent_cache, &tmpl);
4866
4867                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4868                 if (!cache)
4869                         abort();
4870         }
4871
4872         rec = container_of(cache, struct extent_record, cache);
4873         if (rec->start != bytenr) {
4874                 abort();
4875         }
4876
4877         back = find_tree_backref(rec, parent, root);
4878         if (!back) {
4879                 back = alloc_tree_backref(rec, parent, root);
4880                 BUG_ON(!back);
4881         }
4882
4883         if (found_ref) {
4884                 if (back->node.found_ref) {
4885                         fprintf(stderr, "Extent back ref already exists "
4886                                 "for %llu parent %llu root %llu \n",
4887                                 (unsigned long long)bytenr,
4888                                 (unsigned long long)parent,
4889                                 (unsigned long long)root);
4890                 }
4891                 back->node.found_ref = 1;
4892         } else {
4893                 if (back->node.found_extent_tree) {
4894                         fprintf(stderr, "Extent back ref already exists "
4895                                 "for %llu parent %llu root %llu \n",
4896                                 (unsigned long long)bytenr,
4897                                 (unsigned long long)parent,
4898                                 (unsigned long long)root);
4899                 }
4900                 back->node.found_extent_tree = 1;
4901         }
4902         check_extent_type(rec);
4903         maybe_free_extent_rec(extent_cache, rec);
4904         return 0;
4905 }
4906
4907 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4908                             u64 parent, u64 root, u64 owner, u64 offset,
4909                             u32 num_refs, int found_ref, u64 max_size)
4910 {
4911         struct extent_record *rec;
4912         struct data_backref *back;
4913         struct cache_extent *cache;
4914
4915         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4916         if (!cache) {
4917                 struct extent_record tmpl;
4918
4919                 memset(&tmpl, 0, sizeof(tmpl));
4920                 tmpl.start = bytenr;
4921                 tmpl.nr = 1;
4922                 tmpl.max_size = max_size;
4923
4924                 add_extent_rec_nolookup(extent_cache, &tmpl);
4925
4926                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4927                 if (!cache)
4928                         abort();
4929         }
4930
4931         rec = container_of(cache, struct extent_record, cache);
4932         if (rec->max_size < max_size)
4933                 rec->max_size = max_size;
4934
4935         /*
4936          * If found_ref is set then max_size is the real size and must match the
4937          * existing refs.  So if we have already found a ref then we need to
4938          * make sure that this ref matches the existing one, otherwise we need
4939          * to add a new backref so we can notice that the backrefs don't match
4940          * and we need to figure out who is telling the truth.  This is to
4941          * account for that awful fsync bug I introduced where we'd end up with
4942          * a btrfs_file_extent_item that would have its length include multiple
4943          * prealloc extents or point inside of a prealloc extent.
4944          */
4945         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4946                                  bytenr, max_size);
4947         if (!back) {
4948                 back = alloc_data_backref(rec, parent, root, owner, offset,
4949                                           max_size);
4950                 BUG_ON(!back);
4951         }
4952
4953         if (found_ref) {
4954                 BUG_ON(num_refs != 1);
4955                 if (back->node.found_ref)
4956                         BUG_ON(back->bytes != max_size);
4957                 back->node.found_ref = 1;
4958                 back->found_ref += 1;
4959                 back->bytes = max_size;
4960                 back->disk_bytenr = bytenr;
4961                 rec->refs += 1;
4962                 rec->content_checked = 1;
4963                 rec->owner_ref_checked = 1;
4964         } else {
4965                 if (back->node.found_extent_tree) {
4966                         fprintf(stderr, "Extent back ref already exists "
4967                                 "for %llu parent %llu root %llu "
4968                                 "owner %llu offset %llu num_refs %lu\n",
4969                                 (unsigned long long)bytenr,
4970                                 (unsigned long long)parent,
4971                                 (unsigned long long)root,
4972                                 (unsigned long long)owner,
4973                                 (unsigned long long)offset,
4974                                 (unsigned long)num_refs);
4975                 }
4976                 back->num_refs = num_refs;
4977                 back->node.found_extent_tree = 1;
4978         }
4979         maybe_free_extent_rec(extent_cache, rec);
4980         return 0;
4981 }
4982
4983 static int add_pending(struct cache_tree *pending,
4984                        struct cache_tree *seen, u64 bytenr, u32 size)
4985 {
4986         int ret;
4987         ret = add_cache_extent(seen, bytenr, size);
4988         if (ret)
4989                 return ret;
4990         add_cache_extent(pending, bytenr, size);
4991         return 0;
4992 }
4993
4994 static int pick_next_pending(struct cache_tree *pending,
4995                         struct cache_tree *reada,
4996                         struct cache_tree *nodes,
4997                         u64 last, struct block_info *bits, int bits_nr,
4998                         int *reada_bits)
4999 {
5000         unsigned long node_start = last;
5001         struct cache_extent *cache;
5002         int ret;
5003
5004         cache = search_cache_extent(reada, 0);
5005         if (cache) {
5006                 bits[0].start = cache->start;
5007                 bits[0].size = cache->size;
5008                 *reada_bits = 1;
5009                 return 1;
5010         }
5011         *reada_bits = 0;
5012         if (node_start > 32768)
5013                 node_start -= 32768;
5014
5015         cache = search_cache_extent(nodes, node_start);
5016         if (!cache)
5017                 cache = search_cache_extent(nodes, 0);
5018
5019         if (!cache) {
5020                  cache = search_cache_extent(pending, 0);
5021                  if (!cache)
5022                          return 0;
5023                  ret = 0;
5024                  do {
5025                          bits[ret].start = cache->start;
5026                          bits[ret].size = cache->size;
5027                          cache = next_cache_extent(cache);
5028                          ret++;
5029                  } while (cache && ret < bits_nr);
5030                  return ret;
5031         }
5032
5033         ret = 0;
5034         do {
5035                 bits[ret].start = cache->start;
5036                 bits[ret].size = cache->size;
5037                 cache = next_cache_extent(cache);
5038                 ret++;
5039         } while (cache && ret < bits_nr);
5040
5041         if (bits_nr - ret > 8) {
5042                 u64 lookup = bits[0].start + bits[0].size;
5043                 struct cache_extent *next;
5044                 next = search_cache_extent(pending, lookup);
5045                 while(next) {
5046                         if (next->start - lookup > 32768)
5047                                 break;
5048                         bits[ret].start = next->start;
5049                         bits[ret].size = next->size;
5050                         lookup = next->start + next->size;
5051                         ret++;
5052                         if (ret == bits_nr)
5053                                 break;
5054                         next = next_cache_extent(next);
5055                         if (!next)
5056                                 break;
5057                 }
5058         }
5059         return ret;
5060 }
5061
5062 static void free_chunk_record(struct cache_extent *cache)
5063 {
5064         struct chunk_record *rec;
5065
5066         rec = container_of(cache, struct chunk_record, cache);
5067         list_del_init(&rec->list);
5068         list_del_init(&rec->dextents);
5069         free(rec);
5070 }
5071
5072 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5073 {
5074         cache_tree_free_extents(chunk_cache, free_chunk_record);
5075 }
5076
5077 static void free_device_record(struct rb_node *node)
5078 {
5079         struct device_record *rec;
5080
5081         rec = container_of(node, struct device_record, node);
5082         free(rec);
5083 }
5084
5085 FREE_RB_BASED_TREE(device_cache, free_device_record);
5086
5087 int insert_block_group_record(struct block_group_tree *tree,
5088                               struct block_group_record *bg_rec)
5089 {
5090         int ret;
5091
5092         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5093         if (ret)
5094                 return ret;
5095
5096         list_add_tail(&bg_rec->list, &tree->block_groups);
5097         return 0;
5098 }
5099
5100 static void free_block_group_record(struct cache_extent *cache)
5101 {
5102         struct block_group_record *rec;
5103
5104         rec = container_of(cache, struct block_group_record, cache);
5105         list_del_init(&rec->list);
5106         free(rec);
5107 }
5108
5109 void free_block_group_tree(struct block_group_tree *tree)
5110 {
5111         cache_tree_free_extents(&tree->tree, free_block_group_record);
5112 }
5113
5114 int insert_device_extent_record(struct device_extent_tree *tree,
5115                                 struct device_extent_record *de_rec)
5116 {
5117         int ret;
5118
5119         /*
5120          * Device extent is a bit different from the other extents, because
5121          * the extents which belong to the different devices may have the
5122          * same start and size, so we need use the special extent cache
5123          * search/insert functions.
5124          */
5125         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5126         if (ret)
5127                 return ret;
5128
5129         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5130         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5131         return 0;
5132 }
5133
5134 static void free_device_extent_record(struct cache_extent *cache)
5135 {
5136         struct device_extent_record *rec;
5137
5138         rec = container_of(cache, struct device_extent_record, cache);
5139         if (!list_empty(&rec->chunk_list))
5140                 list_del_init(&rec->chunk_list);
5141         if (!list_empty(&rec->device_list))
5142                 list_del_init(&rec->device_list);
5143         free(rec);
5144 }
5145
5146 void free_device_extent_tree(struct device_extent_tree *tree)
5147 {
5148         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5149 }
5150
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.  The item key supplies the extent bytenr (objectid) and the
 * parent (offset).  A ref objectid below BTRFS_FIRST_FREE_OBJECTID produces
 * a tree backref, anything else a data backref carrying the v0 ref count.
 *
 * Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_key key;
	struct btrfs_extent_ref_v0 *ref0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	else
		add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);

	return 0;
}
#endif
5169
5170 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5171                                             struct btrfs_key *key,
5172                                             int slot)
5173 {
5174         struct btrfs_chunk *ptr;
5175         struct chunk_record *rec;
5176         int num_stripes, i;
5177
5178         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5179         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5180
5181         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5182         if (!rec) {
5183                 fprintf(stderr, "memory allocation failed\n");
5184                 exit(-1);
5185         }
5186
5187         INIT_LIST_HEAD(&rec->list);
5188         INIT_LIST_HEAD(&rec->dextents);
5189         rec->bg_rec = NULL;
5190
5191         rec->cache.start = key->offset;
5192         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5193
5194         rec->generation = btrfs_header_generation(leaf);
5195
5196         rec->objectid = key->objectid;
5197         rec->type = key->type;
5198         rec->offset = key->offset;
5199
5200         rec->length = rec->cache.size;
5201         rec->owner = btrfs_chunk_owner(leaf, ptr);
5202         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5203         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5204         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5205         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5206         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5207         rec->num_stripes = num_stripes;
5208         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5209
5210         for (i = 0; i < rec->num_stripes; ++i) {
5211                 rec->stripes[i].devid =
5212                         btrfs_stripe_devid_nr(leaf, ptr, i);
5213                 rec->stripes[i].offset =
5214                         btrfs_stripe_offset_nr(leaf, ptr, i);
5215                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5216                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5217                                 BTRFS_UUID_SIZE);
5218         }
5219
5220         return rec;
5221 }
5222
5223 static int process_chunk_item(struct cache_tree *chunk_cache,
5224                               struct btrfs_key *key, struct extent_buffer *eb,
5225                               int slot)
5226 {
5227         struct chunk_record *rec;
5228         struct btrfs_chunk *chunk;
5229         int ret = 0;
5230
5231         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5232         /*
5233          * Do extra check for this chunk item,
5234          *
5235          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5236          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5237          * and owner<->key_type check.
5238          */
5239         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5240                                       key->offset);
5241         if (ret < 0) {
5242                 error("chunk(%llu, %llu) is not valid, ignore it",
5243                       key->offset, btrfs_chunk_length(eb, chunk));
5244                 return 0;
5245         }
5246         rec = btrfs_new_chunk_record(eb, key, slot);
5247         ret = insert_cache_extent(chunk_cache, &rec->cache);
5248         if (ret) {
5249                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5250                         rec->offset, rec->length);
5251                 free(rec);
5252         }
5253
5254         return ret;
5255 }
5256
5257 static int process_device_item(struct rb_root *dev_cache,
5258                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5259 {
5260         struct btrfs_dev_item *ptr;
5261         struct device_record *rec;
5262         int ret = 0;
5263
5264         ptr = btrfs_item_ptr(eb,
5265                 slot, struct btrfs_dev_item);
5266
5267         rec = malloc(sizeof(*rec));
5268         if (!rec) {
5269                 fprintf(stderr, "memory allocation failed\n");
5270                 return -ENOMEM;
5271         }
5272
5273         rec->devid = key->offset;
5274         rec->generation = btrfs_header_generation(eb);
5275
5276         rec->objectid = key->objectid;
5277         rec->type = key->type;
5278         rec->offset = key->offset;
5279
5280         rec->devid = btrfs_device_id(eb, ptr);
5281         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5282         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5283
5284         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5285         if (ret) {
5286                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5287                 free(rec);
5288         }
5289
5290         return ret;
5291 }
5292
5293 struct block_group_record *
5294 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5295                              int slot)
5296 {
5297         struct btrfs_block_group_item *ptr;
5298         struct block_group_record *rec;
5299
5300         rec = calloc(1, sizeof(*rec));
5301         if (!rec) {
5302                 fprintf(stderr, "memory allocation failed\n");
5303                 exit(-1);
5304         }
5305
5306         rec->cache.start = key->objectid;
5307         rec->cache.size = key->offset;
5308
5309         rec->generation = btrfs_header_generation(leaf);
5310
5311         rec->objectid = key->objectid;
5312         rec->type = key->type;
5313         rec->offset = key->offset;
5314
5315         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5316         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5317
5318         INIT_LIST_HEAD(&rec->list);
5319
5320         return rec;
5321 }
5322
5323 static int process_block_group_item(struct block_group_tree *block_group_cache,
5324                                     struct btrfs_key *key,
5325                                     struct extent_buffer *eb, int slot)
5326 {
5327         struct block_group_record *rec;
5328         int ret = 0;
5329
5330         rec = btrfs_new_block_group_record(eb, key, slot);
5331         ret = insert_block_group_record(block_group_cache, rec);
5332         if (ret) {
5333                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5334                         rec->objectid, rec->offset);
5335                 free(rec);
5336         }
5337
5338         return ret;
5339 }
5340
5341 struct device_extent_record *
5342 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5343                                struct btrfs_key *key, int slot)
5344 {
5345         struct device_extent_record *rec;
5346         struct btrfs_dev_extent *ptr;
5347
5348         rec = calloc(1, sizeof(*rec));
5349         if (!rec) {
5350                 fprintf(stderr, "memory allocation failed\n");
5351                 exit(-1);
5352         }
5353
5354         rec->cache.objectid = key->objectid;
5355         rec->cache.start = key->offset;
5356
5357         rec->generation = btrfs_header_generation(leaf);
5358
5359         rec->objectid = key->objectid;
5360         rec->type = key->type;
5361         rec->offset = key->offset;
5362
5363         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5364         rec->chunk_objecteid =
5365                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5366         rec->chunk_offset =
5367                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5368         rec->length = btrfs_dev_extent_length(leaf, ptr);
5369         rec->cache.size = rec->length;
5370
5371         INIT_LIST_HEAD(&rec->chunk_list);
5372         INIT_LIST_HEAD(&rec->device_list);
5373
5374         return rec;
5375 }
5376
5377 static int
5378 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5379                            struct btrfs_key *key, struct extent_buffer *eb,
5380                            int slot)
5381 {
5382         struct device_extent_record *rec;
5383         int ret;
5384
5385         rec = btrfs_new_device_extent_record(eb, key, slot);
5386         ret = insert_device_extent_record(dev_extent_cache, rec);
5387         if (ret) {
5388                 fprintf(stderr,
5389                         "Device extent[%llu, %llu, %llu] existed.\n",
5390                         rec->objectid, rec->offset, rec->length);
5391                 free(rec);
5392         }
5393
5394         return ret;
5395 }
5396
5397 static int process_extent_item(struct btrfs_root *root,
5398                                struct cache_tree *extent_cache,
5399                                struct extent_buffer *eb, int slot)
5400 {
5401         struct btrfs_extent_item *ei;
5402         struct btrfs_extent_inline_ref *iref;
5403         struct btrfs_extent_data_ref *dref;
5404         struct btrfs_shared_data_ref *sref;
5405         struct btrfs_key key;
5406         struct extent_record tmpl;
5407         unsigned long end;
5408         unsigned long ptr;
5409         int type;
5410         u32 item_size = btrfs_item_size_nr(eb, slot);
5411         u64 refs = 0;
5412         u64 offset;
5413         u64 num_bytes;
5414         int metadata = 0;
5415
5416         btrfs_item_key_to_cpu(eb, &key, slot);
5417
5418         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5419                 metadata = 1;
5420                 num_bytes = root->nodesize;
5421         } else {
5422                 num_bytes = key.offset;
5423         }
5424
5425         if (item_size < sizeof(*ei)) {
5426 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5427                 struct btrfs_extent_item_v0 *ei0;
5428                 BUG_ON(item_size != sizeof(*ei0));
5429                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5430                 refs = btrfs_extent_refs_v0(eb, ei0);
5431 #else
5432                 BUG();
5433 #endif
5434                 memset(&tmpl, 0, sizeof(tmpl));
5435                 tmpl.start = key.objectid;
5436                 tmpl.nr = num_bytes;
5437                 tmpl.extent_item_refs = refs;
5438                 tmpl.metadata = metadata;
5439                 tmpl.found_rec = 1;
5440                 tmpl.max_size = num_bytes;
5441
5442                 return add_extent_rec(extent_cache, &tmpl);
5443         }
5444
5445         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5446         refs = btrfs_extent_refs(eb, ei);
5447         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5448                 metadata = 1;
5449         else
5450                 metadata = 0;
5451
5452         memset(&tmpl, 0, sizeof(tmpl));
5453         tmpl.start = key.objectid;
5454         tmpl.nr = num_bytes;
5455         tmpl.extent_item_refs = refs;
5456         tmpl.metadata = metadata;
5457         tmpl.found_rec = 1;
5458         tmpl.max_size = num_bytes;
5459         add_extent_rec(extent_cache, &tmpl);
5460
5461         ptr = (unsigned long)(ei + 1);
5462         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5463             key.type == BTRFS_EXTENT_ITEM_KEY)
5464                 ptr += sizeof(struct btrfs_tree_block_info);
5465
5466         end = (unsigned long)ei + item_size;
5467         while (ptr < end) {
5468                 iref = (struct btrfs_extent_inline_ref *)ptr;
5469                 type = btrfs_extent_inline_ref_type(eb, iref);
5470                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5471                 switch (type) {
5472                 case BTRFS_TREE_BLOCK_REF_KEY:
5473                         add_tree_backref(extent_cache, key.objectid,
5474                                          0, offset, 0);
5475                         break;
5476                 case BTRFS_SHARED_BLOCK_REF_KEY:
5477                         add_tree_backref(extent_cache, key.objectid,
5478                                          offset, 0, 0);
5479                         break;
5480                 case BTRFS_EXTENT_DATA_REF_KEY:
5481                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5482                         add_data_backref(extent_cache, key.objectid, 0,
5483                                         btrfs_extent_data_ref_root(eb, dref),
5484                                         btrfs_extent_data_ref_objectid(eb,
5485                                                                        dref),
5486                                         btrfs_extent_data_ref_offset(eb, dref),
5487                                         btrfs_extent_data_ref_count(eb, dref),
5488                                         0, num_bytes);
5489                         break;
5490                 case BTRFS_SHARED_DATA_REF_KEY:
5491                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5492                         add_data_backref(extent_cache, key.objectid, offset,
5493                                         0, 0, 0,
5494                                         btrfs_shared_data_ref_count(eb, sref),
5495                                         0, num_bytes);
5496                         break;
5497                 default:
5498                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5499                                 key.objectid, key.type, num_bytes);
5500                         goto out;
5501                 }
5502                 ptr += btrfs_extent_inline_ref_size(type);
5503         }
5504         WARN_ON(ptr > end);
5505 out:
5506         return 0;
5507 }
5508
/*
 * Verify that the range [@offset, @offset + @bytes) of block group @cache is
 * described by exactly one entry of the block group's in-memory free space
 * cache, and remove that entry once it has been matched.
 *
 * Before the lookup, the range is trimmed (or split) around every stripe that
 * btrfs_rmap_block() reports for each super block mirror offset, so those
 * stripes are not expected to appear as free space.
 *
 * Returns 0 if the range matched an entry (which is then unlinked and freed)
 * or turned out to lie entirely inside such a stripe, -EINVAL if no entry is
 * found or its offset/size differ, or the error returned by
 * btrfs_rmap_block() or by the recursive check of a left-hand part.
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                /*
                 * Ask for the addresses in this block group that correspond
                 * to super mirror i; the result is nr entries in logical[],
                 * each stripe_len bytes long.
                 */
                ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                /* Walk the returned addresses from the last to the first. */
                while (nr--) {
                        /* Stripe ends at or before the range: no overlap. */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        /* Stripe starts at or after the range end: no overlap. */
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* Stripe covers the whole range: nothing left. */
                                if (stripe_len >= bytes) {
                                        kfree(logical);
                                        return 0;
                                }
                                /* Otherwise drop the stripe from the front. */
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* Stripe starts before the range and covers it. */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        kfree(logical);
                                        return 0;
                                }
                                /* Keep only the part after the stripe. */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        kfree(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                kfree(logical);
        }

        /* What is left must match one free space entry exactly. */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /* Matched: take the entry out so leftovers can be detected later. */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
5602
5603 static int verify_space_cache(struct btrfs_root *root,
5604                               struct btrfs_block_group_cache *cache)
5605 {
5606         struct btrfs_path *path;
5607         struct extent_buffer *leaf;
5608         struct btrfs_key key;
5609         u64 last;
5610         int ret = 0;
5611
5612         path = btrfs_alloc_path();
5613         if (!path)
5614                 return -ENOMEM;
5615
5616         root = root->fs_info->extent_root;
5617
5618         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5619
5620         key.objectid = last;
5621         key.offset = 0;
5622         key.type = BTRFS_EXTENT_ITEM_KEY;
5623
5624         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5625         if (ret < 0)
5626                 goto out;
5627         ret = 0;
5628         while (1) {
5629                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5630                         ret = btrfs_next_leaf(root, path);
5631                         if (ret < 0)
5632                                 goto out;
5633                         if (ret > 0) {
5634                                 ret = 0;
5635                                 break;
5636                         }
5637                 }
5638                 leaf = path->nodes[0];
5639                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5640                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5641                         break;
5642                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5643                     key.type != BTRFS_METADATA_ITEM_KEY) {
5644                         path->slots[0]++;
5645                         continue;
5646                 }
5647
5648                 if (last == key.objectid) {
5649                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5650                                 last = key.objectid + key.offset;
5651                         else
5652                                 last = key.objectid + root->nodesize;
5653                         path->slots[0]++;
5654                         continue;
5655                 }
5656
5657                 ret = check_cache_range(root, cache, last,
5658                                         key.objectid - last);
5659                 if (ret)
5660                         break;
5661                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5662                         last = key.objectid + key.offset;
5663                 else
5664                         last = key.objectid + root->nodesize;
5665                 path->slots[0]++;
5666         }
5667
5668         if (last < cache->key.objectid + cache->key.offset)
5669                 ret = check_cache_range(root, cache, last,
5670                                         cache->key.objectid +
5671                                         cache->key.offset - last);
5672
5673 out:
5674         btrfs_free_path(path);
5675
5676         if (!ret &&
5677             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5678                 fprintf(stderr, "There are still entries left in the space "
5679                         "cache\n");
5680                 ret = -EINVAL;
5681         }
5682
5683         return ret;
5684 }
5685
5686 static int check_space_cache(struct btrfs_root *root)
5687 {
5688         struct btrfs_block_group_cache *cache;
5689         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5690         int ret;
5691         int error = 0;
5692
5693         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5694             btrfs_super_generation(root->fs_info->super_copy) !=
5695             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5696                 printf("cache and super generation don't match, space cache "
5697                        "will be invalidated\n");
5698                 return 0;
5699         }
5700
5701         if (ctx.progress_enabled) {
5702                 ctx.tp = TASK_FREE_SPACE;
5703                 task_start(ctx.info);
5704         }
5705
5706         while (1) {
5707                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5708                 if (!cache)
5709                         break;
5710
5711                 start = cache->key.objectid + cache->key.offset;
5712                 if (!cache->free_space_ctl) {
5713                         if (btrfs_init_free_space_ctl(cache,
5714                                                       root->sectorsize)) {
5715                                 ret = -ENOMEM;
5716                                 break;
5717                         }
5718                 } else {
5719                         btrfs_remove_free_space_cache(cache);
5720                 }
5721
5722                 if (btrfs_fs_compat_ro(root->fs_info,
5723                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5724                         ret = exclude_super_stripes(root, cache);
5725                         if (ret) {
5726                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5727                                         strerror(-ret));
5728                                 error++;
5729                                 continue;
5730                         }
5731                         ret = load_free_space_tree(root->fs_info, cache);
5732                         free_excluded_extents(root, cache);
5733                         if (ret < 0) {
5734                                 fprintf(stderr, "could not load free space tree: %s\n",
5735                                         strerror(-ret));
5736                                 error++;
5737                                 continue;
5738                         }
5739                         error += ret;
5740                 } else {
5741                         ret = load_free_space_cache(root->fs_info, cache);
5742                         if (!ret)
5743                                 continue;
5744                 }
5745
5746                 ret = verify_space_cache(root, cache);
5747                 if (ret) {
5748                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5749                                 cache->key.objectid);
5750                         error++;
5751                 }
5752         }
5753
5754         task_stop(ctx.info);
5755
5756         return error ? -EINVAL : 0;
5757 }
5758
/*
 * Read @num_bytes of data starting at logical address @bytenr and compare the
 * checksum of every root->sectorsize block with the checksums stored in @eb
 * starting at byte @leaf_offset.
 *
 * On a mismatch a message is printed and, while mirror < num_copies - 1, the
 * current read is repeated with the next mirror number.
 *
 * Returns -EINVAL if @num_bytes is not a multiple of the sector size, -ENOMEM
 * if the data buffer cannot be allocated, or the error returned by
 * read_extent_data().  A checksum mismatch on its own does not change the
 * return value; it is only reported on stderr.
 */
static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
                        u64 num_bytes, unsigned long leaf_offset,
                        struct extent_buffer *eb) {

        u64 offset = 0;
        u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
        char *data;
        unsigned long csum_offset;
        u32 csum;
        /*
         * NOTE(review): csum_size bytes are copied into this u32 below, which
         * assumes csum_size <= 4 -- confirm for every supported csum type.
         */
        u32 csum_expected;
        u64 read_len;
        u64 data_checked = 0;
        u64 tmp;
        int ret = 0;
        int mirror;
        int num_copies;

        if (num_bytes % root->sectorsize)
                return -EINVAL;

        data = malloc(num_bytes);
        if (!data)
                return -ENOMEM;

        while (offset < num_bytes) {
                /*
                 * NOTE(review): mirror numbers 0 .. num_copies - 1 are tried;
                 * confirm how read_extent_data() interprets mirror 0 and
                 * whether the last copy is ever read.
                 */
                mirror = 0;
again:
                /* read_extent_data() may shorten read_len to what it read. */
                read_len = num_bytes - offset;
                /* read as much space once a time */
                ret = read_extent_data(root, data + offset,
                                bytenr + offset, &read_len, mirror);
                if (ret)
                        goto out;
                data_checked = 0;
                /* verify every 4k data's checksum */
                while (data_checked < read_len) {
                        csum = ~(u32)0;
                        /* Byte position of this sector inside the range. */
                        tmp = offset + data_checked;

                        csum = btrfs_csum_data(NULL, (char *)data + tmp,
                                               csum, root->sectorsize);
                        btrfs_csum_final(csum, (char *)&csum);

                        /* One csum_size slot per sector, counted from @bytenr. */
                        csum_offset = leaf_offset +
                                 tmp / root->sectorsize * csum_size;
                        read_extent_buffer(eb, (char *)&csum_expected,
                                           csum_offset, csum_size);
                        /* try another mirror */
                        if (csum != csum_expected) {
                                fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
                                                mirror, bytenr + tmp,
                                                csum, csum_expected);
                                num_copies = btrfs_num_copies(
                                                &root->fs_info->mapping_tree,
                                                bytenr, num_bytes);
                                if (mirror < num_copies - 1) {
                                        mirror += 1;
                                        /* Re-reads and re-checks from @offset. */
                                        goto again;
                                }
                                /* Out of mirrors: move on to the next sector. */
                        }
                        data_checked += root->sectorsize;
                }
                offset += read_len;
        }
out:
        free(data);
        return ret;
}
5827
/*
 * Check that the range [@bytenr, @bytenr + @num_bytes) is covered by
 * BTRFS_EXTENT_ITEM_KEY items of the extent tree.
 *
 * The search is positioned at or before (@bytenr, EXTENT_ITEM, -1) and then
 * walks forward; each extent item that overlaps the range removes the covered
 * part from it.  An extent that lies strictly inside the range splits it: the
 * right part is checked by a recursive call, the left part by restarting the
 * search.  Items of any other key type are skipped.
 *
 * Returns 0 if the whole range is accounted for, 1 if some part of it is left
 * without an extent item (a message is printed), or a negative error from the
 * tree search.
 */
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes)
{
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        int ret;

        path = btrfs_alloc_path();
        if (!path) {
                fprintf(stderr, "Error allocating path\n");
                return -ENOMEM;
        }

        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

again:
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
                                0, 0);
        if (ret < 0) {
                fprintf(stderr, "Error looking up extent record %d\n", ret);
                btrfs_free_path(path);
                return ret;
        } else if (ret) {
                /* No exact match: step back to the preceding item. */
                if (path->slots[0] > 0) {
                        path->slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                /* No previous leaf; reported at out. */
                                ret = 0;
                                goto out;
                        }
                }
        }

        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

        /*
         * Block group items come before extent items if they have the same
         * bytenr, so walk back one more just in case.  Dear future traveller,
         * first congrats on mastering time travel.  Now if it's not too much
         * trouble could you go back to 2006 and tell Chris to make the
         * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
         * EXTENT_ITEM_KEY please?
         */
        while (key.type > BTRFS_EXTENT_ITEM_KEY) {
                if (path->slots[0] > 0) {
                        path->slots[0]--;
                } else {
                        ret = btrfs_prev_leaf(root, path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                ret = 0;
                                goto out;
                        }
                }
                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
        }

        /* Walk forward until the range is used up or no extent can cover it. */
        while (num_bytes) {
                if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0) {
                                fprintf(stderr, "Error going to next leaf "
                                        "%d\n", ret);
                                btrfs_free_path(path);
                                return ret;
                        } else if (ret) {
                                break;
                        }
                }
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.type != BTRFS_EXTENT_ITEM_KEY) {
                        path->slots[0]++;
                        continue;
                }
                /* Extent ends before the range starts. */
                if (key.objectid + key.offset < bytenr) {
                        path->slots[0]++;
                        continue;
                }
                /* Extent starts past the end of the range: stop looking. */
                if (key.objectid > bytenr + num_bytes)
                        break;

                if (key.objectid == bytenr) {
                        /* Extent starts exactly at the range start. */
                        if (key.offset >= num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes -= key.offset;
                        bytenr += key.offset;
                } else if (key.objectid < bytenr) {
                        /* Extent starts before the range and overlaps it. */
                        if (key.objectid + key.offset >= bytenr + num_bytes) {
                                num_bytes = 0;
                                break;
                        }
                        num_bytes = (bytenr + num_bytes) -
                                (key.objectid + key.offset);
                        bytenr = key.objectid + key.offset;
                } else {
                        /* Extent starts inside the range. */
                        if (key.objectid + key.offset < bytenr + num_bytes) {
                                u64 new_start = key.objectid + key.offset;
                                u64 new_bytes = bytenr + num_bytes - new_start;

                                /*
                                 * Weird case, the extent is in the middle of
                                 * our range, we'll have to search one side
                                 * and then the other.  Not sure if this happens
                                 * in real life, but no harm in coding it up
                                 * anyway just in case.
                                 */
                                btrfs_release_path(path);
                                ret = check_extent_exists(root, new_start,
                                                          new_bytes);
                                if (ret) {
                                        fprintf(stderr, "Right section didn't "
                                                "have a record\n");
                                        break;
                                }
                                /* Left part remains; search again with key. */
                                num_bytes = key.objectid - bytenr;
                                goto again;
                        }
                        /* Extent reaches the range end: keep the left part. */
                        num_bytes = key.objectid - bytenr;
                }
                path->slots[0]++;
        }
        /* Any leftover bytes are turned into a return value of 1 below. */
        ret = 0;

out:
        if (num_bytes && !ret) {
                fprintf(stderr, "There are no extents for csum range "
                        "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
                ret = 1;
        }

        btrfs_free_path(path);
        return ret;
}
5971
5972 static int check_csums(struct btrfs_root *root)
5973 {
5974         struct btrfs_path *path;
5975         struct extent_buffer *leaf;
5976         struct btrfs_key key;
5977         u64 offset = 0, num_bytes = 0;
5978         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5979         int errors = 0;
5980         int ret;
5981         u64 data_len;
5982         unsigned long leaf_offset;
5983
5984         root = root->fs_info->csum_root;
5985         if (!extent_buffer_uptodate(root->node)) {
5986                 fprintf(stderr, "No valid csum tree found\n");
5987                 return -ENOENT;
5988         }
5989
5990         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5991         key.type = BTRFS_EXTENT_CSUM_KEY;
5992         key.offset = 0;
5993
5994         path = btrfs_alloc_path();
5995         if (!path)
5996                 return -ENOMEM;
5997
5998         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5999         if (ret < 0) {
6000                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6001                 btrfs_free_path(path);
6002                 return ret;
6003         }
6004
6005         if (ret > 0 && path->slots[0])
6006                 path->slots[0]--;
6007         ret = 0;
6008
6009         while (1) {
6010                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6011                         ret = btrfs_next_leaf(root, path);
6012                         if (ret < 0) {
6013                                 fprintf(stderr, "Error going to next leaf "
6014                                         "%d\n", ret);
6015                                 break;
6016                         }
6017                         if (ret)
6018                                 break;
6019                 }
6020                 leaf = path->nodes[0];
6021
6022                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6023                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6024                         path->slots[0]++;
6025                         continue;
6026                 }
6027
6028                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6029                               csum_size) * root->sectorsize;
6030                 if (!check_data_csum)
6031                         goto skip_csum_check;
6032                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6033                 ret = check_extent_csums(root, key.offset, data_len,
6034                                          leaf_offset, leaf);
6035                 if (ret)
6036                         break;
6037 skip_csum_check:
6038                 if (!num_bytes) {
6039                         offset = key.offset;
6040                 } else if (key.offset != offset + num_bytes) {
6041                         ret = check_extent_exists(root, offset, num_bytes);
6042                         if (ret) {
6043                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6044                                         "there is no extent record\n",
6045                                         offset, offset+num_bytes);
6046                                 errors++;
6047                         }
6048                         offset = key.offset;
6049                         num_bytes = 0;
6050                 }
6051                 num_bytes += data_len;
6052                 path->slots[0]++;
6053         }
6054
6055         btrfs_free_path(path);
6056         return errors;
6057 }
6058
6059 static int is_dropped_key(struct btrfs_key *key,
6060                           struct btrfs_key *drop_key) {
6061         if (key->objectid < drop_key->objectid)
6062                 return 1;
6063         else if (key->objectid == drop_key->objectid) {
6064                 if (key->type < drop_key->type)
6065                         return 1;
6066                 else if (key->type == drop_key->type) {
6067                         if (key->offset < drop_key->offset)
6068                                 return 1;
6069                 }
6070         }
6071         return 0;
6072 }
6073
6074 /*
6075  * Here are the rules for FULL_BACKREF.
6076  *
6077  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6078  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6079  *      FULL_BACKREF set.
6080  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6081  *    if it happened after the relocation occurred since we'll have dropped the
6082  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6083  *    have no real way to know for sure.
6084  *
6085  * We process the blocks one root at a time, and we start from the lowest root
6086  * objectid and go to the highest.  So we can just lookup the owner backref for
6087  * the record and if we don't find it then we know it doesn't exist and we have
6088  * a FULL BACKREF.
6089  *
6090  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6091  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6092  * be set or not and then we can check later once we've gathered all the refs.
6093  */
6094 static int calc_extent_flag(struct btrfs_root *root,
6095                            struct cache_tree *extent_cache,
6096                            struct extent_buffer *buf,
6097                            struct root_item_record *ri,
6098                            u64 *flags)
6099 {
6100         struct extent_record *rec;
6101         struct cache_extent *cache;
6102         struct tree_backref *tback;
6103         u64 owner = 0;
6104
6105         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6106         /* we have added this extent before */
6107         BUG_ON(!cache);
6108         rec = container_of(cache, struct extent_record, cache);
6109
6110         /*
6111          * Except file/reloc tree, we can not have
6112          * FULL BACKREF MODE
6113          */
6114         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6115                 goto normal;
6116         /*
6117          * root node
6118          */
6119         if (buf->start == ri->bytenr)
6120                 goto normal;
6121
6122         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6123                 goto full_backref;
6124
6125         owner = btrfs_header_owner(buf);
6126         if (owner == ri->objectid)
6127                 goto normal;
6128
6129         tback = find_tree_backref(rec, 0, owner);
6130         if (!tback)
6131                 goto full_backref;
6132 normal:
6133         *flags = 0;
6134         if (rec->flag_block_full_backref != FLAG_UNSET &&
6135             rec->flag_block_full_backref != 0)
6136                 rec->bad_full_backref = 1;
6137         return 0;
6138 full_backref:
6139         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6140         if (rec->flag_block_full_backref != FLAG_UNSET &&
6141             rec->flag_block_full_backref != 1)
6142                 rec->bad_full_backref = 1;
6143         return 0;
6144 }
6145
6146 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6147 {
6148         fprintf(stderr, "Invalid key type(");
6149         print_key_type(stderr, 0, key_type);
6150         fprintf(stderr, ") found in root(");
6151         print_objectid(stderr, rootid, 0);
6152         fprintf(stderr, ")\n");
6153 }
6154
6155 /*
6156  * Check if the key is valid with its extent buffer.
6157  *
6158  * This is a early check in case invalid key exists in a extent buffer
6159  * This is not comprehensive yet, but should prevent wrong key/item passed
6160  * further
6161  */
6162 static int check_type_with_root(u64 rootid, u8 key_type)
6163 {
6164         switch (key_type) {
6165         /* Only valid in chunk tree */
6166         case BTRFS_DEV_ITEM_KEY:
6167         case BTRFS_CHUNK_ITEM_KEY:
6168                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6169                         goto err;
6170                 break;
6171         /* valid in csum and log tree */
6172         case BTRFS_CSUM_TREE_OBJECTID:
6173                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6174                       is_fstree(rootid)))
6175                         goto err;
6176                 break;
6177         case BTRFS_EXTENT_ITEM_KEY:
6178         case BTRFS_METADATA_ITEM_KEY:
6179         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6180                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6181                         goto err;
6182                 break;
6183         case BTRFS_ROOT_ITEM_KEY:
6184                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6185                         goto err;
6186                 break;
6187         case BTRFS_DEV_EXTENT_KEY:
6188                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6189                         goto err;
6190                 break;
6191         }
6192         return 0;
6193 err:
6194         report_mismatch_key_root(key_type, rootid);
6195         return -EINVAL;
6196 }
6197
6198 static int run_next_block(struct btrfs_root *root,
6199                           struct block_info *bits,
6200                           int bits_nr,
6201                           u64 *last,
6202                           struct cache_tree *pending,
6203                           struct cache_tree *seen,
6204                           struct cache_tree *reada,
6205                           struct cache_tree *nodes,
6206                           struct cache_tree *extent_cache,
6207                           struct cache_tree *chunk_cache,
6208                           struct rb_root *dev_cache,
6209                           struct block_group_tree *block_group_cache,
6210                           struct device_extent_tree *dev_extent_cache,
6211                           struct root_item_record *ri)
6212 {
6213         struct extent_buffer *buf;
6214         struct extent_record *rec = NULL;
6215         u64 bytenr;
6216         u32 size;
6217         u64 parent;
6218         u64 owner;
6219         u64 flags;
6220         u64 ptr;
6221         u64 gen = 0;
6222         int ret = 0;
6223         int i;
6224         int nritems;
6225         struct btrfs_key key;
6226         struct cache_extent *cache;
6227         int reada_bits;
6228
6229         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6230                                     bits_nr, &reada_bits);
6231         if (nritems == 0)
6232                 return 1;
6233
6234         if (!reada_bits) {
6235                 for(i = 0; i < nritems; i++) {
6236                         ret = add_cache_extent(reada, bits[i].start,
6237                                                bits[i].size);
6238                         if (ret == -EEXIST)
6239                                 continue;
6240
6241                         /* fixme, get the parent transid */
6242                         readahead_tree_block(root, bits[i].start,
6243                                              bits[i].size, 0);
6244                 }
6245         }
6246         *last = bits[0].start;
6247         bytenr = bits[0].start;
6248         size = bits[0].size;
6249
6250         cache = lookup_cache_extent(pending, bytenr, size);
6251         if (cache) {
6252                 remove_cache_extent(pending, cache);
6253                 free(cache);
6254         }
6255         cache = lookup_cache_extent(reada, bytenr, size);
6256         if (cache) {
6257                 remove_cache_extent(reada, cache);
6258                 free(cache);
6259         }
6260         cache = lookup_cache_extent(nodes, bytenr, size);
6261         if (cache) {
6262                 remove_cache_extent(nodes, cache);
6263                 free(cache);
6264         }
6265         cache = lookup_cache_extent(extent_cache, bytenr, size);
6266         if (cache) {
6267                 rec = container_of(cache, struct extent_record, cache);
6268                 gen = rec->parent_generation;
6269         }
6270
6271         /* fixme, get the real parent transid */
6272         buf = read_tree_block(root, bytenr, size, gen);
6273         if (!extent_buffer_uptodate(buf)) {
6274                 record_bad_block_io(root->fs_info,
6275                                     extent_cache, bytenr, size);
6276                 goto out;
6277         }
6278
6279         nritems = btrfs_header_nritems(buf);
6280
6281         flags = 0;
6282         if (!init_extent_tree) {
6283                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6284                                        btrfs_header_level(buf), 1, NULL,
6285                                        &flags);
6286                 if (ret < 0) {
6287                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6288                         if (ret < 0) {
6289                                 fprintf(stderr, "Couldn't calc extent flags\n");
6290                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6291                         }
6292                 }
6293         } else {
6294                 flags = 0;
6295                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6296                 if (ret < 0) {
6297                         fprintf(stderr, "Couldn't calc extent flags\n");
6298                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6299                 }
6300         }
6301
6302         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6303                 if (ri != NULL &&
6304                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6305                     ri->objectid == btrfs_header_owner(buf)) {
6306                         /*
6307                          * Ok we got to this block from it's original owner and
6308                          * we have FULL_BACKREF set.  Relocation can leave
6309                          * converted blocks over so this is altogether possible,
6310                          * however it's not possible if the generation > the
6311                          * last snapshot, so check for this case.
6312                          */
6313                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6314                             btrfs_header_generation(buf) > ri->last_snapshot) {
6315                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6316                                 rec->bad_full_backref = 1;
6317                         }
6318                 }
6319         } else {
6320                 if (ri != NULL &&
6321                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6322                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6323                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6324                         rec->bad_full_backref = 1;
6325                 }
6326         }
6327
6328         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6329                 rec->flag_block_full_backref = 1;
6330                 parent = bytenr;
6331                 owner = 0;
6332         } else {
6333                 rec->flag_block_full_backref = 0;
6334                 parent = 0;
6335                 owner = btrfs_header_owner(buf);
6336         }
6337
6338         ret = check_block(root, extent_cache, buf, flags);
6339         if (ret)
6340                 goto out;
6341
6342         if (btrfs_is_leaf(buf)) {
6343                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6344                 for (i = 0; i < nritems; i++) {
6345                         struct btrfs_file_extent_item *fi;
6346                         btrfs_item_key_to_cpu(buf, &key, i);
6347                         /*
6348                          * Check key type against the leaf owner.
6349                          * Could filter quite a lot of early error if
6350                          * owner is correct
6351                          */
6352                         if (check_type_with_root(btrfs_header_owner(buf),
6353                                                  key.type)) {
6354                                 fprintf(stderr, "ignoring invalid key\n");
6355                                 continue;
6356                         }
6357                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6358                                 process_extent_item(root, extent_cache, buf,
6359                                                     i);
6360                                 continue;
6361                         }
6362                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6363                                 process_extent_item(root, extent_cache, buf,
6364                                                     i);
6365                                 continue;
6366                         }
6367                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6368                                 total_csum_bytes +=
6369                                         btrfs_item_size_nr(buf, i);
6370                                 continue;
6371                         }
6372                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6373                                 process_chunk_item(chunk_cache, &key, buf, i);
6374                                 continue;
6375                         }
6376                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6377                                 process_device_item(dev_cache, &key, buf, i);
6378                                 continue;
6379                         }
6380                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6381                                 process_block_group_item(block_group_cache,
6382                                         &key, buf, i);
6383                                 continue;
6384                         }
6385                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6386                                 process_device_extent_item(dev_extent_cache,
6387                                         &key, buf, i);
6388                                 continue;
6389
6390                         }
6391                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6392 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6393                                 process_extent_ref_v0(extent_cache, buf, i);
6394 #else
6395                                 BUG();
6396 #endif
6397                                 continue;
6398                         }
6399
6400                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6401                                 add_tree_backref(extent_cache, key.objectid, 0,
6402                                                  key.offset, 0);
6403                                 continue;
6404                         }
6405                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6406                                 add_tree_backref(extent_cache, key.objectid,
6407                                                  key.offset, 0, 0);
6408                                 continue;
6409                         }
6410                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6411                                 struct btrfs_extent_data_ref *ref;
6412                                 ref = btrfs_item_ptr(buf, i,
6413                                                 struct btrfs_extent_data_ref);
6414                                 add_data_backref(extent_cache,
6415                                         key.objectid, 0,
6416                                         btrfs_extent_data_ref_root(buf, ref),
6417                                         btrfs_extent_data_ref_objectid(buf,
6418                                                                        ref),
6419                                         btrfs_extent_data_ref_offset(buf, ref),
6420                                         btrfs_extent_data_ref_count(buf, ref),
6421                                         0, root->sectorsize);
6422                                 continue;
6423                         }
6424                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6425                                 struct btrfs_shared_data_ref *ref;
6426                                 ref = btrfs_item_ptr(buf, i,
6427                                                 struct btrfs_shared_data_ref);
6428                                 add_data_backref(extent_cache,
6429                                         key.objectid, key.offset, 0, 0, 0,
6430                                         btrfs_shared_data_ref_count(buf, ref),
6431                                         0, root->sectorsize);
6432                                 continue;
6433                         }
6434                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6435                                 struct bad_item *bad;
6436
6437                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6438                                         continue;
6439                                 if (!owner)
6440                                         continue;
6441                                 bad = malloc(sizeof(struct bad_item));
6442                                 if (!bad)
6443                                         continue;
6444                                 INIT_LIST_HEAD(&bad->list);
6445                                 memcpy(&bad->key, &key,
6446                                        sizeof(struct btrfs_key));
6447                                 bad->root_id = owner;
6448                                 list_add_tail(&bad->list, &delete_items);
6449                                 continue;
6450                         }
6451                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6452                                 continue;
6453                         fi = btrfs_item_ptr(buf, i,
6454                                             struct btrfs_file_extent_item);
6455                         if (btrfs_file_extent_type(buf, fi) ==
6456                             BTRFS_FILE_EXTENT_INLINE)
6457                                 continue;
6458                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6459                                 continue;
6460
6461                         data_bytes_allocated +=
6462                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6463                         if (data_bytes_allocated < root->sectorsize) {
6464                                 abort();
6465                         }
6466                         data_bytes_referenced +=
6467                                 btrfs_file_extent_num_bytes(buf, fi);
6468                         add_data_backref(extent_cache,
6469                                 btrfs_file_extent_disk_bytenr(buf, fi),
6470                                 parent, owner, key.objectid, key.offset -
6471                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6472                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6473                 }
6474         } else {
6475                 int level;
6476                 struct btrfs_key first_key;
6477
6478                 first_key.objectid = 0;
6479
6480                 if (nritems > 0)
6481                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6482                 level = btrfs_header_level(buf);
6483                 for (i = 0; i < nritems; i++) {
6484                         struct extent_record tmpl;
6485
6486                         ptr = btrfs_node_blockptr(buf, i);
6487                         size = root->nodesize;
6488                         btrfs_node_key_to_cpu(buf, &key, i);
6489                         if (ri != NULL) {
6490                                 if ((level == ri->drop_level)
6491                                     && is_dropped_key(&key, &ri->drop_key)) {
6492                                         continue;
6493                                 }
6494                         }
6495
6496                         memset(&tmpl, 0, sizeof(tmpl));
6497                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6498                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6499                         tmpl.start = ptr;
6500                         tmpl.nr = size;
6501                         tmpl.refs = 1;
6502                         tmpl.metadata = 1;
6503                         tmpl.max_size = size;
6504                         ret = add_extent_rec(extent_cache, &tmpl);
6505                         BUG_ON(ret);
6506
6507                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6508
6509                         if (level > 1) {
6510                                 add_pending(nodes, seen, ptr, size);
6511                         } else {
6512                                 add_pending(pending, seen, ptr, size);
6513                         }
6514                 }
6515                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6516                                       nritems) * sizeof(struct btrfs_key_ptr);
6517         }
6518         total_btree_bytes += buf->len;
6519         if (fs_root_objectid(btrfs_header_owner(buf)))
6520                 total_fs_tree_bytes += buf->len;
6521         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6522                 total_extent_tree_bytes += buf->len;
6523         if (!found_old_backref &&
6524             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6525             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6526             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6527                 found_old_backref = 1;
6528 out:
6529         free_extent_buffer(buf);
6530         return ret;
6531 }
6532
6533 static int add_root_to_pending(struct extent_buffer *buf,
6534                                struct cache_tree *extent_cache,
6535                                struct cache_tree *pending,
6536                                struct cache_tree *seen,
6537                                struct cache_tree *nodes,
6538                                u64 objectid)
6539 {
6540         struct extent_record tmpl;
6541
6542         if (btrfs_header_level(buf) > 0)
6543                 add_pending(nodes, seen, buf->start, buf->len);
6544         else
6545                 add_pending(pending, seen, buf->start, buf->len);
6546
6547         memset(&tmpl, 0, sizeof(tmpl));
6548         tmpl.start = buf->start;
6549         tmpl.nr = buf->len;
6550         tmpl.is_root = 1;
6551         tmpl.refs = 1;
6552         tmpl.metadata = 1;
6553         tmpl.max_size = buf->len;
6554         add_extent_rec(extent_cache, &tmpl);
6555
6556         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6557             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6558                 add_tree_backref(extent_cache, buf->start, buf->start,
6559                                  0, 1);
6560         else
6561                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6562         return 0;
6563 }
6564
6565 /* as we fix the tree, we might be deleting blocks that
6566  * we're tracking for repair.  This hook makes sure we
6567  * remove any backrefs for blocks as we are fixing them.
6568  */
6569 static int free_extent_hook(struct btrfs_trans_handle *trans,
6570                             struct btrfs_root *root,
6571                             u64 bytenr, u64 num_bytes, u64 parent,
6572                             u64 root_objectid, u64 owner, u64 offset,
6573                             int refs_to_drop)
6574 {
6575         struct extent_record *rec;
6576         struct cache_extent *cache;
6577         int is_data;
6578         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6579
6580         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6581         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6582         if (!cache)
6583                 return 0;
6584
6585         rec = container_of(cache, struct extent_record, cache);
6586         if (is_data) {
6587                 struct data_backref *back;
6588                 back = find_data_backref(rec, parent, root_objectid, owner,
6589                                          offset, 1, bytenr, num_bytes);
6590                 if (!back)
6591                         goto out;
6592                 if (back->node.found_ref) {
6593                         back->found_ref -= refs_to_drop;
6594                         if (rec->refs)
6595                                 rec->refs -= refs_to_drop;
6596                 }
6597                 if (back->node.found_extent_tree) {
6598                         back->num_refs -= refs_to_drop;
6599                         if (rec->extent_item_refs)
6600                                 rec->extent_item_refs -= refs_to_drop;
6601                 }
6602                 if (back->found_ref == 0)
6603                         back->node.found_ref = 0;
6604                 if (back->num_refs == 0)
6605                         back->node.found_extent_tree = 0;
6606
6607                 if (!back->node.found_extent_tree && back->node.found_ref) {
6608                         rb_erase(&back->node.node, &rec->backref_tree);
6609                         free(back);
6610                 }
6611         } else {
6612                 struct tree_backref *back;
6613                 back = find_tree_backref(rec, parent, root_objectid);
6614                 if (!back)
6615                         goto out;
6616                 if (back->node.found_ref) {
6617                         if (rec->refs)
6618                                 rec->refs--;
6619                         back->node.found_ref = 0;
6620                 }
6621                 if (back->node.found_extent_tree) {
6622                         if (rec->extent_item_refs)
6623                                 rec->extent_item_refs--;
6624                         back->node.found_extent_tree = 0;
6625                 }
6626                 if (!back->node.found_extent_tree && back->node.found_ref) {
6627                         rb_erase(&back->node.node, &rec->backref_tree);
6628                         free(back);
6629                 }
6630         }
6631         maybe_free_extent_rec(extent_cache, rec);
6632 out:
6633         return 0;
6634 }
6635
6636 static int delete_extent_records(struct btrfs_trans_handle *trans,
6637                                  struct btrfs_root *root,
6638                                  struct btrfs_path *path,
6639                                  u64 bytenr, u64 new_len)
6640 {
6641         struct btrfs_key key;
6642         struct btrfs_key found_key;
6643         struct extent_buffer *leaf;
6644         int ret;
6645         int slot;
6646
6647
6648         key.objectid = bytenr;
6649         key.type = (u8)-1;
6650         key.offset = (u64)-1;
6651
6652         while(1) {
6653                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6654                                         &key, path, 0, 1);
6655                 if (ret < 0)
6656                         break;
6657
6658                 if (ret > 0) {
6659                         ret = 0;
6660                         if (path->slots[0] == 0)
6661                                 break;
6662                         path->slots[0]--;
6663                 }
6664                 ret = 0;
6665
6666                 leaf = path->nodes[0];
6667                 slot = path->slots[0];
6668
6669                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6670                 if (found_key.objectid != bytenr)
6671                         break;
6672
6673                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6674                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6675                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6676                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6677                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6678                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6679                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6680                         btrfs_release_path(path);
6681                         if (found_key.type == 0) {
6682                                 if (found_key.offset == 0)
6683                                         break;
6684                                 key.offset = found_key.offset - 1;
6685                                 key.type = found_key.type;
6686                         }
6687                         key.type = found_key.type - 1;
6688                         key.offset = (u64)-1;
6689                         continue;
6690                 }
6691
6692                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6693                         found_key.objectid, found_key.type, found_key.offset);
6694
6695                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6696                 if (ret)
6697                         break;
6698                 btrfs_release_path(path);
6699
6700                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6701                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6702                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6703                                 found_key.offset : root->nodesize;
6704
6705                         ret = btrfs_update_block_group(trans, root, bytenr,
6706                                                        bytes, 0, 0);
6707                         if (ret)
6708                                 break;
6709                 }
6710         }
6711
6712         btrfs_release_path(path);
6713         return ret;
6714 }
6715
6716 /*
6717  * for a single backref, this will allocate a new extent
6718  * and add the backref to it.
6719  */
6720 static int record_extent(struct btrfs_trans_handle *trans,
6721                          struct btrfs_fs_info *info,
6722                          struct btrfs_path *path,
6723                          struct extent_record *rec,
6724                          struct extent_backref *back,
6725                          int allocated, u64 flags)
6726 {
6727         int ret;
6728         struct btrfs_root *extent_root = info->extent_root;
6729         struct extent_buffer *leaf;
6730         struct btrfs_key ins_key;
6731         struct btrfs_extent_item *ei;
6732         struct tree_backref *tback;
6733         struct data_backref *dback;
6734         struct btrfs_tree_block_info *bi;
6735
6736         if (!back->is_data)
6737                 rec->max_size = max_t(u64, rec->max_size,
6738                                     info->extent_root->nodesize);
6739
6740         if (!allocated) {
6741                 u32 item_size = sizeof(*ei);
6742
6743                 if (!back->is_data)
6744                         item_size += sizeof(*bi);
6745
6746                 ins_key.objectid = rec->start;
6747                 ins_key.offset = rec->max_size;
6748                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6749
6750                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6751                                         &ins_key, item_size);
6752                 if (ret)
6753                         goto fail;
6754
6755                 leaf = path->nodes[0];
6756                 ei = btrfs_item_ptr(leaf, path->slots[0],
6757                                     struct btrfs_extent_item);
6758
6759                 btrfs_set_extent_refs(leaf, ei, 0);
6760                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6761
6762                 if (back->is_data) {
6763                         btrfs_set_extent_flags(leaf, ei,
6764                                                BTRFS_EXTENT_FLAG_DATA);
6765                 } else {
6766                         struct btrfs_disk_key copy_key;;
6767
6768                         tback = to_tree_backref(back);
6769                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6770                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6771                                              sizeof(*bi));
6772
6773                         btrfs_set_disk_key_objectid(&copy_key,
6774                                                     rec->info_objectid);
6775                         btrfs_set_disk_key_type(&copy_key, 0);
6776                         btrfs_set_disk_key_offset(&copy_key, 0);
6777
6778                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6779                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6780
6781                         btrfs_set_extent_flags(leaf, ei,
6782                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6783                 }
6784
6785                 btrfs_mark_buffer_dirty(leaf);
6786                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6787                                                rec->max_size, 1, 0);
6788                 if (ret)
6789                         goto fail;
6790                 btrfs_release_path(path);
6791         }
6792
6793         if (back->is_data) {
6794                 u64 parent;
6795                 int i;
6796
6797                 dback = to_data_backref(back);
6798                 if (back->full_backref)
6799                         parent = dback->parent;
6800                 else
6801                         parent = 0;
6802
6803                 for (i = 0; i < dback->found_ref; i++) {
6804                         /* if parent != 0, we're doing a full backref
6805                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6806                          * just makes the backref allocator create a data
6807                          * backref
6808                          */
6809                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6810                                                    rec->start, rec->max_size,
6811                                                    parent,
6812                                                    dback->root,
6813                                                    parent ?
6814                                                    BTRFS_FIRST_FREE_OBJECTID :
6815                                                    dback->owner,
6816                                                    dback->offset);
6817                         if (ret)
6818                                 break;
6819                 }
6820                 fprintf(stderr, "adding new data backref"
6821                                 " on %llu %s %llu owner %llu"
6822                                 " offset %llu found %d\n",
6823                                 (unsigned long long)rec->start,
6824                                 back->full_backref ?
6825                                 "parent" : "root",
6826                                 back->full_backref ?
6827                                 (unsigned long long)parent :
6828                                 (unsigned long long)dback->root,
6829                                 (unsigned long long)dback->owner,
6830                                 (unsigned long long)dback->offset,
6831                                 dback->found_ref);
6832         } else {
6833                 u64 parent;
6834
6835                 tback = to_tree_backref(back);
6836                 if (back->full_backref)
6837                         parent = tback->parent;
6838                 else
6839                         parent = 0;
6840
6841                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6842                                            rec->start, rec->max_size,
6843                                            parent, tback->root, 0, 0);
6844                 fprintf(stderr, "adding new tree backref on "
6845                         "start %llu len %llu parent %llu root %llu\n",
6846                         rec->start, rec->max_size, parent, tback->root);
6847         }
6848 fail:
6849         btrfs_release_path(path);
6850         return ret;
6851 }
6852
6853 static struct extent_entry *find_entry(struct list_head *entries,
6854                                        u64 bytenr, u64 bytes)
6855 {
6856         struct extent_entry *entry = NULL;
6857
6858         list_for_each_entry(entry, entries, list) {
6859                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6860                         return entry;
6861         }
6862
6863         return NULL;
6864 }
6865
6866 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6867 {
6868         struct extent_entry *entry, *best = NULL, *prev = NULL;
6869
6870         list_for_each_entry(entry, entries, list) {
6871                 if (!prev) {
6872                         prev = entry;
6873                         continue;
6874                 }
6875
6876                 /*
6877                  * If there are as many broken entries as entries then we know
6878                  * not to trust this particular entry.
6879                  */
6880                 if (entry->broken == entry->count)
6881                         continue;
6882
6883                 /*
6884                  * If our current entry == best then we can't be sure our best
6885                  * is really the best, so we need to keep searching.
6886                  */
6887                 if (best && best->count == entry->count) {
6888                         prev = entry;
6889                         best = NULL;
6890                         continue;
6891                 }
6892
6893                 /* Prev == entry, not good enough, have to keep searching */
6894                 if (!prev->broken && prev->count == entry->count)
6895                         continue;
6896
6897                 if (!best)
6898                         best = (prev->count > entry->count) ? prev : entry;
6899                 else if (best->count < entry->count)
6900                         best = entry;
6901                 prev = entry;
6902         }
6903
6904         return best;
6905 }
6906
6907 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6908                       struct data_backref *dback, struct extent_entry *entry)
6909 {
6910         struct btrfs_trans_handle *trans;
6911         struct btrfs_root *root;
6912         struct btrfs_file_extent_item *fi;
6913         struct extent_buffer *leaf;
6914         struct btrfs_key key;
6915         u64 bytenr, bytes;
6916         int ret, err;
6917
6918         key.objectid = dback->root;
6919         key.type = BTRFS_ROOT_ITEM_KEY;
6920         key.offset = (u64)-1;
6921         root = btrfs_read_fs_root(info, &key);
6922         if (IS_ERR(root)) {
6923                 fprintf(stderr, "Couldn't find root for our ref\n");
6924                 return -EINVAL;
6925         }
6926
6927         /*
6928          * The backref points to the original offset of the extent if it was
6929          * split, so we need to search down to the offset we have and then walk
6930          * forward until we find the backref we're looking for.
6931          */
6932         key.objectid = dback->owner;
6933         key.type = BTRFS_EXTENT_DATA_KEY;
6934         key.offset = dback->offset;
6935         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6936         if (ret < 0) {
6937                 fprintf(stderr, "Error looking up ref %d\n", ret);
6938                 return ret;
6939         }
6940
6941         while (1) {
6942                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6943                         ret = btrfs_next_leaf(root, path);
6944                         if (ret) {
6945                                 fprintf(stderr, "Couldn't find our ref, next\n");
6946                                 return -EINVAL;
6947                         }
6948                 }
6949                 leaf = path->nodes[0];
6950                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6951                 if (key.objectid != dback->owner ||
6952                     key.type != BTRFS_EXTENT_DATA_KEY) {
6953                         fprintf(stderr, "Couldn't find our ref, search\n");
6954                         return -EINVAL;
6955                 }
6956                 fi = btrfs_item_ptr(leaf, path->slots[0],
6957                                     struct btrfs_file_extent_item);
6958                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6959                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6960
6961                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6962                         break;
6963                 path->slots[0]++;
6964         }
6965
6966         btrfs_release_path(path);
6967
6968         trans = btrfs_start_transaction(root, 1);
6969         if (IS_ERR(trans))
6970                 return PTR_ERR(trans);
6971
6972         /*
6973          * Ok we have the key of the file extent we want to fix, now we can cow
6974          * down to the thing and fix it.
6975          */
6976         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6977         if (ret < 0) {
6978                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6979                         key.objectid, key.type, key.offset, ret);
6980                 goto out;
6981         }
6982         if (ret > 0) {
6983                 fprintf(stderr, "Well that's odd, we just found this key "
6984                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6985                         key.offset);
6986                 ret = -EINVAL;
6987                 goto out;
6988         }
6989         leaf = path->nodes[0];
6990         fi = btrfs_item_ptr(leaf, path->slots[0],
6991                             struct btrfs_file_extent_item);
6992
6993         if (btrfs_file_extent_compression(leaf, fi) &&
6994             dback->disk_bytenr != entry->bytenr) {
6995                 fprintf(stderr, "Ref doesn't match the record start and is "
6996                         "compressed, please take a btrfs-image of this file "
6997                         "system and send it to a btrfs developer so they can "
6998                         "complete this functionality for bytenr %Lu\n",
6999                         dback->disk_bytenr);
7000                 ret = -EINVAL;
7001                 goto out;
7002         }
7003
7004         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7005                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7006         } else if (dback->disk_bytenr > entry->bytenr) {
7007                 u64 off_diff, offset;
7008
7009                 off_diff = dback->disk_bytenr - entry->bytenr;
7010                 offset = btrfs_file_extent_offset(leaf, fi);
7011                 if (dback->disk_bytenr + offset +
7012                     btrfs_file_extent_num_bytes(leaf, fi) >
7013                     entry->bytenr + entry->bytes) {
7014                         fprintf(stderr, "Ref is past the entry end, please "
7015                                 "take a btrfs-image of this file system and "
7016                                 "send it to a btrfs developer, ref %Lu\n",
7017                                 dback->disk_bytenr);
7018                         ret = -EINVAL;
7019                         goto out;
7020                 }
7021                 offset += off_diff;
7022                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7023                 btrfs_set_file_extent_offset(leaf, fi, offset);
7024         } else if (dback->disk_bytenr < entry->bytenr) {
7025                 u64 offset;
7026
7027                 offset = btrfs_file_extent_offset(leaf, fi);
7028                 if (dback->disk_bytenr + offset < entry->bytenr) {
7029                         fprintf(stderr, "Ref is before the entry start, please"
7030                                 " take a btrfs-image of this file system and "
7031                                 "send it to a btrfs developer, ref %Lu\n",
7032                                 dback->disk_bytenr);
7033                         ret = -EINVAL;
7034                         goto out;
7035                 }
7036
7037                 offset += dback->disk_bytenr;
7038                 offset -= entry->bytenr;
7039                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7040                 btrfs_set_file_extent_offset(leaf, fi, offset);
7041         }
7042
7043         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7044
7045         /*
7046          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7047          * only do this if we aren't using compression, otherwise it's a
7048          * trickier case.
7049          */
7050         if (!btrfs_file_extent_compression(leaf, fi))
7051                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7052         else
7053                 printf("ram bytes may be wrong?\n");
7054         btrfs_mark_buffer_dirty(leaf);
7055 out:
7056         err = btrfs_commit_transaction(trans, root);
7057         btrfs_release_path(path);
7058         return ret ? ret : err;
7059 }
7060
7061 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7062                            struct extent_record *rec)
7063 {
7064         struct extent_backref *back, *tmp;
7065         struct data_backref *dback;
7066         struct extent_entry *entry, *best = NULL;
7067         LIST_HEAD(entries);
7068         int nr_entries = 0;
7069         int broken_entries = 0;
7070         int ret = 0;
7071         short mismatch = 0;
7072
7073         /*
7074          * Metadata is easy and the backrefs should always agree on bytenr and
7075          * size, if not we've got bigger issues.
7076          */
7077         if (rec->metadata)
7078                 return 0;
7079
7080         rbtree_postorder_for_each_entry_safe(back, tmp,
7081                                              &rec->backref_tree, node) {
7082                 if (back->full_backref || !back->is_data)
7083                         continue;
7084
7085                 dback = to_data_backref(back);
7086
7087                 /*
7088                  * We only pay attention to backrefs that we found a real
7089                  * backref for.
7090                  */
7091                 if (dback->found_ref == 0)
7092                         continue;
7093
7094                 /*
7095                  * For now we only catch when the bytes don't match, not the
7096                  * bytenr.  We can easily do this at the same time, but I want
7097                  * to have a fs image to test on before we just add repair
7098                  * functionality willy-nilly so we know we won't screw up the
7099                  * repair.
7100                  */
7101
7102                 entry = find_entry(&entries, dback->disk_bytenr,
7103                                    dback->bytes);
7104                 if (!entry) {
7105                         entry = malloc(sizeof(struct extent_entry));
7106                         if (!entry) {
7107                                 ret = -ENOMEM;
7108                                 goto out;
7109                         }
7110                         memset(entry, 0, sizeof(*entry));
7111                         entry->bytenr = dback->disk_bytenr;
7112                         entry->bytes = dback->bytes;
7113                         list_add_tail(&entry->list, &entries);
7114                         nr_entries++;
7115                 }
7116
7117                 /*
7118                  * If we only have on entry we may think the entries agree when
7119                  * in reality they don't so we have to do some extra checking.
7120                  */
7121                 if (dback->disk_bytenr != rec->start ||
7122                     dback->bytes != rec->nr || back->broken)
7123                         mismatch = 1;
7124
7125                 if (back->broken) {
7126                         entry->broken++;
7127                         broken_entries++;
7128                 }
7129
7130                 entry->count++;
7131         }
7132
7133         /* Yay all the backrefs agree, carry on good sir */
7134         if (nr_entries <= 1 && !mismatch)
7135                 goto out;
7136
7137         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7138                 "%Lu\n", rec->start);
7139
7140         /*
7141          * First we want to see if the backrefs can agree amongst themselves who
7142          * is right, so figure out which one of the entries has the highest
7143          * count.
7144          */
7145         best = find_most_right_entry(&entries);
7146
7147         /*
7148          * Ok so we may have an even split between what the backrefs think, so
7149          * this is where we use the extent ref to see what it thinks.
7150          */
7151         if (!best) {
7152                 entry = find_entry(&entries, rec->start, rec->nr);
7153                 if (!entry && (!broken_entries || !rec->found_rec)) {
7154                         fprintf(stderr, "Backrefs don't agree with each other "
7155                                 "and extent record doesn't agree with anybody,"
7156                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7157                                 rec->start, rec->nr);
7158                         ret = -EINVAL;
7159                         goto out;
7160                 } else if (!entry) {
7161                         /*
7162                          * Ok our backrefs were broken, we'll assume this is the
7163                          * correct value and add an entry for this range.
7164                          */
7165                         entry = malloc(sizeof(struct extent_entry));
7166                         if (!entry) {
7167                                 ret = -ENOMEM;
7168                                 goto out;
7169                         }
7170                         memset(entry, 0, sizeof(*entry));
7171                         entry->bytenr = rec->start;
7172                         entry->bytes = rec->nr;
7173                         list_add_tail(&entry->list, &entries);
7174                         nr_entries++;
7175                 }
7176                 entry->count++;
7177                 best = find_most_right_entry(&entries);
7178                 if (!best) {
7179                         fprintf(stderr, "Backrefs and extent record evenly "
7180                                 "split on who is right, this is going to "
7181                                 "require user input to fix bytenr %Lu bytes "
7182                                 "%Lu\n", rec->start, rec->nr);
7183                         ret = -EINVAL;
7184                         goto out;
7185                 }
7186         }
7187
7188         /*
7189          * I don't think this can happen currently as we'll abort() if we catch
7190          * this case higher up, but in case somebody removes that we still can't
7191          * deal with it properly here yet, so just bail out of that's the case.
7192          */
7193         if (best->bytenr != rec->start) {
7194                 fprintf(stderr, "Extent start and backref starts don't match, "
7195                         "please use btrfs-image on this file system and send "
7196                         "it to a btrfs developer so they can make fsck fix "
7197                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7198                         rec->start, rec->nr);
7199                 ret = -EINVAL;
7200                 goto out;
7201         }
7202
7203         /*
7204          * Ok great we all agreed on an extent record, let's go find the real
7205          * references and fix up the ones that don't match.
7206          */
7207         rbtree_postorder_for_each_entry_safe(back, tmp,
7208                                              &rec->backref_tree, node) {
7209                 if (back->full_backref || !back->is_data)
7210                         continue;
7211
7212                 dback = to_data_backref(back);
7213
7214                 /*
7215                  * Still ignoring backrefs that don't have a real ref attached
7216                  * to them.
7217                  */
7218                 if (dback->found_ref == 0)
7219                         continue;
7220
7221                 if (dback->bytes == best->bytes &&
7222                     dback->disk_bytenr == best->bytenr)
7223                         continue;
7224
7225                 ret = repair_ref(info, path, dback, best);
7226                 if (ret)
7227                         goto out;
7228         }
7229
7230         /*
7231          * Ok we messed with the actual refs, which means we need to drop our
7232          * entire cache and go back and rescan.  I know this is a huge pain and
7233          * adds a lot of extra work, but it's the only way to be safe.  Once all
7234          * the backrefs agree we may not need to do anything to the extent
7235          * record itself.
7236          */
7237         ret = -EAGAIN;
7238 out:
7239         while (!list_empty(&entries)) {
7240                 entry = list_entry(entries.next, struct extent_entry, list);
7241                 list_del_init(&entry->list);
7242                 free(entry);
7243         }
7244         return ret;
7245 }
7246
7247 static int process_duplicates(struct btrfs_root *root,
7248                               struct cache_tree *extent_cache,
7249                               struct extent_record *rec)
7250 {
7251         struct extent_record *good, *tmp;
7252         struct cache_extent *cache;
7253         int ret;
7254
7255         /*
7256          * If we found a extent record for this extent then return, or if we
7257          * have more than one duplicate we are likely going to need to delete
7258          * something.
7259          */
7260         if (rec->found_rec || rec->num_duplicates > 1)
7261                 return 0;
7262
7263         /* Shouldn't happen but just in case */
7264         BUG_ON(!rec->num_duplicates);
7265
7266         /*
7267          * So this happens if we end up with a backref that doesn't match the
7268          * actual extent entry.  So either the backref is bad or the extent
7269          * entry is bad.  Either way we want to have the extent_record actually
7270          * reflect what we found in the extent_tree, so we need to take the
7271          * duplicate out and use that as the extent_record since the only way we
7272          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7273          */
7274         remove_cache_extent(extent_cache, &rec->cache);
7275
7276         good = to_extent_record(rec->dups.next);
7277         list_del_init(&good->list);
7278         INIT_LIST_HEAD(&good->backrefs);
7279         INIT_LIST_HEAD(&good->dups);
7280         good->cache.start = good->start;
7281         good->cache.size = good->nr;
7282         good->content_checked = 0;
7283         good->owner_ref_checked = 0;
7284         good->num_duplicates = 0;
7285         good->refs = rec->refs;
7286         list_splice_init(&rec->backrefs, &good->backrefs);
7287         while (1) {
7288                 cache = lookup_cache_extent(extent_cache, good->start,
7289                                             good->nr);
7290                 if (!cache)
7291                         break;
7292                 tmp = container_of(cache, struct extent_record, cache);
7293
7294                 /*
7295                  * If we find another overlapping extent and it's found_rec is
7296                  * set then it's a duplicate and we need to try and delete
7297                  * something.
7298                  */
7299                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7300                         if (list_empty(&good->list))
7301                                 list_add_tail(&good->list,
7302                                               &duplicate_extents);
7303                         good->num_duplicates += tmp->num_duplicates + 1;
7304                         list_splice_init(&tmp->dups, &good->dups);
7305                         list_del_init(&tmp->list);
7306                         list_add_tail(&tmp->list, &good->dups);
7307                         remove_cache_extent(extent_cache, &tmp->cache);
7308                         continue;
7309                 }
7310
7311                 /*
7312                  * Ok we have another non extent item backed extent rec, so lets
7313                  * just add it to this extent and carry on like we did above.
7314                  */
7315                 good->refs += tmp->refs;
7316                 list_splice_init(&tmp->backrefs, &good->backrefs);
7317                 remove_cache_extent(extent_cache, &tmp->cache);
7318                 free(tmp);
7319         }
7320         ret = insert_cache_extent(extent_cache, &good->cache);
7321         BUG_ON(ret);
7322         free(rec);
7323         return good->num_duplicates ? 0 : 1;
7324 }
7325
7326 static int delete_duplicate_records(struct btrfs_root *root,
7327                                     struct extent_record *rec)
7328 {
7329         struct btrfs_trans_handle *trans;
7330         LIST_HEAD(delete_list);
7331         struct btrfs_path *path;
7332         struct extent_record *tmp, *good, *n;
7333         int nr_del = 0;
7334         int ret = 0, err;
7335         struct btrfs_key key;
7336
7337         path = btrfs_alloc_path();
7338         if (!path) {
7339                 ret = -ENOMEM;
7340                 goto out;
7341         }
7342
7343         good = rec;
7344         /* Find the record that covers all of the duplicates. */
7345         list_for_each_entry(tmp, &rec->dups, list) {
7346                 if (good->start < tmp->start)
7347                         continue;
7348                 if (good->nr > tmp->nr)
7349                         continue;
7350
7351                 if (tmp->start + tmp->nr < good->start + good->nr) {
7352                         fprintf(stderr, "Ok we have overlapping extents that "
7353                                 "aren't completely covered by each other, this "
7354                                 "is going to require more careful thought.  "
7355                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7356                                 tmp->start, tmp->nr, good->start, good->nr);
7357                         abort();
7358                 }
7359                 good = tmp;
7360         }
7361
7362         if (good != rec)
7363                 list_add_tail(&rec->list, &delete_list);
7364
7365         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7366                 if (tmp == good)
7367                         continue;
7368                 list_move_tail(&tmp->list, &delete_list);
7369         }
7370
7371         root = root->fs_info->extent_root;
7372         trans = btrfs_start_transaction(root, 1);
7373         if (IS_ERR(trans)) {
7374                 ret = PTR_ERR(trans);
7375                 goto out;
7376         }
7377
7378         list_for_each_entry(tmp, &delete_list, list) {
7379                 if (tmp->found_rec == 0)
7380                         continue;
7381                 key.objectid = tmp->start;
7382                 key.type = BTRFS_EXTENT_ITEM_KEY;
7383                 key.offset = tmp->nr;
7384
7385                 /* Shouldn't happen but just in case */
7386                 if (tmp->metadata) {
7387                         fprintf(stderr, "Well this shouldn't happen, extent "
7388                                 "record overlaps but is metadata? "
7389                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7390                         abort();
7391                 }
7392
7393                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7394                 if (ret) {
7395                         if (ret > 0)
7396                                 ret = -EINVAL;
7397                         break;
7398                 }
7399                 ret = btrfs_del_item(trans, root, path);
7400                 if (ret)
7401                         break;
7402                 btrfs_release_path(path);
7403                 nr_del++;
7404         }
7405         err = btrfs_commit_transaction(trans, root);
7406         if (err && !ret)
7407                 ret = err;
7408 out:
7409         while (!list_empty(&delete_list)) {
7410                 tmp = to_extent_record(delete_list.next);
7411                 list_del_init(&tmp->list);
7412                 if (tmp == rec)
7413                         continue;
7414                 free(tmp);
7415         }
7416
7417         while (!list_empty(&rec->dups)) {
7418                 tmp = to_extent_record(rec->dups.next);
7419                 list_del_init(&tmp->list);
7420                 free(tmp);
7421         }
7422
7423         btrfs_free_path(path);
7424
7425         if (!ret && !nr_del)
7426                 rec->num_duplicates = 0;
7427
7428         return ret ? ret : nr_del;
7429 }
7430
7431 static int find_possible_backrefs(struct btrfs_fs_info *info,
7432                                   struct btrfs_path *path,
7433                                   struct cache_tree *extent_cache,
7434                                   struct extent_record *rec)
7435 {
7436         struct btrfs_root *root;
7437         struct extent_backref *back, *tmp;
7438         struct data_backref *dback;
7439         struct cache_extent *cache;
7440         struct btrfs_file_extent_item *fi;
7441         struct btrfs_key key;
7442         u64 bytenr, bytes;
7443         int ret;
7444
7445         rbtree_postorder_for_each_entry_safe(back, tmp,
7446                                              &rec->backref_tree, node) {
7447                 /* Don't care about full backrefs (poor unloved backrefs) */
7448                 if (back->full_backref || !back->is_data)
7449                         continue;
7450
7451                 dback = to_data_backref(back);
7452
7453                 /* We found this one, we don't need to do a lookup */
7454                 if (dback->found_ref)
7455                         continue;
7456
7457                 key.objectid = dback->root;
7458                 key.type = BTRFS_ROOT_ITEM_KEY;
7459                 key.offset = (u64)-1;
7460
7461                 root = btrfs_read_fs_root(info, &key);
7462
7463                 /* No root, definitely a bad ref, skip */
7464                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7465                         continue;
7466                 /* Other err, exit */
7467                 if (IS_ERR(root))
7468                         return PTR_ERR(root);
7469
7470                 key.objectid = dback->owner;
7471                 key.type = BTRFS_EXTENT_DATA_KEY;
7472                 key.offset = dback->offset;
7473                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7474                 if (ret) {
7475                         btrfs_release_path(path);
7476                         if (ret < 0)
7477                                 return ret;
7478                         /* Didn't find it, we can carry on */
7479                         ret = 0;
7480                         continue;
7481                 }
7482
7483                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7484                                     struct btrfs_file_extent_item);
7485                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7486                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7487                 btrfs_release_path(path);
7488                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7489                 if (cache) {
7490                         struct extent_record *tmp;
7491                         tmp = container_of(cache, struct extent_record, cache);
7492
7493                         /*
7494                          * If we found an extent record for the bytenr for this
7495                          * particular backref then we can't add it to our
7496                          * current extent record.  We only want to add backrefs
7497                          * that don't have a corresponding extent item in the
7498                          * extent tree since they likely belong to this record
7499                          * and we need to fix it if it doesn't match bytenrs.
7500                          */
7501                         if  (tmp->found_rec)
7502                                 continue;
7503                 }
7504
7505                 dback->found_ref += 1;
7506                 dback->disk_bytenr = bytenr;
7507                 dback->bytes = bytes;
7508
7509                 /*
7510                  * Set this so the verify backref code knows not to trust the
7511                  * values in this backref.
7512                  */
7513                 back->broken = 1;
7514         }
7515
7516         return 0;
7517 }
7518
7519 /*
7520  * Record orphan data ref into corresponding root.
7521  *
7522  * Return 0 if the extent item contains data ref and recorded.
7523  * Return 1 if the extent item contains no useful data ref
7524  *   In that case, it may contain only shared_dataref or metadata backref,
7525  *   or the file extent exists (this should be handled by the extent bytenr
7526  *   recovery routine)
7527  * Return <0 if something goes wrong.
7528  */
7529 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7530                                       struct extent_record *rec)
7531 {
7532         struct btrfs_key key;
7533         struct btrfs_root *dest_root;
7534         struct extent_backref *back, *tmp;
7535         struct data_backref *dback;
7536         struct orphan_data_extent *orphan;
7537         struct btrfs_path *path;
7538         int recorded_data_ref = 0;
7539         int ret = 0;
7540
7541         if (rec->metadata)
7542                 return 1;
7543         path = btrfs_alloc_path();
7544         if (!path)
7545                 return -ENOMEM;
7546         rbtree_postorder_for_each_entry_safe(back, tmp,
7547                                              &rec->backref_tree, node) {
7548                 if (back->full_backref || !back->is_data ||
7549                     !back->found_extent_tree)
7550                         continue;
7551                 dback = to_data_backref(back);
7552                 if (dback->found_ref)
7553                         continue;
7554                 key.objectid = dback->root;
7555                 key.type = BTRFS_ROOT_ITEM_KEY;
7556                 key.offset = (u64)-1;
7557
7558                 dest_root = btrfs_read_fs_root(fs_info, &key);
7559
7560                 /* For non-exist root we just skip it */
7561                 if (IS_ERR(dest_root) || !dest_root)
7562                         continue;
7563
7564                 key.objectid = dback->owner;
7565                 key.type = BTRFS_EXTENT_DATA_KEY;
7566                 key.offset = dback->offset;
7567
7568                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7569                 /*
7570                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7571                  * we need to record it for inode/file extent rebuild.
7572                  * For ret > 0, we record it only for file extent rebuild.
7573                  * For ret == 0, the file extent exists but only bytenr
7574                  * mismatch, let the original bytenr fix routine to handle,
7575                  * don't record it.
7576                  */
7577                 if (ret == 0)
7578                         continue;
7579                 ret = 0;
7580                 orphan = malloc(sizeof(*orphan));
7581                 if (!orphan) {
7582                         ret = -ENOMEM;
7583                         goto out;
7584                 }
7585                 INIT_LIST_HEAD(&orphan->list);
7586                 orphan->root = dback->root;
7587                 orphan->objectid = dback->owner;
7588                 orphan->offset = dback->offset;
7589                 orphan->disk_bytenr = rec->cache.start;
7590                 orphan->disk_len = rec->cache.size;
7591                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7592                 recorded_data_ref = 1;
7593         }
7594 out:
7595         btrfs_free_path(path);
7596         if (!ret)
7597                 return !recorded_data_ref;
7598         else
7599                 return ret;
7600 }
7601
7602 /*
7603  * when an incorrect extent item is found, this will delete
7604  * all of the existing entries for it and recreate them
7605  * based on what the tree scan found.
7606  */
7607 static int fixup_extent_refs(struct btrfs_fs_info *info,
7608                              struct cache_tree *extent_cache,
7609                              struct extent_record *rec)
7610 {
7611         struct btrfs_trans_handle *trans = NULL;
7612         int ret;
7613         struct btrfs_path *path;
7614         struct cache_extent *cache;
7615         struct extent_backref *back, *tmp;
7616         int allocated = 0;
7617         u64 flags = 0;
7618
7619         if (rec->flag_block_full_backref)
7620                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7621
7622         path = btrfs_alloc_path();
7623         if (!path)
7624                 return -ENOMEM;
7625
7626         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7627                 /*
7628                  * Sometimes the backrefs themselves are so broken they don't
7629                  * get attached to any meaningful rec, so first go back and
7630                  * check any of our backrefs that we couldn't find and throw
7631                  * them into the list if we find the backref so that
7632                  * verify_backrefs can figure out what to do.
7633                  */
7634                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7635                 if (ret < 0)
7636                         goto out;
7637         }
7638
7639         /* step one, make sure all of the backrefs agree */
7640         ret = verify_backrefs(info, path, rec);
7641         if (ret < 0)
7642                 goto out;
7643
7644         trans = btrfs_start_transaction(info->extent_root, 1);
7645         if (IS_ERR(trans)) {
7646                 ret = PTR_ERR(trans);
7647                 goto out;
7648         }
7649
7650         /* step two, delete all the existing records */
7651         ret = delete_extent_records(trans, info->extent_root, path,
7652                                     rec->start, rec->max_size);
7653
7654         if (ret < 0)
7655                 goto out;
7656
7657         /* was this block corrupt?  If so, don't add references to it */
7658         cache = lookup_cache_extent(info->corrupt_blocks,
7659                                     rec->start, rec->max_size);
7660         if (cache) {
7661                 ret = 0;
7662                 goto out;
7663         }
7664
7665         /* step three, recreate all the refs we did find */
7666         rbtree_postorder_for_each_entry_safe(back, tmp,
7667                                              &rec->backref_tree, node) {
7668                 /*
7669                  * if we didn't find any references, don't create a
7670                  * new extent record
7671                  */
7672                 if (!back->found_ref)
7673                         continue;
7674
7675                 rec->bad_full_backref = 0;
7676                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7677                 allocated = 1;
7678
7679                 if (ret)
7680                         goto out;
7681         }
7682 out:
7683         if (trans) {
7684                 int err = btrfs_commit_transaction(trans, info->extent_root);
7685                 if (!ret)
7686                         ret = err;
7687         }
7688
7689         btrfs_free_path(path);
7690         return ret;
7691 }
7692
7693 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7694                               struct extent_record *rec)
7695 {
7696         struct btrfs_trans_handle *trans;
7697         struct btrfs_root *root = fs_info->extent_root;
7698         struct btrfs_path *path;
7699         struct btrfs_extent_item *ei;
7700         struct btrfs_key key;
7701         u64 flags;
7702         int ret = 0;
7703
7704         key.objectid = rec->start;
7705         if (rec->metadata) {
7706                 key.type = BTRFS_METADATA_ITEM_KEY;
7707                 key.offset = rec->info_level;
7708         } else {
7709                 key.type = BTRFS_EXTENT_ITEM_KEY;
7710                 key.offset = rec->max_size;
7711         }
7712
7713         path = btrfs_alloc_path();
7714         if (!path)
7715                 return -ENOMEM;
7716
7717         trans = btrfs_start_transaction(root, 0);
7718         if (IS_ERR(trans)) {
7719                 btrfs_free_path(path);
7720                 return PTR_ERR(trans);
7721         }
7722
7723         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7724         if (ret < 0) {
7725                 btrfs_free_path(path);
7726                 btrfs_commit_transaction(trans, root);
7727                 return ret;
7728         } else if (ret) {
7729                 fprintf(stderr, "Didn't find extent for %llu\n",
7730                         (unsigned long long)rec->start);
7731                 btrfs_free_path(path);
7732                 btrfs_commit_transaction(trans, root);
7733                 return -ENOENT;
7734         }
7735
7736         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7737                             struct btrfs_extent_item);
7738         flags = btrfs_extent_flags(path->nodes[0], ei);
7739         if (rec->flag_block_full_backref) {
7740                 fprintf(stderr, "setting full backref on %llu\n",
7741                         (unsigned long long)key.objectid);
7742                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7743         } else {
7744                 fprintf(stderr, "clearing full backref on %llu\n",
7745                         (unsigned long long)key.objectid);
7746                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7747         }
7748         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7749         btrfs_mark_buffer_dirty(path->nodes[0]);
7750         btrfs_free_path(path);
7751         return btrfs_commit_transaction(trans, root);
7752 }
7753
7754 /* right now we only prune from the extent allocation tree */
7755 static int prune_one_block(struct btrfs_trans_handle *trans,
7756                            struct btrfs_fs_info *info,
7757                            struct btrfs_corrupt_block *corrupt)
7758 {
7759         int ret;
7760         struct btrfs_path path;
7761         struct extent_buffer *eb;
7762         u64 found;
7763         int slot;
7764         int nritems;
7765         int level = corrupt->level + 1;
7766
7767         btrfs_init_path(&path);
7768 again:
7769         /* we want to stop at the parent to our busted block */
7770         path.lowest_level = level;
7771
7772         ret = btrfs_search_slot(trans, info->extent_root,
7773                                 &corrupt->key, &path, -1, 1);
7774
7775         if (ret < 0)
7776                 goto out;
7777
7778         eb = path.nodes[level];
7779         if (!eb) {
7780                 ret = -ENOENT;
7781                 goto out;
7782         }
7783
7784         /*
7785          * hopefully the search gave us the block we want to prune,
7786          * lets try that first
7787          */
7788         slot = path.slots[level];
7789         found =  btrfs_node_blockptr(eb, slot);
7790         if (found == corrupt->cache.start)
7791                 goto del_ptr;
7792
7793         nritems = btrfs_header_nritems(eb);
7794
7795         /* the search failed, lets scan this node and hope we find it */
7796         for (slot = 0; slot < nritems; slot++) {
7797                 found =  btrfs_node_blockptr(eb, slot);
7798                 if (found == corrupt->cache.start)
7799                         goto del_ptr;
7800         }
7801         /*
7802          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7803          * to this block
7804          */
7805         if (eb == info->extent_root->node) {
7806                 ret = -ENOENT;
7807                 goto out;
7808         } else {
7809                 level++;
7810                 btrfs_release_path(&path);
7811                 goto again;
7812         }
7813
7814 del_ptr:
7815         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7816         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7817
7818 out:
7819         btrfs_release_path(&path);
7820         return ret;
7821 }
7822
7823 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7824 {
7825         struct btrfs_trans_handle *trans = NULL;
7826         struct cache_extent *cache;
7827         struct btrfs_corrupt_block *corrupt;
7828
7829         while (1) {
7830                 cache = search_cache_extent(info->corrupt_blocks, 0);
7831                 if (!cache)
7832                         break;
7833                 if (!trans) {
7834                         trans = btrfs_start_transaction(info->extent_root, 1);
7835                         if (IS_ERR(trans))
7836                                 return PTR_ERR(trans);
7837                 }
7838                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7839                 prune_one_block(trans, info, corrupt);
7840                 remove_cache_extent(info->corrupt_blocks, cache);
7841         }
7842         if (trans)
7843                 return btrfs_commit_transaction(trans, info->extent_root);
7844         return 0;
7845 }
7846
7847 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7848 {
7849         struct btrfs_block_group_cache *cache;
7850         u64 start, end;
7851         int ret;
7852
7853         while (1) {
7854                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7855                                             &start, &end, EXTENT_DIRTY);
7856                 if (ret)
7857                         break;
7858                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7859                                    GFP_NOFS);
7860         }
7861
7862         start = 0;
7863         while (1) {
7864                 cache = btrfs_lookup_first_block_group(fs_info, start);
7865                 if (!cache)
7866                         break;
7867                 if (cache->cached)
7868                         cache->cached = 0;
7869                 start = cache->key.objectid + cache->key.offset;
7870         }
7871 }
7872
7873 static int check_extent_refs(struct btrfs_root *root,
7874                              struct cache_tree *extent_cache)
7875 {
7876         struct extent_record *rec;
7877         struct cache_extent *cache;
7878         int err = 0;
7879         int ret = 0;
7880         int fixed = 0;
7881         int had_dups = 0;
7882         int recorded = 0;
7883
7884         if (repair) {
7885                 /*
7886                  * if we're doing a repair, we have to make sure
7887                  * we don't allocate from the problem extents.
7888                  * In the worst case, this will be all the
7889                  * extents in the FS
7890                  */
7891                 cache = search_cache_extent(extent_cache, 0);
7892                 while(cache) {
7893                         rec = container_of(cache, struct extent_record, cache);
7894                         set_extent_dirty(root->fs_info->excluded_extents,
7895                                          rec->start,
7896                                          rec->start + rec->max_size - 1,
7897                                          GFP_NOFS);
7898                         cache = next_cache_extent(cache);
7899                 }
7900
7901                 /* pin down all the corrupted blocks too */
7902                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7903                 while(cache) {
7904                         set_extent_dirty(root->fs_info->excluded_extents,
7905                                          cache->start,
7906                                          cache->start + cache->size - 1,
7907                                          GFP_NOFS);
7908                         cache = next_cache_extent(cache);
7909                 }
7910                 prune_corrupt_blocks(root->fs_info);
7911                 reset_cached_block_groups(root->fs_info);
7912         }
7913
7914         reset_cached_block_groups(root->fs_info);
7915
7916         /*
7917          * We need to delete any duplicate entries we find first otherwise we
7918          * could mess up the extent tree when we have backrefs that actually
7919          * belong to a different extent item and not the weird duplicate one.
7920          */
7921         while (repair && !list_empty(&duplicate_extents)) {
7922                 rec = to_extent_record(duplicate_extents.next);
7923                 list_del_init(&rec->list);
7924
7925                 /* Sometimes we can find a backref before we find an actual
7926                  * extent, so we need to process it a little bit to see if there
7927                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7928                  * if this is a backref screwup.  If we need to delete stuff
7929                  * process_duplicates() will return 0, otherwise it will return
7930                  * 1 and we
7931                  */
7932                 if (process_duplicates(root, extent_cache, rec))
7933                         continue;
7934                 ret = delete_duplicate_records(root, rec);
7935                 if (ret < 0)
7936                         return ret;
7937                 /*
7938                  * delete_duplicate_records will return the number of entries
7939                  * deleted, so if it's greater than 0 then we know we actually
7940                  * did something and we need to remove.
7941                  */
7942                 if (ret)
7943                         had_dups = 1;
7944         }
7945
7946         if (had_dups)
7947                 return -EAGAIN;
7948
7949         while(1) {
7950                 int cur_err = 0;
7951
7952                 fixed = 0;
7953                 recorded = 0;
7954                 cache = search_cache_extent(extent_cache, 0);
7955                 if (!cache)
7956                         break;
7957                 rec = container_of(cache, struct extent_record, cache);
7958                 if (rec->num_duplicates) {
7959                         fprintf(stderr, "extent item %llu has multiple extent "
7960                                 "items\n", (unsigned long long)rec->start);
7961                         err = 1;
7962                         cur_err = 1;
7963                 }
7964
7965                 if (rec->refs != rec->extent_item_refs) {
7966                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7967                                 (unsigned long long)rec->start,
7968                                 (unsigned long long)rec->nr);
7969                         fprintf(stderr, "extent item %llu, found %llu\n",
7970                                 (unsigned long long)rec->extent_item_refs,
7971                                 (unsigned long long)rec->refs);
7972                         ret = record_orphan_data_extents(root->fs_info, rec);
7973                         if (ret < 0)
7974                                 goto repair_abort;
7975                         if (ret == 0) {
7976                                 recorded = 1;
7977                         } else {
7978                                 /*
7979                                  * we can't use the extent to repair file
7980                                  * extent, let the fallback method handle it.
7981                                  */
7982                                 if (!fixed && repair) {
7983                                         ret = fixup_extent_refs(
7984                                                         root->fs_info,
7985                                                         extent_cache, rec);
7986                                         if (ret)
7987                                                 goto repair_abort;
7988                                         fixed = 1;
7989                                 }
7990                         }
7991                         err = 1;
7992                         cur_err = 1;
7993                 }
7994                 if (all_backpointers_checked(rec, 1)) {
7995                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7996                                 (unsigned long long)rec->start,
7997                                 (unsigned long long)rec->nr);
7998
7999                         if (!fixed && !recorded && repair) {
8000                                 ret = fixup_extent_refs(root->fs_info,
8001                                                         extent_cache, rec);
8002                                 if (ret)
8003                                         goto repair_abort;
8004                                 fixed = 1;
8005                         }
8006                         cur_err = 1;
8007                         err = 1;
8008                 }
8009                 if (!rec->owner_ref_checked) {
8010                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8011                                 (unsigned long long)rec->start,
8012                                 (unsigned long long)rec->nr);
8013                         if (!fixed && !recorded && repair) {
8014                                 ret = fixup_extent_refs(root->fs_info,
8015                                                         extent_cache, rec);
8016                                 if (ret)
8017                                         goto repair_abort;
8018                                 fixed = 1;
8019                         }
8020                         err = 1;
8021                         cur_err = 1;
8022                 }
8023                 if (rec->bad_full_backref) {
8024                         fprintf(stderr, "bad full backref, on [%llu]\n",
8025                                 (unsigned long long)rec->start);
8026                         if (repair) {
8027                                 ret = fixup_extent_flags(root->fs_info, rec);
8028                                 if (ret)
8029                                         goto repair_abort;
8030                                 fixed = 1;
8031                         }
8032                         err = 1;
8033                         cur_err = 1;
8034                 }
8035                 /*
8036                  * Although it's not a extent ref's problem, we reuse this
8037                  * routine for error reporting.
8038                  * No repair function yet.
8039                  */
8040                 if (rec->crossing_stripes) {
8041                         fprintf(stderr,
8042                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8043                                 rec->start, rec->start + rec->max_size);
8044                         err = 1;
8045                         cur_err = 1;
8046                 }
8047
8048                 if (rec->wrong_chunk_type) {
8049                         fprintf(stderr,
8050                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8051                                 rec->start, rec->start + rec->max_size);
8052                         err = 1;
8053                         cur_err = 1;
8054                 }
8055
8056                 remove_cache_extent(extent_cache, cache);
8057                 free_all_extent_backrefs(rec);
8058                 if (!init_extent_tree && repair && (!cur_err || fixed))
8059                         clear_extent_dirty(root->fs_info->excluded_extents,
8060                                            rec->start,
8061                                            rec->start + rec->max_size - 1,
8062                                            GFP_NOFS);
8063                 free(rec);
8064         }
8065 repair_abort:
8066         if (repair) {
8067                 if (ret && ret != -EAGAIN) {
8068                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8069                         exit(1);
8070                 } else if (!ret) {
8071                         struct btrfs_trans_handle *trans;
8072
8073                         root = root->fs_info->extent_root;
8074                         trans = btrfs_start_transaction(root, 1);
8075                         if (IS_ERR(trans)) {
8076                                 ret = PTR_ERR(trans);
8077                                 goto repair_abort;
8078                         }
8079
8080                         btrfs_fix_block_accounting(trans, root);
8081                         ret = btrfs_commit_transaction(trans, root);
8082                         if (ret)
8083                                 goto repair_abort;
8084                 }
8085                 if (err)
8086                         fprintf(stderr, "repaired damaged extent references\n");
8087                 return ret;
8088         }
8089         return err;
8090 }
8091
8092 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8093 {
8094         u64 stripe_size;
8095
8096         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8097                 stripe_size = length;
8098                 stripe_size /= num_stripes;
8099         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8100                 stripe_size = length * 2;
8101                 stripe_size /= num_stripes;
8102         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8103                 stripe_size = length;
8104                 stripe_size /= (num_stripes - 1);
8105         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8106                 stripe_size = length;
8107                 stripe_size /= (num_stripes - 2);
8108         } else {
8109                 stripe_size = length;
8110         }
8111         return stripe_size;
8112 }
8113
8114 /*
8115  * Check the chunk with its block group/dev list ref:
8116  * Return 0 if all refs seems valid.
8117  * Return 1 if part of refs seems valid, need later check for rebuild ref
8118  * like missing block group and needs to search extent tree to rebuild them.
8119  * Return -1 if essential refs are missing and unable to rebuild.
8120  */
8121 static int check_chunk_refs(struct chunk_record *chunk_rec,
8122                             struct block_group_tree *block_group_cache,
8123                             struct device_extent_tree *dev_extent_cache,
8124                             int silent)
8125 {
8126         struct cache_extent *block_group_item;
8127         struct block_group_record *block_group_rec;
8128         struct cache_extent *dev_extent_item;
8129         struct device_extent_record *dev_extent_rec;
8130         u64 devid;
8131         u64 offset;
8132         u64 length;
8133         int metadump_v2 = 0;
8134         int i;
8135         int ret = 0;
8136
8137         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8138                                                chunk_rec->offset,
8139                                                chunk_rec->length);
8140         if (block_group_item) {
8141                 block_group_rec = container_of(block_group_item,
8142                                                struct block_group_record,
8143                                                cache);
8144                 if (chunk_rec->length != block_group_rec->offset ||
8145                     chunk_rec->offset != block_group_rec->objectid ||
8146                     (!metadump_v2 &&
8147                      chunk_rec->type_flags != block_group_rec->flags)) {
8148                         if (!silent)
8149                                 fprintf(stderr,
8150                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8151                                         chunk_rec->objectid,
8152                                         chunk_rec->type,
8153                                         chunk_rec->offset,
8154                                         chunk_rec->length,
8155                                         chunk_rec->offset,
8156                                         chunk_rec->type_flags,
8157                                         block_group_rec->objectid,
8158                                         block_group_rec->type,
8159                                         block_group_rec->offset,
8160                                         block_group_rec->offset,
8161                                         block_group_rec->objectid,
8162                                         block_group_rec->flags);
8163                         ret = -1;
8164                 } else {
8165                         list_del_init(&block_group_rec->list);
8166                         chunk_rec->bg_rec = block_group_rec;
8167                 }
8168         } else {
8169                 if (!silent)
8170                         fprintf(stderr,
8171                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8172                                 chunk_rec->objectid,
8173                                 chunk_rec->type,
8174                                 chunk_rec->offset,
8175                                 chunk_rec->length,
8176                                 chunk_rec->offset,
8177                                 chunk_rec->type_flags);
8178                 ret = 1;
8179         }
8180
8181         if (metadump_v2)
8182                 return ret;
8183
8184         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8185                                     chunk_rec->num_stripes);
8186         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8187                 devid = chunk_rec->stripes[i].devid;
8188                 offset = chunk_rec->stripes[i].offset;
8189                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8190                                                        devid, offset, length);
8191                 if (dev_extent_item) {
8192                         dev_extent_rec = container_of(dev_extent_item,
8193                                                 struct device_extent_record,
8194                                                 cache);
8195                         if (dev_extent_rec->objectid != devid ||
8196                             dev_extent_rec->offset != offset ||
8197                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8198                             dev_extent_rec->length != length) {
8199                                 if (!silent)
8200                                         fprintf(stderr,
8201                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8202                                                 chunk_rec->objectid,
8203                                                 chunk_rec->type,
8204                                                 chunk_rec->offset,
8205                                                 chunk_rec->stripes[i].devid,
8206                                                 chunk_rec->stripes[i].offset,
8207                                                 dev_extent_rec->objectid,
8208                                                 dev_extent_rec->offset,
8209                                                 dev_extent_rec->length);
8210                                 ret = -1;
8211                         } else {
8212                                 list_move(&dev_extent_rec->chunk_list,
8213                                           &chunk_rec->dextents);
8214                         }
8215                 } else {
8216                         if (!silent)
8217                                 fprintf(stderr,
8218                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8219                                         chunk_rec->objectid,
8220                                         chunk_rec->type,
8221                                         chunk_rec->offset,
8222                                         chunk_rec->stripes[i].devid,
8223                                         chunk_rec->stripes[i].offset);
8224                         ret = -1;
8225                 }
8226         }
8227         return ret;
8228 }
8229
8230 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8231 int check_chunks(struct cache_tree *chunk_cache,
8232                  struct block_group_tree *block_group_cache,
8233                  struct device_extent_tree *dev_extent_cache,
8234                  struct list_head *good, struct list_head *bad,
8235                  struct list_head *rebuild, int silent)
8236 {
8237         struct cache_extent *chunk_item;
8238         struct chunk_record *chunk_rec;
8239         struct block_group_record *bg_rec;
8240         struct device_extent_record *dext_rec;
8241         int err;
8242         int ret = 0;
8243
8244         chunk_item = first_cache_extent(chunk_cache);
8245         while (chunk_item) {
8246                 chunk_rec = container_of(chunk_item, struct chunk_record,
8247                                          cache);
8248                 err = check_chunk_refs(chunk_rec, block_group_cache,
8249                                        dev_extent_cache, silent);
8250                 if (err < 0)
8251                         ret = err;
8252                 if (err == 0 && good)
8253                         list_add_tail(&chunk_rec->list, good);
8254                 if (err > 0 && rebuild)
8255                         list_add_tail(&chunk_rec->list, rebuild);
8256                 if (err < 0 && bad)
8257                         list_add_tail(&chunk_rec->list, bad);
8258                 chunk_item = next_cache_extent(chunk_item);
8259         }
8260
8261         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8262                 if (!silent)
8263                         fprintf(stderr,
8264                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8265                                 bg_rec->objectid,
8266                                 bg_rec->offset,
8267                                 bg_rec->flags);
8268                 if (!ret)
8269                         ret = 1;
8270         }
8271
8272         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8273                             chunk_list) {
8274                 if (!silent)
8275                         fprintf(stderr,
8276                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8277                                 dext_rec->objectid,
8278                                 dext_rec->offset,
8279                                 dext_rec->length);
8280                 if (!ret)
8281                         ret = 1;
8282         }
8283         return ret;
8284 }
8285
8286
8287 static int check_device_used(struct device_record *dev_rec,
8288                              struct device_extent_tree *dext_cache)
8289 {
8290         struct cache_extent *cache;
8291         struct device_extent_record *dev_extent_rec;
8292         u64 total_byte = 0;
8293
8294         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8295         while (cache) {
8296                 dev_extent_rec = container_of(cache,
8297                                               struct device_extent_record,
8298                                               cache);
8299                 if (dev_extent_rec->objectid != dev_rec->devid)
8300                         break;
8301
8302                 list_del_init(&dev_extent_rec->device_list);
8303                 total_byte += dev_extent_rec->length;
8304                 cache = next_cache_extent(cache);
8305         }
8306
8307         if (total_byte != dev_rec->byte_used) {
8308                 fprintf(stderr,
8309                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8310                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8311                         dev_rec->type, dev_rec->offset);
8312                 return -1;
8313         } else {
8314                 return 0;
8315         }
8316 }
8317
8318 /* check btrfs_dev_item -> btrfs_dev_extent */
8319 static int check_devices(struct rb_root *dev_cache,
8320                          struct device_extent_tree *dev_extent_cache)
8321 {
8322         struct rb_node *dev_node;
8323         struct device_record *dev_rec;
8324         struct device_extent_record *dext_rec;
8325         int err;
8326         int ret = 0;
8327
8328         dev_node = rb_first(dev_cache);
8329         while (dev_node) {
8330                 dev_rec = container_of(dev_node, struct device_record, node);
8331                 err = check_device_used(dev_rec, dev_extent_cache);
8332                 if (err)
8333                         ret = err;
8334
8335                 dev_node = rb_next(dev_node);
8336         }
8337         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8338                             device_list) {
8339                 fprintf(stderr,
8340                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8341                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8342                 if (!ret)
8343                         ret = 1;
8344         }
8345         return ret;
8346 }
8347
8348 static int add_root_item_to_list(struct list_head *head,
8349                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8350                                   u8 level, u8 drop_level,
8351                                   int level_size, struct btrfs_key *drop_key)
8352 {
8353
8354         struct root_item_record *ri_rec;
8355         ri_rec = malloc(sizeof(*ri_rec));
8356         if (!ri_rec)
8357                 return -ENOMEM;
8358         ri_rec->bytenr = bytenr;
8359         ri_rec->objectid = objectid;
8360         ri_rec->level = level;
8361         ri_rec->level_size = level_size;
8362         ri_rec->drop_level = drop_level;
8363         ri_rec->last_snapshot = last_snapshot;
8364         if (drop_key)
8365                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8366         list_add_tail(&ri_rec->list, head);
8367
8368         return 0;
8369 }
8370
8371 static void free_root_item_list(struct list_head *list)
8372 {
8373         struct root_item_record *ri_rec;
8374
8375         while (!list_empty(list)) {
8376                 ri_rec = list_first_entry(list, struct root_item_record,
8377                                           list);
8378                 list_del_init(&ri_rec->list);
8379                 free(ri_rec);
8380         }
8381 }
8382
8383 static int deal_root_from_list(struct list_head *list,
8384                                struct btrfs_root *root,
8385                                struct block_info *bits,
8386                                int bits_nr,
8387                                struct cache_tree *pending,
8388                                struct cache_tree *seen,
8389                                struct cache_tree *reada,
8390                                struct cache_tree *nodes,
8391                                struct cache_tree *extent_cache,
8392                                struct cache_tree *chunk_cache,
8393                                struct rb_root *dev_cache,
8394                                struct block_group_tree *block_group_cache,
8395                                struct device_extent_tree *dev_extent_cache)
8396 {
8397         int ret = 0;
8398         u64 last;
8399
8400         while (!list_empty(list)) {
8401                 struct root_item_record *rec;
8402                 struct extent_buffer *buf;
8403                 rec = list_entry(list->next,
8404                                  struct root_item_record, list);
8405                 last = 0;
8406                 buf = read_tree_block(root->fs_info->tree_root,
8407                                       rec->bytenr, rec->level_size, 0);
8408                 if (!extent_buffer_uptodate(buf)) {
8409                         free_extent_buffer(buf);
8410                         ret = -EIO;
8411                         break;
8412                 }
8413                 add_root_to_pending(buf, extent_cache, pending,
8414                                     seen, nodes, rec->objectid);
8415                 /*
8416                  * To rebuild extent tree, we need deal with snapshot
8417                  * one by one, otherwise we deal with node firstly which
8418                  * can maximize readahead.
8419                  */
8420                 while (1) {
8421                         ret = run_next_block(root, bits, bits_nr, &last,
8422                                              pending, seen, reada, nodes,
8423                                              extent_cache, chunk_cache,
8424                                              dev_cache, block_group_cache,
8425                                              dev_extent_cache, rec);
8426                         if (ret != 0)
8427                                 break;
8428                 }
8429                 free_extent_buffer(buf);
8430                 list_del(&rec->list);
8431                 free(rec);
8432                 if (ret < 0)
8433                         break;
8434         }
8435         while (ret >= 0) {
8436                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8437                                      reada, nodes, extent_cache, chunk_cache,
8438                                      dev_cache, block_group_cache,
8439                                      dev_extent_cache, NULL);
8440                 if (ret != 0) {
8441                         if (ret > 0)
8442                                 ret = 0;
8443                         break;
8444                 }
8445         }
8446         return ret;
8447 }
8448
8449 static int check_chunks_and_extents(struct btrfs_root *root)
8450 {
8451         struct rb_root dev_cache;
8452         struct cache_tree chunk_cache;
8453         struct block_group_tree block_group_cache;
8454         struct device_extent_tree dev_extent_cache;
8455         struct cache_tree extent_cache;
8456         struct cache_tree seen;
8457         struct cache_tree pending;
8458         struct cache_tree reada;
8459         struct cache_tree nodes;
8460         struct extent_io_tree excluded_extents;
8461         struct cache_tree corrupt_blocks;
8462         struct btrfs_path path;
8463         struct btrfs_key key;
8464         struct btrfs_key found_key;
8465         int ret, err = 0;
8466         struct block_info *bits;
8467         int bits_nr;
8468         struct extent_buffer *leaf;
8469         int slot;
8470         struct btrfs_root_item ri;
8471         struct list_head dropping_trees;
8472         struct list_head normal_trees;
8473         struct btrfs_root *root1;
8474         u64 objectid;
8475         u32 level_size;
8476         u8 level;
8477
8478         dev_cache = RB_ROOT;
8479         cache_tree_init(&chunk_cache);
8480         block_group_tree_init(&block_group_cache);
8481         device_extent_tree_init(&dev_extent_cache);
8482
8483         cache_tree_init(&extent_cache);
8484         cache_tree_init(&seen);
8485         cache_tree_init(&pending);
8486         cache_tree_init(&nodes);
8487         cache_tree_init(&reada);
8488         cache_tree_init(&corrupt_blocks);
8489         extent_io_tree_init(&excluded_extents);
8490         INIT_LIST_HEAD(&dropping_trees);
8491         INIT_LIST_HEAD(&normal_trees);
8492
8493         if (repair) {
8494                 root->fs_info->excluded_extents = &excluded_extents;
8495                 root->fs_info->fsck_extent_cache = &extent_cache;
8496                 root->fs_info->free_extent_hook = free_extent_hook;
8497                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8498         }
8499
8500         bits_nr = 1024;
8501         bits = malloc(bits_nr * sizeof(struct block_info));
8502         if (!bits) {
8503                 perror("malloc");
8504                 exit(1);
8505         }
8506
8507         if (ctx.progress_enabled) {
8508                 ctx.tp = TASK_EXTENTS;
8509                 task_start(ctx.info);
8510         }
8511
8512 again:
8513         root1 = root->fs_info->tree_root;
8514         level = btrfs_header_level(root1->node);
8515         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8516                                     root1->node->start, 0, level, 0,
8517                                     root1->nodesize, NULL);
8518         if (ret < 0)
8519                 goto out;
8520         root1 = root->fs_info->chunk_root;
8521         level = btrfs_header_level(root1->node);
8522         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8523                                     root1->node->start, 0, level, 0,
8524                                     root1->nodesize, NULL);
8525         if (ret < 0)
8526                 goto out;
8527         btrfs_init_path(&path);
8528         key.offset = 0;
8529         key.objectid = 0;
8530         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8531         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8532                                         &key, &path, 0, 0);
8533         if (ret < 0)
8534                 goto out;
8535         while(1) {
8536                 leaf = path.nodes[0];
8537                 slot = path.slots[0];
8538                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8539                         ret = btrfs_next_leaf(root, &path);
8540                         if (ret != 0)
8541                                 break;
8542                         leaf = path.nodes[0];
8543                         slot = path.slots[0];
8544                 }
8545                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8546                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8547                         unsigned long offset;
8548                         u64 last_snapshot;
8549
8550                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8551                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8552                         last_snapshot = btrfs_root_last_snapshot(&ri);
8553                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8554                                 level = btrfs_root_level(&ri);
8555                                 level_size = root->nodesize;
8556                                 ret = add_root_item_to_list(&normal_trees,
8557                                                 found_key.objectid,
8558                                                 btrfs_root_bytenr(&ri),
8559                                                 last_snapshot, level,
8560                                                 0, level_size, NULL);
8561                                 if (ret < 0)
8562                                         goto out;
8563                         } else {
8564                                 level = btrfs_root_level(&ri);
8565                                 level_size = root->nodesize;
8566                                 objectid = found_key.objectid;
8567                                 btrfs_disk_key_to_cpu(&found_key,
8568                                                       &ri.drop_progress);
8569                                 ret = add_root_item_to_list(&dropping_trees,
8570                                                 objectid,
8571                                                 btrfs_root_bytenr(&ri),
8572                                                 last_snapshot, level,
8573                                                 ri.drop_level,
8574                                                 level_size, &found_key);
8575                                 if (ret < 0)
8576                                         goto out;
8577                         }
8578                 }
8579                 path.slots[0]++;
8580         }
8581         btrfs_release_path(&path);
8582
8583         /*
8584          * check_block can return -EAGAIN if it fixes something, please keep
8585          * this in mind when dealing with return values from these functions, if
8586          * we get -EAGAIN we want to fall through and restart the loop.
8587          */
8588         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8589                                   &seen, &reada, &nodes, &extent_cache,
8590                                   &chunk_cache, &dev_cache, &block_group_cache,
8591                                   &dev_extent_cache);
8592         if (ret < 0) {
8593                 if (ret == -EAGAIN)
8594                         goto loop;
8595                 goto out;
8596         }
8597         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8598                                   &pending, &seen, &reada, &nodes,
8599                                   &extent_cache, &chunk_cache, &dev_cache,
8600                                   &block_group_cache, &dev_extent_cache);
8601         if (ret < 0) {
8602                 if (ret == -EAGAIN)
8603                         goto loop;
8604                 goto out;
8605         }
8606
8607         ret = check_chunks(&chunk_cache, &block_group_cache,
8608                            &dev_extent_cache, NULL, NULL, NULL, 0);
8609         if (ret) {
8610                 if (ret == -EAGAIN)
8611                         goto loop;
8612                 err = ret;
8613         }
8614
8615         ret = check_extent_refs(root, &extent_cache);
8616         if (ret < 0) {
8617                 if (ret == -EAGAIN)
8618                         goto loop;
8619                 goto out;
8620         }
8621
8622         ret = check_devices(&dev_cache, &dev_extent_cache);
8623         if (ret && err)
8624                 ret = err;
8625
8626 out:
8627         task_stop(ctx.info);
8628         if (repair) {
8629                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8630                 extent_io_tree_cleanup(&excluded_extents);
8631                 root->fs_info->fsck_extent_cache = NULL;
8632                 root->fs_info->free_extent_hook = NULL;
8633                 root->fs_info->corrupt_blocks = NULL;
8634                 root->fs_info->excluded_extents = NULL;
8635         }
8636         free(bits);
8637         free_chunk_cache_tree(&chunk_cache);
8638         free_device_cache_tree(&dev_cache);
8639         free_block_group_tree(&block_group_cache);
8640         free_device_extent_tree(&dev_extent_cache);
8641         free_extent_cache_tree(&seen);
8642         free_extent_cache_tree(&pending);
8643         free_extent_cache_tree(&reada);
8644         free_extent_cache_tree(&nodes);
8645         return ret;
8646 loop:
8647         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8648         free_extent_cache_tree(&seen);
8649         free_extent_cache_tree(&pending);
8650         free_extent_cache_tree(&reada);
8651         free_extent_cache_tree(&nodes);
8652         free_chunk_cache_tree(&chunk_cache);
8653         free_block_group_tree(&block_group_cache);
8654         free_device_cache_tree(&dev_cache);
8655         free_device_extent_tree(&dev_extent_cache);
8656         free_extent_record_cache(root->fs_info, &extent_cache);
8657         free_root_item_list(&normal_trees);
8658         free_root_item_list(&dropping_trees);
8659         extent_io_tree_cleanup(&excluded_extents);
8660         goto again;
8661 }
8662
8663 /*
8664  * Check backrefs of a tree block given by @bytenr or @eb.
8665  *
8666  * @root:       the root containing the @bytenr or @eb
8667  * @eb:         tree block extent buffer, can be NULL
8668  * @bytenr:     bytenr of the tree block to search
8669  * @level:      tree level of the tree block
8670  * @owner:      owner of the tree block
8671  *
8672  * Return >0 for any error found and output error message
8673  * Return 0 for no error found
8674  */
8675 static int check_tree_block_ref(struct btrfs_root *root,
8676                                 struct extent_buffer *eb, u64 bytenr,
8677                                 int level, u64 owner)
8678 {
8679         struct btrfs_key key;
8680         struct btrfs_root *extent_root = root->fs_info->extent_root;
8681         struct btrfs_path path;
8682         struct btrfs_extent_item *ei;
8683         struct btrfs_extent_inline_ref *iref;
8684         struct extent_buffer *leaf;
8685         unsigned long end;
8686         unsigned long ptr;
8687         int slot;
8688         int skinny_level;
8689         int type;
8690         u32 nodesize = root->nodesize;
8691         u32 item_size;
8692         u64 offset;
8693         int found_ref = 0;
8694         int err = 0;
8695         int ret;
8696
8697         btrfs_init_path(&path);
8698         key.objectid = bytenr;
8699         if (btrfs_fs_incompat(root->fs_info,
8700                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8701                 key.type = BTRFS_METADATA_ITEM_KEY;
8702         else
8703                 key.type = BTRFS_EXTENT_ITEM_KEY;
8704         key.offset = (u64)-1;
8705
8706         /* Search for the backref in extent tree */
8707         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8708         if (ret < 0) {
8709                 err |= BACKREF_MISSING;
8710                 goto out;
8711         }
8712         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8713         if (ret) {
8714                 err |= BACKREF_MISSING;
8715                 goto out;
8716         }
8717
8718         leaf = path.nodes[0];
8719         slot = path.slots[0];
8720         btrfs_item_key_to_cpu(leaf, &key, slot);
8721
8722         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8723
8724         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8725                 skinny_level = (int)key.offset;
8726                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8727         } else {
8728                 struct btrfs_tree_block_info *info;
8729
8730                 info = (struct btrfs_tree_block_info *)(ei + 1);
8731                 skinny_level = btrfs_tree_block_level(leaf, info);
8732                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8733         }
8734
8735         if (eb) {
8736                 u64 header_gen;
8737                 u64 extent_gen;
8738
8739                 if (!(btrfs_extent_flags(leaf, ei) &
8740                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8741                         error(
8742                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8743                                 key.objectid, nodesize,
8744                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8745                         err = BACKREF_MISMATCH;
8746                 }
8747                 header_gen = btrfs_header_generation(eb);
8748                 extent_gen = btrfs_extent_generation(leaf, ei);
8749                 if (header_gen != extent_gen) {
8750                         error(
8751         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8752                                 key.objectid, nodesize, header_gen,
8753                                 extent_gen);
8754                         err = BACKREF_MISMATCH;
8755                 }
8756                 if (level != skinny_level) {
8757                         error(
8758                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8759                                 key.objectid, nodesize, level, skinny_level);
8760                         err = BACKREF_MISMATCH;
8761                 }
8762                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8763                         error(
8764                         "extent[%llu %u] is referred by other roots than %llu",
8765                                 key.objectid, nodesize, root->objectid);
8766                         err = BACKREF_MISMATCH;
8767                 }
8768         }
8769
8770         /*
8771          * Iterate the extent/metadata item to find the exact backref
8772          */
8773         item_size = btrfs_item_size_nr(leaf, slot);
8774         ptr = (unsigned long)iref;
8775         end = (unsigned long)ei + item_size;
8776         while (ptr < end) {
8777                 iref = (struct btrfs_extent_inline_ref *)ptr;
8778                 type = btrfs_extent_inline_ref_type(leaf, iref);
8779                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8780
8781                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8782                         (offset == root->objectid || offset == owner)) {
8783                         found_ref = 1;
8784                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8785                         /* Check if the backref points to valid referencer */
8786                         found_ref = !check_tree_block_ref(root, NULL, offset,
8787                                                           level + 1, owner);
8788                 }
8789
8790                 if (found_ref)
8791                         break;
8792                 ptr += btrfs_extent_inline_ref_size(type);
8793         }
8794
8795         /*
8796          * Inlined extent item doesn't have what we need, check
8797          * TREE_BLOCK_REF_KEY
8798          */
8799         if (!found_ref) {
8800                 btrfs_release_path(&path);
8801                 key.objectid = bytenr;
8802                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8803                 key.offset = root->objectid;
8804
8805                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8806                 if (!ret)
8807                         found_ref = 1;
8808         }
8809         if (!found_ref)
8810                 err |= BACKREF_MISSING;
8811 out:
8812         btrfs_release_path(&path);
8813         if (eb && (err & BACKREF_MISSING))
8814                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8815                         bytenr, nodesize, owner, level);
8816         return err;
8817 }
8818
8819 /*
8820  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8821  *
8822  * Return >0 any error found and output error message
8823  * Return 0 for no error found
8824  */
8825 static int check_extent_data_item(struct btrfs_root *root,
8826                                   struct extent_buffer *eb, int slot)
8827 {
8828         struct btrfs_file_extent_item *fi;
8829         struct btrfs_path path;
8830         struct btrfs_root *extent_root = root->fs_info->extent_root;
8831         struct btrfs_key fi_key;
8832         struct btrfs_key dbref_key;
8833         struct extent_buffer *leaf;
8834         struct btrfs_extent_item *ei;
8835         struct btrfs_extent_inline_ref *iref;
8836         struct btrfs_extent_data_ref *dref;
8837         u64 owner;
8838         u64 file_extent_gen;
8839         u64 disk_bytenr;
8840         u64 disk_num_bytes;
8841         u64 extent_num_bytes;
8842         u64 extent_flags;
8843         u64 extent_gen;
8844         u32 item_size;
8845         unsigned long end;
8846         unsigned long ptr;
8847         int type;
8848         u64 ref_root;
8849         int found_dbackref = 0;
8850         int err = 0;
8851         int ret;
8852
8853         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8854         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8855         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8856
8857         /* Nothing to check for hole and inline data extents */
8858         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8859             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8860                 return 0;
8861
8862         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8863         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8864         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8865
8866         /* Check unaligned disk_num_bytes and num_bytes */
8867         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8868                 error(
8869 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8870                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8871                         root->sectorsize);
8872                 err |= BYTES_UNALIGNED;
8873         } else {
8874                 data_bytes_allocated += disk_num_bytes;
8875         }
8876         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8877                 error(
8878 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8879                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8880                         root->sectorsize);
8881                 err |= BYTES_UNALIGNED;
8882         } else {
8883                 data_bytes_referenced += extent_num_bytes;
8884         }
8885         owner = btrfs_header_owner(eb);
8886
8887         /* Check the extent item of the file extent in extent tree */
8888         btrfs_init_path(&path);
8889         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8890         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8891         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8892
8893         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8894         if (ret) {
8895                 err |= BACKREF_MISSING;
8896                 goto error;
8897         }
8898
8899         leaf = path.nodes[0];
8900         slot = path.slots[0];
8901         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8902
8903         extent_flags = btrfs_extent_flags(leaf, ei);
8904         extent_gen = btrfs_extent_generation(leaf, ei);
8905
8906         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8907                 error(
8908                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8909                     disk_bytenr, disk_num_bytes,
8910                     BTRFS_EXTENT_FLAG_DATA);
8911                 err |= BACKREF_MISMATCH;
8912         }
8913
8914         if (file_extent_gen < extent_gen) {
8915                 error(
8916 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8917                         disk_bytenr, disk_num_bytes, file_extent_gen,
8918                         extent_gen);
8919                 err |= BACKREF_MISMATCH;
8920         }
8921
8922         /* Check data backref inside that extent item */
8923         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8924         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8925         ptr = (unsigned long)iref;
8926         end = (unsigned long)ei + item_size;
8927         while (ptr < end) {
8928                 iref = (struct btrfs_extent_inline_ref *)ptr;
8929                 type = btrfs_extent_inline_ref_type(leaf, iref);
8930                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8931
8932                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8933                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8934                         if (ref_root == owner || ref_root == root->objectid)
8935                                 found_dbackref = 1;
8936                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8937                         found_dbackref = !check_tree_block_ref(root, NULL,
8938                                 btrfs_extent_inline_ref_offset(leaf, iref),
8939                                 0, owner);
8940                 }
8941
8942                 if (found_dbackref)
8943                         break;
8944                 ptr += btrfs_extent_inline_ref_size(type);
8945         }
8946
8947         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8948         if (!found_dbackref) {
8949                 btrfs_release_path(&path);
8950
8951                 btrfs_init_path(&path);
8952                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8953                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8954                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8955                                 fi_key.objectid, fi_key.offset);
8956
8957                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8958                                         &dbref_key, &path, 0, 0);
8959                 if (!ret)
8960                         found_dbackref = 1;
8961         }
8962
8963         if (!found_dbackref)
8964                 err |= BACKREF_MISSING;
8965 error:
8966         btrfs_release_path(&path);
8967         if (err & BACKREF_MISSING) {
8968                 error("data extent[%llu %llu] backref lost",
8969                       disk_bytenr, disk_num_bytes);
8970         }
8971         return err;
8972 }
8973
8974 /*
8975  * Get real tree block level for the case like shared block
8976  * Return >= 0 as tree level
8977  * Return <0 for error
8978  */
8979 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8980 {
8981         struct extent_buffer *eb;
8982         struct btrfs_path path;
8983         struct btrfs_key key;
8984         struct btrfs_extent_item *ei;
8985         u64 flags;
8986         u64 transid;
8987         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8988         u8 backref_level;
8989         u8 header_level;
8990         int ret;
8991
8992         /* Search extent tree for extent generation and level */
8993         key.objectid = bytenr;
8994         key.type = BTRFS_METADATA_ITEM_KEY;
8995         key.offset = (u64)-1;
8996
8997         btrfs_init_path(&path);
8998         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8999         if (ret < 0)
9000                 goto release_out;
9001         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9002         if (ret < 0)
9003                 goto release_out;
9004         if (ret > 0) {
9005                 ret = -ENOENT;
9006                 goto release_out;
9007         }
9008
9009         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9010         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9011                             struct btrfs_extent_item);
9012         flags = btrfs_extent_flags(path.nodes[0], ei);
9013         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9014                 ret = -ENOENT;
9015                 goto release_out;
9016         }
9017
9018         /* Get transid for later read_tree_block() check */
9019         transid = btrfs_extent_generation(path.nodes[0], ei);
9020
9021         /* Get backref level as one source */
9022         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9023                 backref_level = key.offset;
9024         } else {
9025                 struct btrfs_tree_block_info *info;
9026
9027                 info = (struct btrfs_tree_block_info *)(ei + 1);
9028                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9029         }
9030         btrfs_release_path(&path);
9031
9032         /* Get level from tree block as an alternative source */
9033         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9034         if (!extent_buffer_uptodate(eb)) {
9035                 free_extent_buffer(eb);
9036                 return -EIO;
9037         }
9038         header_level = btrfs_header_level(eb);
9039         free_extent_buffer(eb);
9040
9041         if (header_level != backref_level)
9042                 return -EIO;
9043         return header_level;
9044
9045 release_out:
9046         btrfs_release_path(&path);
9047         return ret;
9048 }
9049
9050 /*
9051  * Check if a tree block backref is valid (points to a valid tree block)
9052  * if level == -1, level will be resolved
9053  * Return >0 for any error found and print error message
9054  */
9055 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9056                                     u64 bytenr, int level)
9057 {
9058         struct btrfs_root *root;
9059         struct btrfs_key key;
9060         struct btrfs_path path;
9061         struct extent_buffer *eb;
9062         struct extent_buffer *node;
9063         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9064         int err = 0;
9065         int ret;
9066
9067         /* Query level for level == -1 special case */
9068         if (level == -1)
9069                 level = query_tree_block_level(fs_info, bytenr);
9070         if (level < 0) {
9071                 err |= REFERENCER_MISSING;
9072                 goto out;
9073         }
9074
9075         key.objectid = root_id;
9076         key.type = BTRFS_ROOT_ITEM_KEY;
9077         key.offset = (u64)-1;
9078
9079         root = btrfs_read_fs_root(fs_info, &key);
9080         if (IS_ERR(root)) {
9081                 err |= REFERENCER_MISSING;
9082                 goto out;
9083         }
9084
9085         /* Read out the tree block to get item/node key */
9086         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9087         if (!extent_buffer_uptodate(eb)) {
9088                 err |= REFERENCER_MISSING;
9089                 free_extent_buffer(eb);
9090                 goto out;
9091         }
9092
9093         /* Empty tree, no need to check key */
9094         if (!btrfs_header_nritems(eb) && !level) {
9095                 free_extent_buffer(eb);
9096                 goto out;
9097         }
9098
9099         if (level)
9100                 btrfs_node_key_to_cpu(eb, &key, 0);
9101         else
9102                 btrfs_item_key_to_cpu(eb, &key, 0);
9103
9104         free_extent_buffer(eb);
9105
9106         btrfs_init_path(&path);
9107         /* Search with the first key, to ensure we can reach it */
9108         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9109         if (ret) {
9110                 err |= REFERENCER_MISSING;
9111                 goto release_out;
9112         }
9113
9114         node = path.nodes[level];
9115         if (btrfs_header_bytenr(node) != bytenr) {
9116                 error(
9117         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9118                         bytenr, nodesize, bytenr,
9119                         btrfs_header_bytenr(node));
9120                 err |= REFERENCER_MISMATCH;
9121         }
9122         if (btrfs_header_level(node) != level) {
9123                 error(
9124         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9125                         bytenr, nodesize, level,
9126                         btrfs_header_level(node));
9127                 err |= REFERENCER_MISMATCH;
9128         }
9129
9130 release_out:
9131         btrfs_release_path(&path);
9132 out:
9133         if (err & REFERENCER_MISSING) {
9134                 if (level < 0)
9135                         error("extent [%llu %d] lost referencer (owner: %llu)",
9136                                 bytenr, nodesize, root_id);
9137                 else
9138                         error(
9139                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9140                                 bytenr, nodesize, root_id, level);
9141         }
9142
9143         return err;
9144 }
9145
9146 /*
9147  * Check referencer for shared block backref
9148  * If level == -1, this function will resolve the level.
9149  */
9150 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9151                                      u64 parent, u64 bytenr, int level)
9152 {
9153         struct extent_buffer *eb;
9154         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9155         u32 nr;
9156         int found_parent = 0;
9157         int i;
9158
9159         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9160         if (!extent_buffer_uptodate(eb))
9161                 goto out;
9162
9163         if (level == -1)
9164                 level = query_tree_block_level(fs_info, bytenr);
9165         if (level < 0)
9166                 goto out;
9167
9168         if (level + 1 != btrfs_header_level(eb))
9169                 goto out;
9170
9171         nr = btrfs_header_nritems(eb);
9172         for (i = 0; i < nr; i++) {
9173                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9174                         found_parent = 1;
9175                         break;
9176                 }
9177         }
9178 out:
9179         free_extent_buffer(eb);
9180         if (!found_parent) {
9181                 error(
9182         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9183                         bytenr, nodesize, parent, level);
9184                 return REFERENCER_MISSING;
9185         }
9186         return 0;
9187 }
9188
9189 /*
9190  * Check referencer for normal (inlined) data ref
9191  * If len == 0, it will be resolved by searching in extent tree
9192  */
9193 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9194                                      u64 root_id, u64 objectid, u64 offset,
9195                                      u64 bytenr, u64 len, u32 count)
9196 {
9197         struct btrfs_root *root;
9198         struct btrfs_root *extent_root = fs_info->extent_root;
9199         struct btrfs_key key;
9200         struct btrfs_path path;
9201         struct extent_buffer *leaf;
9202         struct btrfs_file_extent_item *fi;
9203         u32 found_count = 0;
9204         int slot;
9205         int ret = 0;
9206
9207         if (!len) {
9208                 key.objectid = bytenr;
9209                 key.type = BTRFS_EXTENT_ITEM_KEY;
9210                 key.offset = (u64)-1;
9211
9212                 btrfs_init_path(&path);
9213                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9214                 if (ret < 0)
9215                         goto out;
9216                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9217                 if (ret)
9218                         goto out;
9219                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9220                 if (key.objectid != bytenr ||
9221                     key.type != BTRFS_EXTENT_ITEM_KEY)
9222                         goto out;
9223                 len = key.offset;
9224                 btrfs_release_path(&path);
9225         }
9226         key.objectid = root_id;
9227         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9228         key.offset = (u64)-1;
9229         btrfs_init_path(&path);
9230
9231         root = btrfs_read_fs_root(fs_info, &key);
9232         if (IS_ERR(root))
9233                 goto out;
9234
9235         key.objectid = objectid;
9236         key.type = BTRFS_EXTENT_DATA_KEY;
9237         /*
9238          * It can be nasty as data backref offset is
9239          * file offset - file extent offset, which is smaller or
9240          * equal to original backref offset.  The only special case is
9241          * overflow.  So we need to special check and do further search.
9242          */
9243         key.offset = offset & (1ULL << 63) ? 0 : offset;
9244
9245         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9246         if (ret < 0)
9247                 goto out;
9248
9249         /*
9250          * Search afterwards to get correct one
9251          * NOTE: As we must do a comprehensive check on the data backref to
9252          * make sure the dref count also matches, we must iterate all file
9253          * extents for that inode.
9254          */
9255         while (1) {
9256                 leaf = path.nodes[0];
9257                 slot = path.slots[0];
9258
9259                 btrfs_item_key_to_cpu(leaf, &key, slot);
9260                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9261                         break;
9262                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9263                 /*
9264                  * Except normal disk bytenr and disk num bytes, we still
9265                  * need to do extra check on dbackref offset as
9266                  * dbackref offset = file_offset - file_extent_offset
9267                  */
9268                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9269                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9270                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9271                     offset)
9272                         found_count++;
9273
9274                 ret = btrfs_next_item(root, &path);
9275                 if (ret)
9276                         break;
9277         }
9278 out:
9279         btrfs_release_path(&path);
9280         if (found_count != count) {
9281                 error(
9282 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9283                         bytenr, len, root_id, objectid, offset, count, found_count);
9284                 return REFERENCER_MISSING;
9285         }
9286         return 0;
9287 }
9288
9289 /*
9290  * Check if the referencer of a shared data backref exists
9291  */
9292 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9293                                      u64 parent, u64 bytenr)
9294 {
9295         struct extent_buffer *eb;
9296         struct btrfs_key key;
9297         struct btrfs_file_extent_item *fi;
9298         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9299         u32 nr;
9300         int found_parent = 0;
9301         int i;
9302
9303         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9304         if (!extent_buffer_uptodate(eb))
9305                 goto out;
9306
9307         nr = btrfs_header_nritems(eb);
9308         for (i = 0; i < nr; i++) {
9309                 btrfs_item_key_to_cpu(eb, &key, i);
9310                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9311                         continue;
9312
9313                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9314                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9315                         continue;
9316
9317                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9318                         found_parent = 1;
9319                         break;
9320                 }
9321         }
9322
9323 out:
9324         free_extent_buffer(eb);
9325         if (!found_parent) {
9326                 error("shared extent %llu referencer lost (parent: %llu)",
9327                         bytenr, parent);
9328                 return REFERENCER_MISSING;
9329         }
9330         return 0;
9331 }
9332
9333 /*
9334  * This function will check a given extent item, including its backref and
9335  * itself (like crossing stripe boundary and type)
9336  *
9337  * Since we don't use extent_record anymore, introduce new error bit
9338  */
9339 static int check_extent_item(struct btrfs_fs_info *fs_info,
9340                              struct extent_buffer *eb, int slot)
9341 {
9342         struct btrfs_extent_item *ei;
9343         struct btrfs_extent_inline_ref *iref;
9344         struct btrfs_extent_data_ref *dref;
9345         unsigned long end;
9346         unsigned long ptr;
9347         int type;
9348         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9349         u32 item_size = btrfs_item_size_nr(eb, slot);
9350         u64 flags;
9351         u64 offset;
9352         int metadata = 0;
9353         int level;
9354         struct btrfs_key key;
9355         int ret;
9356         int err = 0;
9357
9358         btrfs_item_key_to_cpu(eb, &key, slot);
9359         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9360                 bytes_used += key.offset;
9361         else
9362                 bytes_used += nodesize;
9363
9364         if (item_size < sizeof(*ei)) {
9365                 /*
9366                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9367                  * old thing when on disk format is still un-determined.
9368                  * No need to care about it anymore
9369                  */
9370                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9371                 return -ENOTTY;
9372         }
9373
9374         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9375         flags = btrfs_extent_flags(eb, ei);
9376
9377         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9378                 metadata = 1;
9379         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9380                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9381                       key.objectid, key.objectid + nodesize);
9382                 err |= CROSSING_STRIPE_BOUNDARY;
9383         }
9384
9385         ptr = (unsigned long)(ei + 1);
9386
9387         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9388                 /* Old EXTENT_ITEM metadata */
9389                 struct btrfs_tree_block_info *info;
9390
9391                 info = (struct btrfs_tree_block_info *)ptr;
9392                 level = btrfs_tree_block_level(eb, info);
9393                 ptr += sizeof(struct btrfs_tree_block_info);
9394         } else {
9395                 /* New METADATA_ITEM */
9396                 level = key.offset;
9397         }
9398         end = (unsigned long)ei + item_size;
9399
9400         if (ptr >= end) {
9401                 err |= ITEM_SIZE_MISMATCH;
9402                 goto out;
9403         }
9404
9405         /* Now check every backref in this extent item */
9406 next:
9407         iref = (struct btrfs_extent_inline_ref *)ptr;
9408         type = btrfs_extent_inline_ref_type(eb, iref);
9409         offset = btrfs_extent_inline_ref_offset(eb, iref);
9410         switch (type) {
9411         case BTRFS_TREE_BLOCK_REF_KEY:
9412                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9413                                                level);
9414                 err |= ret;
9415                 break;
9416         case BTRFS_SHARED_BLOCK_REF_KEY:
9417                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9418                                                  level);
9419                 err |= ret;
9420                 break;
9421         case BTRFS_EXTENT_DATA_REF_KEY:
9422                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9423                 ret = check_extent_data_backref(fs_info,
9424                                 btrfs_extent_data_ref_root(eb, dref),
9425                                 btrfs_extent_data_ref_objectid(eb, dref),
9426                                 btrfs_extent_data_ref_offset(eb, dref),
9427                                 key.objectid, key.offset,
9428                                 btrfs_extent_data_ref_count(eb, dref));
9429                 err |= ret;
9430                 break;
9431         case BTRFS_SHARED_DATA_REF_KEY:
9432                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9433                 err |= ret;
9434                 break;
9435         default:
9436                 error("extent[%llu %d %llu] has unknown ref type: %d",
9437                         key.objectid, key.type, key.offset, type);
9438                 err |= UNKNOWN_TYPE;
9439                 goto out;
9440         }
9441
9442         ptr += btrfs_extent_inline_ref_size(type);
9443         if (ptr < end)
9444                 goto next;
9445
9446 out:
9447         return err;
9448 }
9449
9450 /*
9451  * Check if a dev extent item is referred correctly by its chunk
9452  */
9453 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9454                                  struct extent_buffer *eb, int slot)
9455 {
9456         struct btrfs_root *chunk_root = fs_info->chunk_root;
9457         struct btrfs_dev_extent *ptr;
9458         struct btrfs_path path;
9459         struct btrfs_key chunk_key;
9460         struct btrfs_key devext_key;
9461         struct btrfs_chunk *chunk;
9462         struct extent_buffer *l;
9463         int num_stripes;
9464         u64 length;
9465         int i;
9466         int found_chunk = 0;
9467         int ret;
9468
9469         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9470         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9471         length = btrfs_dev_extent_length(eb, ptr);
9472
9473         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9474         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9475         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9476
9477         btrfs_init_path(&path);
9478         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9479         if (ret)
9480                 goto out;
9481
9482         l = path.nodes[0];
9483         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9484         if (btrfs_chunk_length(l, chunk) != length)
9485                 goto out;
9486
9487         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9488         for (i = 0; i < num_stripes; i++) {
9489                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9490                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9491
9492                 if (devid == devext_key.objectid &&
9493                     offset == devext_key.offset) {
9494                         found_chunk = 1;
9495                         break;
9496                 }
9497         }
9498 out:
9499         btrfs_release_path(&path);
9500         if (!found_chunk) {
9501                 error(
9502                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9503                         devext_key.objectid, devext_key.offset, length);
9504                 return REFERENCER_MISSING;
9505         }
9506         return 0;
9507 }
9508
9509 /*
9510  * Check if the used space is correct with the dev item
9511  */
9512 static int check_dev_item(struct btrfs_fs_info *fs_info,
9513                           struct extent_buffer *eb, int slot)
9514 {
9515         struct btrfs_root *dev_root = fs_info->dev_root;
9516         struct btrfs_dev_item *dev_item;
9517         struct btrfs_path path;
9518         struct btrfs_key key;
9519         struct btrfs_dev_extent *ptr;
9520         u64 dev_id;
9521         u64 used;
9522         u64 total = 0;
9523         int ret;
9524
9525         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9526         dev_id = btrfs_device_id(eb, dev_item);
9527         used = btrfs_device_bytes_used(eb, dev_item);
9528
9529         key.objectid = dev_id;
9530         key.type = BTRFS_DEV_EXTENT_KEY;
9531         key.offset = 0;
9532
9533         btrfs_init_path(&path);
9534         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9535         if (ret < 0) {
9536                 btrfs_item_key_to_cpu(eb, &key, slot);
9537                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9538                         key.objectid, key.type, key.offset);
9539                 btrfs_release_path(&path);
9540                 return REFERENCER_MISSING;
9541         }
9542
9543         /* Iterate dev_extents to calculate the used space of a device */
9544         while (1) {
9545                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9546
9547                 if (key.objectid > dev_id)
9548                         break;
9549                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9550                         goto next;
9551
9552                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9553                                      struct btrfs_dev_extent);
9554                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9555 next:
9556                 ret = btrfs_next_item(dev_root, &path);
9557                 if (ret)
9558                         break;
9559         }
9560         btrfs_release_path(&path);
9561
9562         if (used != total) {
9563                 btrfs_item_key_to_cpu(eb, &key, slot);
9564                 error(
9565 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9566                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9567                         BTRFS_DEV_EXTENT_KEY, dev_id);
9568                 return ACCOUNTING_MISMATCH;
9569         }
9570         return 0;
9571 }
9572
9573 /*
9574  * Check a block group item with its referener (chunk) and its used space
9575  * with extent/metadata item
9576  */
9577 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9578                                   struct extent_buffer *eb, int slot)
9579 {
9580         struct btrfs_root *extent_root = fs_info->extent_root;
9581         struct btrfs_root *chunk_root = fs_info->chunk_root;
9582         struct btrfs_block_group_item *bi;
9583         struct btrfs_block_group_item bg_item;
9584         struct btrfs_path path;
9585         struct btrfs_key bg_key;
9586         struct btrfs_key chunk_key;
9587         struct btrfs_key extent_key;
9588         struct btrfs_chunk *chunk;
9589         struct extent_buffer *leaf;
9590         struct btrfs_extent_item *ei;
9591         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9592         u64 flags;
9593         u64 bg_flags;
9594         u64 used;
9595         u64 total = 0;
9596         int ret;
9597         int err = 0;
9598
9599         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9600         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9601         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9602         used = btrfs_block_group_used(&bg_item);
9603         bg_flags = btrfs_block_group_flags(&bg_item);
9604
9605         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9606         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9607         chunk_key.offset = bg_key.objectid;
9608
9609         btrfs_init_path(&path);
9610         /* Search for the referencer chunk */
9611         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9612         if (ret) {
9613                 error(
9614                 "block group[%llu %llu] did not find the related chunk item",
9615                         bg_key.objectid, bg_key.offset);
9616                 err |= REFERENCER_MISSING;
9617         } else {
9618                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9619                                         struct btrfs_chunk);
9620                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9621                                                 bg_key.offset) {
9622                         error(
9623         "block group[%llu %llu] related chunk item length does not match",
9624                                 bg_key.objectid, bg_key.offset);
9625                         err |= REFERENCER_MISMATCH;
9626                 }
9627         }
9628         btrfs_release_path(&path);
9629
9630         /* Search from the block group bytenr */
9631         extent_key.objectid = bg_key.objectid;
9632         extent_key.type = 0;
9633         extent_key.offset = 0;
9634
9635         btrfs_init_path(&path);
9636         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9637         if (ret < 0)
9638                 goto out;
9639
9640         /* Iterate extent tree to account used space */
9641         while (1) {
9642                 leaf = path.nodes[0];
9643                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9644                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9645                         break;
9646
9647                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9648                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9649                         goto next;
9650                 if (extent_key.objectid < bg_key.objectid)
9651                         goto next;
9652
9653                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9654                         total += nodesize;
9655                 else
9656                         total += extent_key.offset;
9657
9658                 ei = btrfs_item_ptr(leaf, path.slots[0],
9659                                     struct btrfs_extent_item);
9660                 flags = btrfs_extent_flags(leaf, ei);
9661                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9662                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9663                                 error(
9664                         "bad extent[%llu, %llu) type mismatch with chunk",
9665                                         extent_key.objectid,
9666                                         extent_key.objectid + extent_key.offset);
9667                                 err |= CHUNK_TYPE_MISMATCH;
9668                         }
9669                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9670                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9671                                     BTRFS_BLOCK_GROUP_METADATA))) {
9672                                 error(
9673                         "bad extent[%llu, %llu) type mismatch with chunk",
9674                                         extent_key.objectid,
9675                                         extent_key.objectid + nodesize);
9676                                 err |= CHUNK_TYPE_MISMATCH;
9677                         }
9678                 }
9679 next:
9680                 ret = btrfs_next_item(extent_root, &path);
9681                 if (ret)
9682                         break;
9683         }
9684
9685 out:
9686         btrfs_release_path(&path);
9687
9688         if (total != used) {
9689                 error(
9690                 "block group[%llu %llu] used %llu but extent items used %llu",
9691                         bg_key.objectid, bg_key.offset, used, total);
9692                 err |= ACCOUNTING_MISMATCH;
9693         }
9694         return err;
9695 }
9696
9697 /*
9698  * Check a chunk item.
9699  * Including checking all referred dev_extents and block group
9700  */
9701 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9702                             struct extent_buffer *eb, int slot)
9703 {
9704         struct btrfs_root *extent_root = fs_info->extent_root;
9705         struct btrfs_root *dev_root = fs_info->dev_root;
9706         struct btrfs_path path;
9707         struct btrfs_key chunk_key;
9708         struct btrfs_key bg_key;
9709         struct btrfs_key devext_key;
9710         struct btrfs_chunk *chunk;
9711         struct extent_buffer *leaf;
9712         struct btrfs_block_group_item *bi;
9713         struct btrfs_block_group_item bg_item;
9714         struct btrfs_dev_extent *ptr;
9715         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9716         u64 length;
9717         u64 chunk_end;
9718         u64 type;
9719         u64 profile;
9720         int num_stripes;
9721         u64 offset;
9722         u64 objectid;
9723         int i;
9724         int ret;
9725         int err = 0;
9726
9727         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9728         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9729         length = btrfs_chunk_length(eb, chunk);
9730         chunk_end = chunk_key.offset + length;
9731         if (!IS_ALIGNED(length, sectorsize)) {
9732                 error("chunk[%llu %llu) not aligned to %u",
9733                         chunk_key.offset, chunk_end, sectorsize);
9734                 err |= BYTES_UNALIGNED;
9735                 goto out;
9736         }
9737
9738         type = btrfs_chunk_type(eb, chunk);
9739         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9740         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9741                 error("chunk[%llu %llu) has no chunk type",
9742                         chunk_key.offset, chunk_end);
9743                 err |= UNKNOWN_TYPE;
9744         }
9745         if (profile && (profile & (profile - 1))) {
9746                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9747                         chunk_key.offset, chunk_end, profile);
9748                 err |= UNKNOWN_TYPE;
9749         }
9750
9751         bg_key.objectid = chunk_key.offset;
9752         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9753         bg_key.offset = length;
9754
9755         btrfs_init_path(&path);
9756         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9757         if (ret) {
9758                 error(
9759                 "chunk[%llu %llu) did not find the related block group item",
9760                         chunk_key.offset, chunk_end);
9761                 err |= REFERENCER_MISSING;
9762         } else{
9763                 leaf = path.nodes[0];
9764                 bi = btrfs_item_ptr(leaf, path.slots[0],
9765                                     struct btrfs_block_group_item);
9766                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9767                                    sizeof(bg_item));
9768                 if (btrfs_block_group_flags(&bg_item) != type) {
9769                         error(
9770 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9771                                 chunk_key.offset, chunk_end, type,
9772                                 btrfs_block_group_flags(&bg_item));
9773                         err |= REFERENCER_MISSING;
9774                 }
9775         }
9776
9777         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9778         for (i = 0; i < num_stripes; i++) {
9779                 btrfs_release_path(&path);
9780                 btrfs_init_path(&path);
9781                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9782                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9783                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9784
9785                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9786                                         0, 0);
9787                 if (ret)
9788                         goto not_match_dev;
9789
9790                 leaf = path.nodes[0];
9791                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9792                                      struct btrfs_dev_extent);
9793                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9794                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9795                 if (objectid != chunk_key.objectid ||
9796                     offset != chunk_key.offset ||
9797                     btrfs_dev_extent_length(leaf, ptr) != length)
9798                         goto not_match_dev;
9799                 continue;
9800 not_match_dev:
9801                 err |= BACKREF_MISSING;
9802                 error(
9803                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9804                         chunk_key.objectid, chunk_end, i);
9805                 continue;
9806         }
9807         btrfs_release_path(&path);
9808 out:
9809         return err;
9810 }
9811
9812 /*
9813  * Main entry function to check known items and update related accounting info
9814  */
9815 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9816 {
9817         struct btrfs_fs_info *fs_info = root->fs_info;
9818         struct btrfs_key key;
9819         int slot = 0;
9820         int type;
9821         struct btrfs_extent_data_ref *dref;
9822         int ret;
9823         int err = 0;
9824
9825 next:
9826         btrfs_item_key_to_cpu(eb, &key, slot);
9827         type = btrfs_key_type(&key);
9828
9829         switch (type) {
9830         case BTRFS_EXTENT_DATA_KEY:
9831                 ret = check_extent_data_item(root, eb, slot);
9832                 err |= ret;
9833                 break;
9834         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9835                 ret = check_block_group_item(fs_info, eb, slot);
9836                 err |= ret;
9837                 break;
9838         case BTRFS_DEV_ITEM_KEY:
9839                 ret = check_dev_item(fs_info, eb, slot);
9840                 err |= ret;
9841                 break;
9842         case BTRFS_CHUNK_ITEM_KEY:
9843                 ret = check_chunk_item(fs_info, eb, slot);
9844                 err |= ret;
9845                 break;
9846         case BTRFS_DEV_EXTENT_KEY:
9847                 ret = check_dev_extent_item(fs_info, eb, slot);
9848                 err |= ret;
9849                 break;
9850         case BTRFS_EXTENT_ITEM_KEY:
9851         case BTRFS_METADATA_ITEM_KEY:
9852                 ret = check_extent_item(fs_info, eb, slot);
9853                 err |= ret;
9854                 break;
9855         case BTRFS_EXTENT_CSUM_KEY:
9856                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9857                 break;
9858         case BTRFS_TREE_BLOCK_REF_KEY:
9859                 ret = check_tree_block_backref(fs_info, key.offset,
9860                                                key.objectid, -1);
9861                 err |= ret;
9862                 break;
9863         case BTRFS_EXTENT_DATA_REF_KEY:
9864                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9865                 ret = check_extent_data_backref(fs_info,
9866                                 btrfs_extent_data_ref_root(eb, dref),
9867                                 btrfs_extent_data_ref_objectid(eb, dref),
9868                                 btrfs_extent_data_ref_offset(eb, dref),
9869                                 key.objectid, 0,
9870                                 btrfs_extent_data_ref_count(eb, dref));
9871                 err |= ret;
9872                 break;
9873         case BTRFS_SHARED_BLOCK_REF_KEY:
9874                 ret = check_shared_block_backref(fs_info, key.offset,
9875                                                  key.objectid, -1);
9876                 err |= ret;
9877                 break;
9878         case BTRFS_SHARED_DATA_REF_KEY:
9879                 ret = check_shared_data_backref(fs_info, key.offset,
9880                                                 key.objectid);
9881                 err |= ret;
9882                 break;
9883         default:
9884                 break;
9885         }
9886
9887         if (++slot < btrfs_header_nritems(eb))
9888                 goto next;
9889
9890         return err;
9891 }
9892
9893 /*
9894  * Helper function for later fs/subvol tree check.  To determine if a tree
9895  * block should be checked.
9896  * This function will ensure only the direct referencer with lowest rootid to
9897  * check a fs/subvolume tree block.
9898  *
9899  * Backref check at extent tree would detect errors like missing subvolume
9900  * tree, so we can do aggressive check to reduce duplicated checks.
9901  */
9902 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9903 {
9904         struct btrfs_root *extent_root = root->fs_info->extent_root;
9905         struct btrfs_key key;
9906         struct btrfs_path path;
9907         struct extent_buffer *leaf;
9908         int slot;
9909         struct btrfs_extent_item *ei;
9910         unsigned long ptr;
9911         unsigned long end;
9912         int type;
9913         u32 item_size;
9914         u64 offset;
9915         struct btrfs_extent_inline_ref *iref;
9916         int ret;
9917
9918         btrfs_init_path(&path);
9919         key.objectid = btrfs_header_bytenr(eb);
9920         key.type = BTRFS_METADATA_ITEM_KEY;
9921         key.offset = (u64)-1;
9922
9923         /*
9924          * Any failure in backref resolving means we can't determine
9925          * whom the tree block belongs to.
9926          * So in that case, we need to check that tree block
9927          */
9928         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9929         if (ret < 0)
9930                 goto need_check;
9931
9932         ret = btrfs_previous_extent_item(extent_root, &path,
9933                                          btrfs_header_bytenr(eb));
9934         if (ret)
9935                 goto need_check;
9936
9937         leaf = path.nodes[0];
9938         slot = path.slots[0];
9939         btrfs_item_key_to_cpu(leaf, &key, slot);
9940         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9941
9942         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9943                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9944         } else {
9945                 struct btrfs_tree_block_info *info;
9946
9947                 info = (struct btrfs_tree_block_info *)(ei + 1);
9948                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9949         }
9950
9951         item_size = btrfs_item_size_nr(leaf, slot);
9952         ptr = (unsigned long)iref;
9953         end = (unsigned long)ei + item_size;
9954         while (ptr < end) {
9955                 iref = (struct btrfs_extent_inline_ref *)ptr;
9956                 type = btrfs_extent_inline_ref_type(leaf, iref);
9957                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9958
9959                 /*
9960                  * We only check the tree block if current root is
9961                  * the lowest referencer of it.
9962                  */
9963                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9964                     offset < root->objectid) {
9965                         btrfs_release_path(&path);
9966                         return 0;
9967                 }
9968
9969                 ptr += btrfs_extent_inline_ref_size(type);
9970         }
9971         /*
9972          * Normally we should also check keyed tree block ref, but that may be
9973          * very time consuming.  Inlined ref should already make us skip a lot
9974          * of refs now.  So skip search keyed tree block ref.
9975          */
9976
9977 need_check:
9978         btrfs_release_path(&path);
9979         return 1;
9980 }
9981
9982 /*
9983  * Traversal function for tree block. We will do:
9984  * 1) Skip shared fs/subvolume tree blocks
9985  * 2) Update related bytes accounting
9986  * 3) Pre-order traversal
9987  */
9988 static int traverse_tree_block(struct btrfs_root *root,
9989                                 struct extent_buffer *node)
9990 {
9991         struct extent_buffer *eb;
9992         int level;
9993         u64 nr;
9994         int i;
9995         int err = 0;
9996         int ret;
9997
9998         /*
9999          * Skip shared fs/subvolume tree block, in that case they will
10000          * be checked by referencer with lowest rootid
10001          */
10002         if (is_fstree(root->objectid) && !should_check(root, node))
10003                 return 0;
10004
10005         /* Update bytes accounting */
10006         total_btree_bytes += node->len;
10007         if (fs_root_objectid(btrfs_header_owner(node)))
10008                 total_fs_tree_bytes += node->len;
10009         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10010                 total_extent_tree_bytes += node->len;
10011         if (!found_old_backref &&
10012             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10013             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10014             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10015                 found_old_backref = 1;
10016
10017         /* pre-order tranversal, check itself first */
10018         level = btrfs_header_level(node);
10019         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10020                                    btrfs_header_level(node),
10021                                    btrfs_header_owner(node));
10022         err |= ret;
10023         if (err)
10024                 error(
10025         "check %s failed root %llu bytenr %llu level %d, force continue check",
10026                         level ? "node":"leaf", root->objectid,
10027                         btrfs_header_bytenr(node), btrfs_header_level(node));
10028
10029         if (!level) {
10030                 btree_space_waste += btrfs_leaf_free_space(root, node);
10031                 ret = check_leaf_items(root, node);
10032                 err |= ret;
10033                 return err;
10034         }
10035
10036         nr = btrfs_header_nritems(node);
10037         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10038                 sizeof(struct btrfs_key_ptr);
10039
10040         /* Then check all its children */
10041         for (i = 0; i < nr; i++) {
10042                 u64 blocknr = btrfs_node_blockptr(node, i);
10043
10044                 /*
10045                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10046                  * to call the function itself.
10047                  */
10048                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10049                 if (extent_buffer_uptodate(eb)) {
10050                         ret = traverse_tree_block(root, eb);
10051                         err |= ret;
10052                 }
10053                 free_extent_buffer(eb);
10054         }
10055
10056         return err;
10057 }
10058
10059 /*
10060  * Low memory usage version check_chunks_and_extents.
10061  */
10062 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10063 {
10064         struct btrfs_path path;
10065         struct btrfs_key key;
10066         struct btrfs_root *root1;
10067         struct btrfs_root *cur_root;
10068         int err = 0;
10069         int ret;
10070
10071         root1 = root->fs_info->chunk_root;
10072         ret = traverse_tree_block(root1, root1->node);
10073         err |= ret;
10074
10075         root1 = root->fs_info->tree_root;
10076         ret = traverse_tree_block(root1, root1->node);
10077         err |= ret;
10078
10079         btrfs_init_path(&path);
10080         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10081         key.offset = 0;
10082         key.type = BTRFS_ROOT_ITEM_KEY;
10083
10084         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10085         if (ret) {
10086                 error("cannot find extent treet in tree_root");
10087                 goto out;
10088         }
10089
10090         while (1) {
10091                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10092                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10093                         goto next;
10094                 key.offset = (u64)-1;
10095
10096                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10097                 if (IS_ERR(cur_root) || !cur_root) {
10098                         error("failed to read tree: %lld", key.objectid);
10099                         goto next;
10100                 }
10101
10102                 ret = traverse_tree_block(cur_root, cur_root->node);
10103                 err |= ret;
10104
10105 next:
10106                 ret = btrfs_next_item(root1, &path);
10107                 if (ret)
10108                         goto out;
10109         }
10110
10111 out:
10112         btrfs_release_path(&path);
10113         return err;
10114 }
10115
10116 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10117                            struct btrfs_root *root, int overwrite)
10118 {
10119         struct extent_buffer *c;
10120         struct extent_buffer *old = root->node;
10121         int level;
10122         int ret;
10123         struct btrfs_disk_key disk_key = {0,0,0};
10124
10125         level = 0;
10126
10127         if (overwrite) {
10128                 c = old;
10129                 extent_buffer_get(c);
10130                 goto init;
10131         }
10132         c = btrfs_alloc_free_block(trans, root,
10133                                    root->nodesize,
10134                                    root->root_key.objectid,
10135                                    &disk_key, level, 0, 0);
10136         if (IS_ERR(c)) {
10137                 c = old;
10138                 extent_buffer_get(c);
10139                 overwrite = 1;
10140         }
10141 init:
10142         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10143         btrfs_set_header_level(c, level);
10144         btrfs_set_header_bytenr(c, c->start);
10145         btrfs_set_header_generation(c, trans->transid);
10146         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10147         btrfs_set_header_owner(c, root->root_key.objectid);
10148
10149         write_extent_buffer(c, root->fs_info->fsid,
10150                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10151
10152         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10153                             btrfs_header_chunk_tree_uuid(c),
10154                             BTRFS_UUID_SIZE);
10155
10156         btrfs_mark_buffer_dirty(c);
10157         /*
10158          * this case can happen in the following case:
10159          *
10160          * 1.overwrite previous root.
10161          *
10162          * 2.reinit reloc data root, this is because we skip pin
10163          * down reloc data tree before which means we can allocate
10164          * same block bytenr here.
10165          */
10166         if (old->start == c->start) {
10167                 btrfs_set_root_generation(&root->root_item,
10168                                           trans->transid);
10169                 root->root_item.level = btrfs_header_level(root->node);
10170                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10171                                         &root->root_key, &root->root_item);
10172                 if (ret) {
10173                         free_extent_buffer(c);
10174                         return ret;
10175                 }
10176         }
10177         free_extent_buffer(old);
10178         root->node = c;
10179         add_root_to_dirty_list(root);
10180         return 0;
10181 }
10182
10183 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10184                                 struct extent_buffer *eb, int tree_root)
10185 {
10186         struct extent_buffer *tmp;
10187         struct btrfs_root_item *ri;
10188         struct btrfs_key key;
10189         u64 bytenr;
10190         u32 nodesize;
10191         int level = btrfs_header_level(eb);
10192         int nritems;
10193         int ret;
10194         int i;
10195
10196         /*
10197          * If we have pinned this block before, don't pin it again.
10198          * This can not only avoid forever loop with broken filesystem
10199          * but also give us some speedups.
10200          */
10201         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10202                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10203                 return 0;
10204
10205         btrfs_pin_extent(fs_info, eb->start, eb->len);
10206
10207         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10208         nritems = btrfs_header_nritems(eb);
10209         for (i = 0; i < nritems; i++) {
10210                 if (level == 0) {
10211                         btrfs_item_key_to_cpu(eb, &key, i);
10212                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10213                                 continue;
10214                         /* Skip the extent root and reloc roots */
10215                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10216                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10217                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10218                                 continue;
10219                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10220                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10221
10222                         /*
10223                          * If at any point we start needing the real root we
10224                          * will have to build a stump root for the root we are
10225                          * in, but for now this doesn't actually use the root so
10226                          * just pass in extent_root.
10227                          */
10228                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10229                                               nodesize, 0);
10230                         if (!extent_buffer_uptodate(tmp)) {
10231                                 fprintf(stderr, "Error reading root block\n");
10232                                 return -EIO;
10233                         }
10234                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10235                         free_extent_buffer(tmp);
10236                         if (ret)
10237                                 return ret;
10238                 } else {
10239                         bytenr = btrfs_node_blockptr(eb, i);
10240
10241                         /* If we aren't the tree root don't read the block */
10242                         if (level == 1 && !tree_root) {
10243                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10244                                 continue;
10245                         }
10246
10247                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10248                                               nodesize, 0);
10249                         if (!extent_buffer_uptodate(tmp)) {
10250                                 fprintf(stderr, "Error reading tree block\n");
10251                                 return -EIO;
10252                         }
10253                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10254                         free_extent_buffer(tmp);
10255                         if (ret)
10256                                 return ret;
10257                 }
10258         }
10259
10260         return 0;
10261 }
10262
10263 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10264 {
10265         int ret;
10266
10267         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10268         if (ret)
10269                 return ret;
10270
10271         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10272 }
10273
10274 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10275 {
10276         struct btrfs_block_group_cache *cache;
10277         struct btrfs_path *path;
10278         struct extent_buffer *leaf;
10279         struct btrfs_chunk *chunk;
10280         struct btrfs_key key;
10281         int ret;
10282         u64 start;
10283
10284         path = btrfs_alloc_path();
10285         if (!path)
10286                 return -ENOMEM;
10287
10288         key.objectid = 0;
10289         key.type = BTRFS_CHUNK_ITEM_KEY;
10290         key.offset = 0;
10291
10292         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10293         if (ret < 0) {
10294                 btrfs_free_path(path);
10295                 return ret;
10296         }
10297
10298         /*
10299          * We do this in case the block groups were screwed up and had alloc
10300          * bits that aren't actually set on the chunks.  This happens with
10301          * restored images every time and could happen in real life I guess.
10302          */
10303         fs_info->avail_data_alloc_bits = 0;
10304         fs_info->avail_metadata_alloc_bits = 0;
10305         fs_info->avail_system_alloc_bits = 0;
10306
10307         /* First we need to create the in-memory block groups */
10308         while (1) {
10309                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10310                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10311                         if (ret < 0) {
10312                                 btrfs_free_path(path);
10313                                 return ret;
10314                         }
10315                         if (ret) {
10316                                 ret = 0;
10317                                 break;
10318                         }
10319                 }
10320                 leaf = path->nodes[0];
10321                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10322                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10323                         path->slots[0]++;
10324                         continue;
10325                 }
10326
10327                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10328                                        struct btrfs_chunk);
10329                 btrfs_add_block_group(fs_info, 0,
10330                                       btrfs_chunk_type(leaf, chunk),
10331                                       key.objectid, key.offset,
10332                                       btrfs_chunk_length(leaf, chunk));
10333                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10334                                  key.offset + btrfs_chunk_length(leaf, chunk),
10335                                  GFP_NOFS);
10336                 path->slots[0]++;
10337         }
10338         start = 0;
10339         while (1) {
10340                 cache = btrfs_lookup_first_block_group(fs_info, start);
10341                 if (!cache)
10342                         break;
10343                 cache->cached = 1;
10344                 start = cache->key.objectid + cache->key.offset;
10345         }
10346
10347         btrfs_free_path(path);
10348         return 0;
10349 }
10350
10351 static int reset_balance(struct btrfs_trans_handle *trans,
10352                          struct btrfs_fs_info *fs_info)
10353 {
10354         struct btrfs_root *root = fs_info->tree_root;
10355         struct btrfs_path *path;
10356         struct extent_buffer *leaf;
10357         struct btrfs_key key;
10358         int del_slot, del_nr = 0;
10359         int ret;
10360         int found = 0;
10361
10362         path = btrfs_alloc_path();
10363         if (!path)
10364                 return -ENOMEM;
10365
10366         key.objectid = BTRFS_BALANCE_OBJECTID;
10367         key.type = BTRFS_BALANCE_ITEM_KEY;
10368         key.offset = 0;
10369
10370         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10371         if (ret) {
10372                 if (ret > 0)
10373                         ret = 0;
10374                 if (!ret)
10375                         goto reinit_data_reloc;
10376                 else
10377                         goto out;
10378         }
10379
10380         ret = btrfs_del_item(trans, root, path);
10381         if (ret)
10382                 goto out;
10383         btrfs_release_path(path);
10384
10385         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10386         key.type = BTRFS_ROOT_ITEM_KEY;
10387         key.offset = 0;
10388
10389         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10390         if (ret < 0)
10391                 goto out;
10392         while (1) {
10393                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10394                         if (!found)
10395                                 break;
10396
10397                         if (del_nr) {
10398                                 ret = btrfs_del_items(trans, root, path,
10399                                                       del_slot, del_nr);
10400                                 del_nr = 0;
10401                                 if (ret)
10402                                         goto out;
10403                         }
10404                         key.offset++;
10405                         btrfs_release_path(path);
10406
10407                         found = 0;
10408                         ret = btrfs_search_slot(trans, root, &key, path,
10409                                                 -1, 1);
10410                         if (ret < 0)
10411                                 goto out;
10412                         continue;
10413                 }
10414                 found = 1;
10415                 leaf = path->nodes[0];
10416                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10417                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10418                         break;
10419                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10420                         path->slots[0]++;
10421                         continue;
10422                 }
10423                 if (!del_nr) {
10424                         del_slot = path->slots[0];
10425                         del_nr = 1;
10426                 } else {
10427                         del_nr++;
10428                 }
10429                 path->slots[0]++;
10430         }
10431
10432         if (del_nr) {
10433                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10434                 if (ret)
10435                         goto out;
10436         }
10437         btrfs_release_path(path);
10438
10439 reinit_data_reloc:
10440         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10441         key.type = BTRFS_ROOT_ITEM_KEY;
10442         key.offset = (u64)-1;
10443         root = btrfs_read_fs_root(fs_info, &key);
10444         if (IS_ERR(root)) {
10445                 fprintf(stderr, "Error reading data reloc tree\n");
10446                 ret = PTR_ERR(root);
10447                 goto out;
10448         }
10449         record_root_in_trans(trans, root);
10450         ret = btrfs_fsck_reinit_root(trans, root, 0);
10451         if (ret)
10452                 goto out;
10453         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10454 out:
10455         btrfs_free_path(path);
10456         return ret;
10457 }
10458
10459 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10460                               struct btrfs_fs_info *fs_info)
10461 {
10462         u64 start = 0;
10463         int ret;
10464
10465         /*
10466          * The only reason we don't do this is because right now we're just
10467          * walking the trees we find and pinning down their bytes, we don't look
10468          * at any of the leaves.  In order to do mixed groups we'd have to check
10469          * the leaves of any fs roots and pin down the bytes for any file
10470          * extents we find.  Not hard but why do it if we don't have to?
10471          */
10472         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10473                 fprintf(stderr, "We don't support re-initing the extent tree "
10474                         "for mixed block groups yet, please notify a btrfs "
10475                         "developer you want to do this so they can add this "
10476                         "functionality.\n");
10477                 return -EINVAL;
10478         }
10479
10480         /*
10481          * first we need to walk all of the trees except the extent tree and pin
10482          * down the bytes that are in use so we don't overwrite any existing
10483          * metadata.
10484          */
10485         ret = pin_metadata_blocks(fs_info);
10486         if (ret) {
10487                 fprintf(stderr, "error pinning down used bytes\n");
10488                 return ret;
10489         }
10490
10491         /*
10492          * Need to drop all the block groups since we're going to recreate all
10493          * of them again.
10494          */
10495         btrfs_free_block_groups(fs_info);
10496         ret = reset_block_groups(fs_info);
10497         if (ret) {
10498                 fprintf(stderr, "error resetting the block groups\n");
10499                 return ret;
10500         }
10501
10502         /* Ok we can allocate now, reinit the extent root */
10503         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10504         if (ret) {
10505                 fprintf(stderr, "extent root initialization failed\n");
10506                 /*
10507                  * When the transaction code is updated we should end the
10508                  * transaction, but for now progs only knows about commit so
10509                  * just return an error.
10510                  */
10511                 return ret;
10512         }
10513
10514         /*
10515          * Now we have all the in-memory block groups setup so we can make
10516          * allocations properly, and the metadata we care about is safe since we
10517          * pinned all of it above.
10518          */
10519         while (1) {
10520                 struct btrfs_block_group_cache *cache;
10521
10522                 cache = btrfs_lookup_first_block_group(fs_info, start);
10523                 if (!cache)
10524                         break;
10525                 start = cache->key.objectid + cache->key.offset;
10526                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10527                                         &cache->key, &cache->item,
10528                                         sizeof(cache->item));
10529                 if (ret) {
10530                         fprintf(stderr, "Error adding block group\n");
10531                         return ret;
10532                 }
10533                 btrfs_extent_post_op(trans, fs_info->extent_root);
10534         }
10535
10536         ret = reset_balance(trans, fs_info);
10537         if (ret)
10538                 fprintf(stderr, "error resetting the pending balance\n");
10539
10540         return ret;
10541 }
10542
10543 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10544 {
10545         struct btrfs_path *path;
10546         struct btrfs_trans_handle *trans;
10547         struct btrfs_key key;
10548         int ret;
10549
10550         printf("Recowing metadata block %llu\n", eb->start);
10551         key.objectid = btrfs_header_owner(eb);
10552         key.type = BTRFS_ROOT_ITEM_KEY;
10553         key.offset = (u64)-1;
10554
10555         root = btrfs_read_fs_root(root->fs_info, &key);
10556         if (IS_ERR(root)) {
10557                 fprintf(stderr, "Couldn't find owner root %llu\n",
10558                         key.objectid);
10559                 return PTR_ERR(root);
10560         }
10561
10562         path = btrfs_alloc_path();
10563         if (!path)
10564                 return -ENOMEM;
10565
10566         trans = btrfs_start_transaction(root, 1);
10567         if (IS_ERR(trans)) {
10568                 btrfs_free_path(path);
10569                 return PTR_ERR(trans);
10570         }
10571
10572         path->lowest_level = btrfs_header_level(eb);
10573         if (path->lowest_level)
10574                 btrfs_node_key_to_cpu(eb, &key, 0);
10575         else
10576                 btrfs_item_key_to_cpu(eb, &key, 0);
10577
10578         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10579         btrfs_commit_transaction(trans, root);
10580         btrfs_free_path(path);
10581         return ret;
10582 }
10583
10584 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10585 {
10586         struct btrfs_path *path;
10587         struct btrfs_trans_handle *trans;
10588         struct btrfs_key key;
10589         int ret;
10590
10591         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10592                bad->key.type, bad->key.offset);
10593         key.objectid = bad->root_id;
10594         key.type = BTRFS_ROOT_ITEM_KEY;
10595         key.offset = (u64)-1;
10596
10597         root = btrfs_read_fs_root(root->fs_info, &key);
10598         if (IS_ERR(root)) {
10599                 fprintf(stderr, "Couldn't find owner root %llu\n",
10600                         key.objectid);
10601                 return PTR_ERR(root);
10602         }
10603
10604         path = btrfs_alloc_path();
10605         if (!path)
10606                 return -ENOMEM;
10607
10608         trans = btrfs_start_transaction(root, 1);
10609         if (IS_ERR(trans)) {
10610                 btrfs_free_path(path);
10611                 return PTR_ERR(trans);
10612         }
10613
10614         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10615         if (ret) {
10616                 if (ret > 0)
10617                         ret = 0;
10618                 goto out;
10619         }
10620         ret = btrfs_del_item(trans, root, path);
10621 out:
10622         btrfs_commit_transaction(trans, root);
10623         btrfs_free_path(path);
10624         return ret;
10625 }
10626
10627 static int zero_log_tree(struct btrfs_root *root)
10628 {
10629         struct btrfs_trans_handle *trans;
10630         int ret;
10631
10632         trans = btrfs_start_transaction(root, 1);
10633         if (IS_ERR(trans)) {
10634                 ret = PTR_ERR(trans);
10635                 return ret;
10636         }
10637         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10638         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10639         ret = btrfs_commit_transaction(trans, root);
10640         return ret;
10641 }
10642
10643 static int populate_csum(struct btrfs_trans_handle *trans,
10644                          struct btrfs_root *csum_root, char *buf, u64 start,
10645                          u64 len)
10646 {
10647         u64 offset = 0;
10648         u64 sectorsize;
10649         int ret = 0;
10650
10651         while (offset < len) {
10652                 sectorsize = csum_root->sectorsize;
10653                 ret = read_extent_data(csum_root, buf, start + offset,
10654                                        &sectorsize, 0);
10655                 if (ret)
10656                         break;
10657                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10658                                             start + offset, buf, sectorsize);
10659                 if (ret)
10660                         break;
10661                 offset += sectorsize;
10662         }
10663         return ret;
10664 }
10665
10666 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10667                                       struct btrfs_root *csum_root,
10668                                       struct btrfs_root *cur_root)
10669 {
10670         struct btrfs_path *path;
10671         struct btrfs_key key;
10672         struct extent_buffer *node;
10673         struct btrfs_file_extent_item *fi;
10674         char *buf = NULL;
10675         u64 start = 0;
10676         u64 len = 0;
10677         int slot = 0;
10678         int ret = 0;
10679
10680         path = btrfs_alloc_path();
10681         if (!path)
10682                 return -ENOMEM;
10683         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10684         if (!buf) {
10685                 ret = -ENOMEM;
10686                 goto out;
10687         }
10688
10689         key.objectid = 0;
10690         key.offset = 0;
10691         key.type = 0;
10692
10693         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10694         if (ret < 0)
10695                 goto out;
10696         /* Iterate all regular file extents and fill its csum */
10697         while (1) {
10698                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10699
10700                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10701                         goto next;
10702                 node = path->nodes[0];
10703                 slot = path->slots[0];
10704                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10705                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10706                         goto next;
10707                 start = btrfs_file_extent_disk_bytenr(node, fi);
10708                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10709
10710                 ret = populate_csum(trans, csum_root, buf, start, len);
10711                 if (ret == -EEXIST)
10712                         ret = 0;
10713                 if (ret < 0)
10714                         goto out;
10715 next:
10716                 /*
10717                  * TODO: if next leaf is corrupted, jump to nearest next valid
10718                  * leaf.
10719                  */
10720                 ret = btrfs_next_item(cur_root, path);
10721                 if (ret < 0)
10722                         goto out;
10723                 if (ret > 0) {
10724                         ret = 0;
10725                         goto out;
10726                 }
10727         }
10728
10729 out:
10730         btrfs_free_path(path);
10731         free(buf);
10732         return ret;
10733 }
10734
10735 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10736                                   struct btrfs_root *csum_root)
10737 {
10738         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10739         struct btrfs_path *path;
10740         struct btrfs_root *tree_root = fs_info->tree_root;
10741         struct btrfs_root *cur_root;
10742         struct extent_buffer *node;
10743         struct btrfs_key key;
10744         int slot = 0;
10745         int ret = 0;
10746
10747         path = btrfs_alloc_path();
10748         if (!path)
10749                 return -ENOMEM;
10750
10751         key.objectid = BTRFS_FS_TREE_OBJECTID;
10752         key.offset = 0;
10753         key.type = BTRFS_ROOT_ITEM_KEY;
10754
10755         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10756         if (ret < 0)
10757                 goto out;
10758         if (ret > 0) {
10759                 ret = -ENOENT;
10760                 goto out;
10761         }
10762
10763         while (1) {
10764                 node = path->nodes[0];
10765                 slot = path->slots[0];
10766                 btrfs_item_key_to_cpu(node, &key, slot);
10767                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10768                         goto out;
10769                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10770                         goto next;
10771                 if (!is_fstree(key.objectid))
10772                         goto next;
10773                 key.offset = (u64)-1;
10774
10775                 cur_root = btrfs_read_fs_root(fs_info, &key);
10776                 if (IS_ERR(cur_root) || !cur_root) {
10777                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10778                                 key.objectid);
10779                         goto out;
10780                 }
10781                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10782                                 cur_root);
10783                 if (ret < 0)
10784                         goto out;
10785 next:
10786                 ret = btrfs_next_item(tree_root, path);
10787                 if (ret > 0) {
10788                         ret = 0;
10789                         goto out;
10790                 }
10791                 if (ret < 0)
10792                         goto out;
10793         }
10794
10795 out:
10796         btrfs_free_path(path);
10797         return ret;
10798 }
10799
10800 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10801                                       struct btrfs_root *csum_root)
10802 {
10803         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10804         struct btrfs_path *path;
10805         struct btrfs_extent_item *ei;
10806         struct extent_buffer *leaf;
10807         char *buf;
10808         struct btrfs_key key;
10809         int ret;
10810
10811         path = btrfs_alloc_path();
10812         if (!path)
10813                 return -ENOMEM;
10814
10815         key.objectid = 0;
10816         key.type = BTRFS_EXTENT_ITEM_KEY;
10817         key.offset = 0;
10818
10819         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10820         if (ret < 0) {
10821                 btrfs_free_path(path);
10822                 return ret;
10823         }
10824
10825         buf = malloc(csum_root->sectorsize);
10826         if (!buf) {
10827                 btrfs_free_path(path);
10828                 return -ENOMEM;
10829         }
10830
10831         while (1) {
10832                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10833                         ret = btrfs_next_leaf(extent_root, path);
10834                         if (ret < 0)
10835                                 break;
10836                         if (ret) {
10837                                 ret = 0;
10838                                 break;
10839                         }
10840                 }
10841                 leaf = path->nodes[0];
10842
10843                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10844                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10845                         path->slots[0]++;
10846                         continue;
10847                 }
10848
10849                 ei = btrfs_item_ptr(leaf, path->slots[0],
10850                                     struct btrfs_extent_item);
10851                 if (!(btrfs_extent_flags(leaf, ei) &
10852                       BTRFS_EXTENT_FLAG_DATA)) {
10853                         path->slots[0]++;
10854                         continue;
10855                 }
10856
10857                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10858                                     key.offset);
10859                 if (ret)
10860                         break;
10861                 path->slots[0]++;
10862         }
10863
10864         btrfs_free_path(path);
10865         free(buf);
10866         return ret;
10867 }
10868
/*
 * Recalculate the data checksums and insert them into the csum tree.
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree cannot be relied upon and the fs/subvolume trees have to be
 * used instead.  A non-zero @search_fs_tree selects the fs/subvolume trees as
 * the source, zero selects the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
10885
10886 static void free_roots_info_cache(void)
10887 {
10888         if (!roots_info_cache)
10889                 return;
10890
10891         while (!cache_tree_empty(roots_info_cache)) {
10892                 struct cache_extent *entry;
10893                 struct root_item_info *rii;
10894
10895                 entry = first_cache_extent(roots_info_cache);
10896                 if (!entry)
10897                         break;
10898                 remove_cache_extent(roots_info_cache, entry);
10899                 rii = container_of(entry, struct root_item_info, cache_extent);
10900                 free(rii);
10901         }
10902
10903         free(roots_info_cache);
10904         roots_info_cache = NULL;
10905 }
10906
10907 static int build_roots_info_cache(struct btrfs_fs_info *info)
10908 {
10909         int ret = 0;
10910         struct btrfs_key key;
10911         struct extent_buffer *leaf;
10912         struct btrfs_path *path;
10913
10914         if (!roots_info_cache) {
10915                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10916                 if (!roots_info_cache)
10917                         return -ENOMEM;
10918                 cache_tree_init(roots_info_cache);
10919         }
10920
10921         path = btrfs_alloc_path();
10922         if (!path)
10923                 return -ENOMEM;
10924
10925         key.objectid = 0;
10926         key.type = BTRFS_EXTENT_ITEM_KEY;
10927         key.offset = 0;
10928
10929         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10930         if (ret < 0)
10931                 goto out;
10932         leaf = path->nodes[0];
10933
10934         while (1) {
10935                 struct btrfs_key found_key;
10936                 struct btrfs_extent_item *ei;
10937                 struct btrfs_extent_inline_ref *iref;
10938                 int slot = path->slots[0];
10939                 int type;
10940                 u64 flags;
10941                 u64 root_id;
10942                 u8 level;
10943                 struct cache_extent *entry;
10944                 struct root_item_info *rii;
10945
10946                 if (slot >= btrfs_header_nritems(leaf)) {
10947                         ret = btrfs_next_leaf(info->extent_root, path);
10948                         if (ret < 0) {
10949                                 break;
10950                         } else if (ret) {
10951                                 ret = 0;
10952                                 break;
10953                         }
10954                         leaf = path->nodes[0];
10955                         slot = path->slots[0];
10956                 }
10957
10958                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10959
10960                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10961                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10962                         goto next;
10963
10964                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10965                 flags = btrfs_extent_flags(leaf, ei);
10966
10967                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10968                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10969                         goto next;
10970
10971                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10972                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10973                         level = found_key.offset;
10974                 } else {
10975                         struct btrfs_tree_block_info *binfo;
10976
10977                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10978                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10979                         level = btrfs_tree_block_level(leaf, binfo);
10980                 }
10981
10982                 /*
10983                  * For a root extent, it must be of the following type and the
10984                  * first (and only one) iref in the item.
10985                  */
10986                 type = btrfs_extent_inline_ref_type(leaf, iref);
10987                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10988                         goto next;
10989
10990                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10991                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10992                 if (!entry) {
10993                         rii = malloc(sizeof(struct root_item_info));
10994                         if (!rii) {
10995                                 ret = -ENOMEM;
10996                                 goto out;
10997                         }
10998                         rii->cache_extent.start = root_id;
10999                         rii->cache_extent.size = 1;
11000                         rii->level = (u8)-1;
11001                         entry = &rii->cache_extent;
11002                         ret = insert_cache_extent(roots_info_cache, entry);
11003                         ASSERT(ret == 0);
11004                 } else {
11005                         rii = container_of(entry, struct root_item_info,
11006                                            cache_extent);
11007                 }
11008
11009                 ASSERT(rii->cache_extent.start == root_id);
11010                 ASSERT(rii->cache_extent.size == 1);
11011
11012                 if (level > rii->level || rii->level == (u8)-1) {
11013                         rii->level = level;
11014                         rii->bytenr = found_key.objectid;
11015                         rii->gen = btrfs_extent_generation(leaf, ei);
11016                         rii->node_count = 1;
11017                 } else if (level == rii->level) {
11018                         rii->node_count++;
11019                 }
11020 next:
11021                 path->slots[0]++;
11022         }
11023
11024 out:
11025         btrfs_free_path(path);
11026
11027         return ret;
11028 }
11029
11030 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11031                                   struct btrfs_path *path,
11032                                   const struct btrfs_key *root_key,
11033                                   const int read_only_mode)
11034 {
11035         const u64 root_id = root_key->objectid;
11036         struct cache_extent *entry;
11037         struct root_item_info *rii;
11038         struct btrfs_root_item ri;
11039         unsigned long offset;
11040
11041         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11042         if (!entry) {
11043                 fprintf(stderr,
11044                         "Error: could not find extent items for root %llu\n",
11045                         root_key->objectid);
11046                 return -ENOENT;
11047         }
11048
11049         rii = container_of(entry, struct root_item_info, cache_extent);
11050         ASSERT(rii->cache_extent.start == root_id);
11051         ASSERT(rii->cache_extent.size == 1);
11052
11053         if (rii->node_count != 1) {
11054                 fprintf(stderr,
11055                         "Error: could not find btree root extent for root %llu\n",
11056                         root_id);
11057                 return -ENOENT;
11058         }
11059
11060         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11061         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11062
11063         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11064             btrfs_root_level(&ri) != rii->level ||
11065             btrfs_root_generation(&ri) != rii->gen) {
11066
11067                 /*
11068                  * If we're in repair mode but our caller told us to not update
11069                  * the root item, i.e. just check if it needs to be updated, don't
11070                  * print this message, since the caller will call us again shortly
11071                  * for the same root item without read only mode (the caller will
11072                  * open a transaction first).
11073                  */
11074                 if (!(read_only_mode && repair))
11075                         fprintf(stderr,
11076                                 "%sroot item for root %llu,"
11077                                 " current bytenr %llu, current gen %llu, current level %u,"
11078                                 " new bytenr %llu, new gen %llu, new level %u\n",
11079                                 (read_only_mode ? "" : "fixing "),
11080                                 root_id,
11081                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11082                                 btrfs_root_level(&ri),
11083                                 rii->bytenr, rii->gen, rii->level);
11084
11085                 if (btrfs_root_generation(&ri) > rii->gen) {
11086                         fprintf(stderr,
11087                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11088                                 root_id, btrfs_root_generation(&ri), rii->gen);
11089                         return -EINVAL;
11090                 }
11091
11092                 if (!read_only_mode) {
11093                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11094                         btrfs_set_root_level(&ri, rii->level);
11095                         btrfs_set_root_generation(&ri, rii->gen);
11096                         write_extent_buffer(path->nodes[0], &ri,
11097                                             offset, sizeof(ri));
11098                 }
11099
11100                 return 1;
11101         }
11102
11103         return 0;
11104 }
11105
11106 /*
11107  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11108  * caused read-only snapshots to be corrupted if they were created at a moment
11109  * when the source subvolume/snapshot had orphan items. The issue was that the
11110  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11111  * node instead of the post orphan cleanup root node.
11112  * So this function, and its callees, just detects and fixes those cases. Even
11113  * though the regression was for read-only snapshots, this function applies to
11114  * any snapshot/subvolume root.
11115  * This must be run before any other repair code - not doing it so, makes other
11116  * repair code delete or modify backrefs in the extent tree for example, which
11117  * will result in an inconsistent fs after repairing the root items.
11118  */
11119 static int repair_root_items(struct btrfs_fs_info *info)
11120 {
11121         struct btrfs_path *path = NULL;
11122         struct btrfs_key key;
11123         struct extent_buffer *leaf;
11124         struct btrfs_trans_handle *trans = NULL;
11125         int ret = 0;
11126         int bad_roots = 0;
11127         int need_trans = 0;
11128
11129         ret = build_roots_info_cache(info);
11130         if (ret)
11131                 goto out;
11132
11133         path = btrfs_alloc_path();
11134         if (!path) {
11135                 ret = -ENOMEM;
11136                 goto out;
11137         }
11138
11139         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11140         key.type = BTRFS_ROOT_ITEM_KEY;
11141         key.offset = 0;
11142
11143 again:
11144         /*
11145          * Avoid opening and committing transactions if a leaf doesn't have
11146          * any root items that need to be fixed, so that we avoid rotating
11147          * backup roots unnecessarily.
11148          */
11149         if (need_trans) {
11150                 trans = btrfs_start_transaction(info->tree_root, 1);
11151                 if (IS_ERR(trans)) {
11152                         ret = PTR_ERR(trans);
11153                         goto out;
11154                 }
11155         }
11156
11157         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11158                                 0, trans ? 1 : 0);
11159         if (ret < 0)
11160                 goto out;
11161         leaf = path->nodes[0];
11162
11163         while (1) {
11164                 struct btrfs_key found_key;
11165
11166                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11167                         int no_more_keys = find_next_key(path, &key);
11168
11169                         btrfs_release_path(path);
11170                         if (trans) {
11171                                 ret = btrfs_commit_transaction(trans,
11172                                                                info->tree_root);
11173                                 trans = NULL;
11174                                 if (ret < 0)
11175                                         goto out;
11176                         }
11177                         need_trans = 0;
11178                         if (no_more_keys)
11179                                 break;
11180                         goto again;
11181                 }
11182
11183                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11184
11185                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11186                         goto next;
11187                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11188                         goto next;
11189
11190                 ret = maybe_repair_root_item(info, path, &found_key,
11191                                              trans ? 0 : 1);
11192                 if (ret < 0)
11193                         goto out;
11194                 if (ret) {
11195                         if (!trans && repair) {
11196                                 need_trans = 1;
11197                                 key = found_key;
11198                                 btrfs_release_path(path);
11199                                 goto again;
11200                         }
11201                         bad_roots++;
11202                 }
11203 next:
11204                 path->slots[0]++;
11205         }
11206         ret = 0;
11207 out:
11208         free_roots_info_cache();
11209         btrfs_free_path(path);
11210         if (trans)
11211                 btrfs_commit_transaction(trans, info->tree_root);
11212         if (ret < 0)
11213                 return ret;
11214
11215         return bad_roots;
11216 }
11217
/*
 * Help text for "btrfs check", stored as a NULL-terminated array of strings.
 * In order, the entries are: the synopsis line, a one-line summary, a longer
 * description (including the warning about repair mode), an empty separator
 * string, and then one or more lines per supported option.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               select mode, allows to make some memory/IO",
	"                            trade-offs, where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report           print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	NULL	/* end-of-array marker */
};
11247
11248 int cmd_check(int argc, char **argv)
11249 {
11250         struct cache_tree root_cache;
11251         struct btrfs_root *root;
11252         struct btrfs_fs_info *info;
11253         u64 bytenr = 0;
11254         u64 subvolid = 0;
11255         u64 tree_root_bytenr = 0;
11256         u64 chunk_root_bytenr = 0;
11257         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11258         int ret;
11259         u64 num;
11260         int init_csum_tree = 0;
11261         int readonly = 0;
11262         int qgroup_report = 0;
11263         int qgroups_repaired = 0;
11264         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11265
11266         while(1) {
11267                 int c;
11268                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11269                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11270                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11271                         GETOPT_VAL_MODE };
11272                 static const struct option long_options[] = {
11273                         { "super", required_argument, NULL, 's' },
11274                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11275                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11276                         { "init-csum-tree", no_argument, NULL,
11277                                 GETOPT_VAL_INIT_CSUM },
11278                         { "init-extent-tree", no_argument, NULL,
11279                                 GETOPT_VAL_INIT_EXTENT },
11280                         { "check-data-csum", no_argument, NULL,
11281                                 GETOPT_VAL_CHECK_CSUM },
11282                         { "backup", no_argument, NULL, 'b' },
11283                         { "subvol-extents", required_argument, NULL, 'E' },
11284                         { "qgroup-report", no_argument, NULL, 'Q' },
11285                         { "tree-root", required_argument, NULL, 'r' },
11286                         { "chunk-root", required_argument, NULL,
11287                                 GETOPT_VAL_CHUNK_TREE },
11288                         { "progress", no_argument, NULL, 'p' },
11289                         { "mode", required_argument, NULL,
11290                                 GETOPT_VAL_MODE },
11291                         { NULL, 0, NULL, 0}
11292                 };
11293
11294                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11295                 if (c < 0)
11296                         break;
11297                 switch(c) {
11298                         case 'a': /* ignored */ break;
11299                         case 'b':
11300                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11301                                 break;
11302                         case 's':
11303                                 num = arg_strtou64(optarg);
11304                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11305                                         fprintf(stderr,
11306                                                 "ERROR: super mirror should be less than: %d\n",
11307                                                 BTRFS_SUPER_MIRROR_MAX);
11308                                         exit(1);
11309                                 }
11310                                 bytenr = btrfs_sb_offset(((int)num));
11311                                 printf("using SB copy %llu, bytenr %llu\n", num,
11312                                        (unsigned long long)bytenr);
11313                                 break;
11314                         case 'Q':
11315                                 qgroup_report = 1;
11316                                 break;
11317                         case 'E':
11318                                 subvolid = arg_strtou64(optarg);
11319                                 break;
11320                         case 'r':
11321                                 tree_root_bytenr = arg_strtou64(optarg);
11322                                 break;
11323                         case GETOPT_VAL_CHUNK_TREE:
11324                                 chunk_root_bytenr = arg_strtou64(optarg);
11325                                 break;
11326                         case 'p':
11327                                 ctx.progress_enabled = true;
11328                                 break;
11329                         case '?':
11330                         case 'h':
11331                                 usage(cmd_check_usage);
11332                         case GETOPT_VAL_REPAIR:
11333                                 printf("enabling repair mode\n");
11334                                 repair = 1;
11335                                 ctree_flags |= OPEN_CTREE_WRITES;
11336                                 break;
11337                         case GETOPT_VAL_READONLY:
11338                                 readonly = 1;
11339                                 break;
11340                         case GETOPT_VAL_INIT_CSUM:
11341                                 printf("Creating a new CRC tree\n");
11342                                 init_csum_tree = 1;
11343                                 repair = 1;
11344                                 ctree_flags |= OPEN_CTREE_WRITES;
11345                                 break;
11346                         case GETOPT_VAL_INIT_EXTENT:
11347                                 init_extent_tree = 1;
11348                                 ctree_flags |= (OPEN_CTREE_WRITES |
11349                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11350                                 repair = 1;
11351                                 break;
11352                         case GETOPT_VAL_CHECK_CSUM:
11353                                 check_data_csum = 1;
11354                                 break;
11355                         case GETOPT_VAL_MODE:
11356                                 check_mode = parse_check_mode(optarg);
11357                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11358                                         error("unknown mode: %s", optarg);
11359                                         exit(1);
11360                                 }
11361                                 break;
11362                 }
11363         }
11364
11365         if (check_argc_exact(argc - optind, 1))
11366                 usage(cmd_check_usage);
11367
11368         if (ctx.progress_enabled) {
11369                 ctx.tp = TASK_NOTHING;
11370                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11371         }
11372
11373         /* This check is the only reason for --readonly to exist */
11374         if (readonly && repair) {
11375                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11376                 exit(1);
11377         }
11378
11379         /*
11380          * Not supported yet
11381          */
11382         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11383                 error("Low memory mode doesn't support repair yet");
11384                 exit(1);
11385         }
11386
11387         radix_tree_init();
11388         cache_tree_init(&root_cache);
11389
11390         if((ret = check_mounted(argv[optind])) < 0) {
11391                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11392                 goto err_out;
11393         } else if(ret) {
11394                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11395                 ret = -EBUSY;
11396                 goto err_out;
11397         }
11398
11399         /* only allow partial opening under repair mode */
11400         if (repair)
11401                 ctree_flags |= OPEN_CTREE_PARTIAL;
11402
11403         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11404                                   chunk_root_bytenr, ctree_flags);
11405         if (!info) {
11406                 fprintf(stderr, "Couldn't open file system\n");
11407                 ret = -EIO;
11408                 goto err_out;
11409         }
11410
11411         global_info = info;
11412         root = info->fs_root;
11413
11414         /*
11415          * repair mode will force us to commit transaction which
11416          * will make us fail to load log tree when mounting.
11417          */
11418         if (repair && btrfs_super_log_root(info->super_copy)) {
11419                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11420                 if (!ret) {
11421                         ret = 1;
11422                         goto close_out;
11423                 }
11424                 ret = zero_log_tree(root);
11425                 if (ret) {
11426                         fprintf(stderr, "fail to zero log tree\n");
11427                         goto close_out;
11428                 }
11429         }
11430
11431         uuid_unparse(info->super_copy->fsid, uuidbuf);
11432         if (qgroup_report) {
11433                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11434                        uuidbuf);
11435                 ret = qgroup_verify_all(info);
11436                 if (ret == 0)
11437                         report_qgroups(1);
11438                 goto close_out;
11439         }
11440         if (subvolid) {
11441                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11442                        subvolid, argv[optind], uuidbuf);
11443                 ret = print_extent_state(info, subvolid);
11444                 goto close_out;
11445         }
11446         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11447
11448         if (!extent_buffer_uptodate(info->tree_root->node) ||
11449             !extent_buffer_uptodate(info->dev_root->node) ||
11450             !extent_buffer_uptodate(info->chunk_root->node)) {
11451                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11452                 ret = -EIO;
11453                 goto close_out;
11454         }
11455
11456         if (init_extent_tree || init_csum_tree) {
11457                 struct btrfs_trans_handle *trans;
11458
11459                 trans = btrfs_start_transaction(info->extent_root, 0);
11460                 if (IS_ERR(trans)) {
11461                         fprintf(stderr, "Error starting transaction\n");
11462                         ret = PTR_ERR(trans);
11463                         goto close_out;
11464                 }
11465
11466                 if (init_extent_tree) {
11467                         printf("Creating a new extent tree\n");
11468                         ret = reinit_extent_tree(trans, info);
11469                         if (ret)
11470                                 goto close_out;
11471                 }
11472
11473                 if (init_csum_tree) {
11474                         fprintf(stderr, "Reinit crc root\n");
11475                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11476                         if (ret) {
11477                                 fprintf(stderr, "crc root initialization failed\n");
11478                                 ret = -EIO;
11479                                 goto close_out;
11480                         }
11481
11482                         ret = fill_csum_tree(trans, info->csum_root,
11483                                              init_extent_tree);
11484                         if (ret) {
11485                                 fprintf(stderr, "crc refilling failed\n");
11486                                 return -EIO;
11487                         }
11488                 }
11489                 /*
11490                  * Ok now we commit and run the normal fsck, which will add
11491                  * extent entries for all of the items it finds.
11492                  */
11493                 ret = btrfs_commit_transaction(trans, info->extent_root);
11494                 if (ret)
11495                         goto close_out;
11496         }
11497         if (!extent_buffer_uptodate(info->extent_root->node)) {
11498                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11499                 ret = -EIO;
11500                 goto close_out;
11501         }
11502         if (!extent_buffer_uptodate(info->csum_root->node)) {
11503                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11504                 ret = -EIO;
11505                 goto close_out;
11506         }
11507
11508         if (!ctx.progress_enabled)
11509                 fprintf(stderr, "checking extents\n");
11510         if (check_mode == CHECK_MODE_LOWMEM)
11511                 ret = check_chunks_and_extents_v2(root);
11512         else
11513                 ret = check_chunks_and_extents(root);
11514         if (ret)
11515                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11516
11517         ret = repair_root_items(info);
11518         if (ret < 0)
11519                 goto close_out;
11520         if (repair) {
11521                 fprintf(stderr, "Fixed %d roots.\n", ret);
11522                 ret = 0;
11523         } else if (ret > 0) {
11524                 fprintf(stderr,
11525                        "Found %d roots with an outdated root item.\n",
11526                        ret);
11527                 fprintf(stderr,
11528                         "Please run a filesystem check with the option --repair to fix them.\n");
11529                 ret = 1;
11530                 goto close_out;
11531         }
11532
11533         if (!ctx.progress_enabled) {
11534                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11535                         fprintf(stderr, "checking free space tree\n");
11536                 else
11537                         fprintf(stderr, "checking free space cache\n");
11538         }
11539         ret = check_space_cache(root);
11540         if (ret)
11541                 goto out;
11542
11543         /*
11544          * We used to have to have these hole extents in between our real
11545          * extents so if we don't have this flag set we need to make sure there
11546          * are no gaps in the file extents for inodes, otherwise we can just
11547          * ignore it when this happens.
11548          */
11549         no_holes = btrfs_fs_incompat(root->fs_info,
11550                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11551         if (!ctx.progress_enabled)
11552                 fprintf(stderr, "checking fs roots\n");
11553         ret = check_fs_roots(root, &root_cache);
11554         if (ret)
11555                 goto out;
11556
11557         fprintf(stderr, "checking csums\n");
11558         ret = check_csums(root);
11559         if (ret)
11560                 goto out;
11561
11562         fprintf(stderr, "checking root refs\n");
11563         ret = check_root_refs(root, &root_cache);
11564         if (ret)
11565                 goto out;
11566
11567         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11568                 struct extent_buffer *eb;
11569
11570                 eb = list_first_entry(&root->fs_info->recow_ebs,
11571                                       struct extent_buffer, recow);
11572                 list_del_init(&eb->recow);
11573                 ret = recow_extent_buffer(root, eb);
11574                 if (ret)
11575                         break;
11576         }
11577
11578         while (!list_empty(&delete_items)) {
11579                 struct bad_item *bad;
11580
11581                 bad = list_first_entry(&delete_items, struct bad_item, list);
11582                 list_del_init(&bad->list);
11583                 if (repair)
11584                         ret = delete_bad_item(root, bad);
11585                 free(bad);
11586         }
11587
11588         if (info->quota_enabled) {
11589                 int err;
11590                 fprintf(stderr, "checking quota groups\n");
11591                 err = qgroup_verify_all(info);
11592                 if (err)
11593                         goto out;
11594                 report_qgroups(0);
11595                 err = repair_qgroups(info, &qgroups_repaired);
11596                 if (err)
11597                         goto out;
11598         }
11599
11600         if (!list_empty(&root->fs_info->recow_ebs)) {
11601                 fprintf(stderr, "Transid errors in file system\n");
11602                 ret = 1;
11603         }
11604 out:
11605         /* Don't override original ret */
11606         if (!ret && qgroups_repaired)
11607                 ret = qgroups_repaired;
11608
11609         if (found_old_backref) { /*
11610                  * there was a disk format change when mixed
11611                  * backref was in testing tree. The old format
11612                  * existed about one week.
11613                  */
11614                 printf("\n * Found old mixed backref format. "
11615                        "The old format is not supported! *"
11616                        "\n * Please mount the FS in readonly mode, "
11617                        "backup data and re-format the FS. *\n\n");
11618                 ret = 1;
11619         }
11620         printf("found %llu bytes used err is %d\n",
11621                (unsigned long long)bytes_used, ret);
11622         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11623         printf("total tree bytes: %llu\n",
11624                (unsigned long long)total_btree_bytes);
11625         printf("total fs tree bytes: %llu\n",
11626                (unsigned long long)total_fs_tree_bytes);
11627         printf("total extent tree bytes: %llu\n",
11628                (unsigned long long)total_extent_tree_bytes);
11629         printf("btree space waste bytes: %llu\n",
11630                (unsigned long long)btree_space_waste);
11631         printf("file data blocks allocated: %llu\n referenced %llu\n",
11632                 (unsigned long long)data_bytes_allocated,
11633                 (unsigned long long)data_bytes_referenced);
11634
11635         free_qgroup_counts();
11636         free_root_recs_tree(&root_cache);
11637 close_out:
11638         close_ctree(root);
11639 err_out:
11640         if (ctx.progress_enabled)
11641                 task_deinit(ctx.info);
11642
11643         return ret;
11644 }